1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
4 // Simple concrete, mutable FST whose states and arcs are stored in STL vectors.
6 #ifndef FST_VECTOR_FST_H_
7 #define FST_VECTOR_FST_H_
15 #include <fst/fst-decl.h> // For optional argument declarations
16 #include <fst/mutable-fst.h>
17 #include <fst/test-properties.h>
22 template <class A, class S>
25 template <class F, class G>
26 void Cast(const F &, G *);
28 // Arcs (of type A) implemented by an STL vector per state. M specifies Arc
29 // allocator (default declared in fst-decl.h).
30 template <class A, class M /* = std::allocator<A> */>
34 using StateId = typename Arc::StateId;
35 using Weight = typename Arc::Weight;
36 using ArcAllocator = M;
37 using StateAllocator =
38 typename ArcAllocator::template rebind<VectorState<Arc, M>>::other;
40 // Provide STL allocator for arcs.
41 explicit VectorState(const ArcAllocator &alloc)
42 : final_(Weight::Zero()), niepsilons_(0), noepsilons_(0), arcs_(alloc) {}
44 VectorState(const VectorState<A, M> &state, const ArcAllocator &alloc)
45 : final_(state.Final()),
46 niepsilons_(state.NumInputEpsilons()),
47 noepsilons_(state.NumOutputEpsilons()),
48 arcs_(state.arcs_.begin(), state.arcs_.end(), alloc) {}
51 final_ = Weight::Zero();
57 Weight Final() const { return final_; }
59 size_t NumInputEpsilons() const { return niepsilons_; }
61 size_t NumOutputEpsilons() const { return noepsilons_; }
63 size_t NumArcs() const { return arcs_.size(); }
65 const Arc &GetArc(size_t n) const { return arcs_[n]; }
67 const Arc *Arcs() const { return !arcs_.empty() ? &arcs_[0] : nullptr; }
69 Arc *MutableArcs() { return !arcs_.empty() ? &arcs_[0] : nullptr; }
71 void ReserveArcs(size_t n) { arcs_.reserve(n); }
73 void SetFinal(Weight weight) { final_ = std::move(weight); }
75 void SetNumInputEpsilons(size_t n) { niepsilons_ = n; }
77 void SetNumOutputEpsilons(size_t n) { noepsilons_ = n; }
79 void AddArc(const Arc &arc) {
80 if (arc.ilabel == 0) ++niepsilons_;
81 if (arc.olabel == 0) ++noepsilons_;
85 void SetArc(const Arc &arc, size_t n) {
86 if (arcs_[n].ilabel == 0) --niepsilons_;
87 if (arcs_[n].olabel == 0) --noepsilons_;
88 if (arc.ilabel == 0) ++niepsilons_;
89 if (arc.olabel == 0) ++noepsilons_;
99 void DeleteArcs(size_t n) {
100 for (size_t i = 0; i < n; ++i) {
101 if (arcs_.back().ilabel == 0) --niepsilons_;
102 if (arcs_.back().olabel == 0) --noepsilons_;
107 // For state class allocation.
108 void *operator new(size_t size, StateAllocator *alloc) {
109 return alloc->allocate(1);
112 // For state destruction and memory freeing.
113 static void Destroy(VectorState<A, M> *state, StateAllocator *alloc) {
115 state->~VectorState<A, M>();
116 alloc->deallocate(state, 1);
121 Weight final_; // Final weight.
122 size_t niepsilons_; // # of input epsilons
123 size_t noepsilons_; // # of output epsilons
124 std::vector<A, ArcAllocator> arcs_; // Arc container.
129 // States are implemented by STL vectors, templated on the
130 // State definition. This does not manage the Fst properties.
132 class VectorFstBaseImpl : public FstImpl<typename S::Arc> {
135 using Arc = typename State::Arc;
136 using StateId = typename Arc::StateId;
137 using Weight = typename Arc::Weight;
139 VectorFstBaseImpl() : start_(kNoStateId) {}
141 ~VectorFstBaseImpl() override {
142 for (StateId s = 0; s < states_.size(); ++s) {
143 State::Destroy(states_[s], &state_alloc_);
147 StateId Start() const { return start_; }
149 Weight Final(StateId state) const { return states_[state]->Final(); }
151 StateId NumStates() const { return states_.size(); }
153 size_t NumArcs(StateId state) const { return states_[state]->NumArcs(); }
155 size_t NumInputEpsilons(StateId state) const {
156 return GetState(state)->NumInputEpsilons();
159 size_t NumOutputEpsilons(StateId state) const {
160 return GetState(state)->NumOutputEpsilons();
163 void SetStart(StateId state) { start_ = state; }
165 void SetFinal(StateId state, Weight weight) {
166 states_[state]->SetFinal(std::move(weight));
170 states_.push_back(new (&state_alloc_) State(arc_alloc_));
171 return states_.size() - 1;
174 StateId AddState(State *state) {
175 states_.push_back(state);
176 return states_.size() - 1;
179 void AddArc(StateId state, const Arc &arc) { states_[state]->AddArc(arc); }
181 void DeleteStates(const std::vector<StateId> &dstates) {
182 std::vector<StateId> newid(states_.size(), 0);
183 for (StateId i = 0; i < dstates.size(); ++i) newid[dstates[i]] = kNoStateId;
185 for (StateId state = 0; state < states_.size(); ++state) {
186 if (newid[state] != kNoStateId) {
187 newid[state] = nstates;
188 if (state != nstates) states_[nstates] = states_[state];
191 State::Destroy(states_[state], &state_alloc_);
194 states_.resize(nstates);
195 for (StateId state = 0; state < states_.size(); ++state) {
196 auto *arcs = states_[state]->MutableArcs();
198 auto nieps = states_[state]->NumInputEpsilons();
199 auto noeps = states_[state]->NumOutputEpsilons();
200 for (size_t i = 0; i < states_[state]->NumArcs(); ++i) {
201 const auto t = newid[arcs[i].nextstate];
202 if (t != kNoStateId) {
203 arcs[i].nextstate = t;
204 if (i != narcs) arcs[narcs] = arcs[i];
207 if (arcs[i].ilabel == 0) --nieps;
208 if (arcs[i].olabel == 0) --noeps;
211 states_[state]->DeleteArcs(states_[state]->NumArcs() - narcs);
212 states_[state]->SetNumInputEpsilons(nieps);
213 states_[state]->SetNumOutputEpsilons(noeps);
215 if (Start() != kNoStateId) SetStart(newid[Start()]);
218 void DeleteStates() {
219 for (StateId state = 0; state < states_.size(); ++state) {
220 State::Destroy(states_[state], &state_alloc_);
223 SetStart(kNoStateId);
226 void DeleteArcs(StateId state, size_t n) { states_[state]->DeleteArcs(n); }
228 void DeleteArcs(StateId state) { states_[state]->DeleteArcs(); }
230 State *GetState(StateId state) { return states_[state]; }
232 const State *GetState(StateId state) const { return states_[state]; }
234 void SetState(StateId state, State *vstate) { states_[state] = vstate; }
236 void ReserveStates(StateId n) { states_.reserve(n); }
238 void ReserveArcs(StateId state, size_t n) { states_[state]->ReserveArcs(n); }
240 // Provide information needed for generic state iterator.
241 void InitStateIterator(StateIteratorData<Arc> *data) const {
242 data->base = nullptr;
243 data->nstates = states_.size();
246 // Provide information needed for generic arc iterator.
247 void InitArcIterator(StateId state, ArcIteratorData<Arc> *data) const {
248 data->base = nullptr;
249 data->narcs = states_[state]->NumArcs();
250 data->arcs = states_[state]->Arcs();
251 data->ref_count = nullptr;
255 std::vector<State *> states_; // States represenation.
256 StateId start_; // Initial state.
257 typename State::StateAllocator state_alloc_; // For state allocation.
258 typename State::ArcAllocator arc_alloc_; // For arc allocation.
260 VectorFstBaseImpl(const VectorFstBaseImpl &) = delete;
261 VectorFstBaseImpl &operator=(const VectorFstBaseImpl &) = delete;
264 // This is a VectorFstBaseImpl container that holds VectorStates and manages FST
267 class VectorFstImpl : public VectorFstBaseImpl<S> {
270 using Arc = typename State::Arc;
271 using Label = typename Arc::Label;
272 using StateId = typename Arc::StateId;
273 using Weight = typename Arc::Weight;
275 using FstImpl<Arc>::SetInputSymbols;
276 using FstImpl<Arc>::SetOutputSymbols;
277 using FstImpl<Arc>::SetType;
278 using FstImpl<Arc>::SetProperties;
279 using FstImpl<Arc>::Properties;
281 using VectorFstBaseImpl<S>::Start;
282 using VectorFstBaseImpl<S>::NumStates;
283 using VectorFstBaseImpl<S>::GetState;
284 using VectorFstBaseImpl<S>::ReserveArcs;
286 friend class MutableArcIterator<VectorFst<Arc, S>>;
288 using BaseImpl = VectorFstBaseImpl<S>;
292 SetProperties(kNullProperties | kStaticProperties);
295 explicit VectorFstImpl(const Fst<Arc> &fst);
297 static VectorFstImpl<S> *Read(std::istream &strm, const FstReadOptions &opts);
299 void SetStart(StateId state) {
300 BaseImpl::SetStart(state);
301 SetProperties(SetStartProperties(Properties()));
304 void SetFinal(StateId state, Weight weight) {
305 const auto old_weight = BaseImpl::Final(state);
306 const auto properties =
307 SetFinalProperties(Properties(), old_weight, weight);
308 BaseImpl::SetFinal(state, std::move(weight));
309 SetProperties(properties);
313 const auto state = BaseImpl::AddState();
314 SetProperties(AddStateProperties(Properties()));
318 void AddArc(StateId state, const Arc &arc) {
319 auto *vstate = GetState(state);
320 const auto *parc = vstate->NumArcs() == 0
322 : &(vstate->GetArc(vstate->NumArcs() - 1));
323 SetProperties(AddArcProperties(Properties(), state, arc, parc));
324 BaseImpl::AddArc(state, arc);
327 void DeleteStates(const std::vector<StateId> &dstates) {
328 BaseImpl::DeleteStates(dstates);
329 SetProperties(DeleteStatesProperties(Properties()));
332 void DeleteStates() {
333 BaseImpl::DeleteStates();
334 SetProperties(DeleteAllStatesProperties(Properties(), kStaticProperties));
337 void DeleteArcs(StateId state, size_t n) {
338 BaseImpl::DeleteArcs(state, n);
339 SetProperties(DeleteArcsProperties(Properties()));
342 void DeleteArcs(StateId state) {
343 BaseImpl::DeleteArcs(state);
344 SetProperties(DeleteArcsProperties(Properties()));
347 // Properties always true of this FST class
348 static constexpr uint64 kStaticProperties = kExpanded | kMutable;
351 // Minimum file format version supported.
352 static constexpr int kMinFileVersion = 2;
356 constexpr uint64 VectorFstImpl<S>::kStaticProperties;
359 constexpr int VectorFstImpl<S>::kMinFileVersion;
362 VectorFstImpl<S>::VectorFstImpl(const Fst<Arc> &fst) {
364 SetInputSymbols(fst.InputSymbols());
365 SetOutputSymbols(fst.OutputSymbols());
366 BaseImpl::SetStart(fst.Start());
367 if (fst.Properties(kExpanded, false)) {
368 BaseImpl::ReserveStates(CountStates(fst));
370 for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
371 const auto state = siter.Value();
372 BaseImpl::AddState();
373 BaseImpl::SetFinal(state, fst.Final(state));
374 ReserveArcs(state, fst.NumArcs(state));
375 for (ArcIterator<Fst<Arc>> aiter(fst, state); !aiter.Done(); aiter.Next()) {
376 const auto &arc = aiter.Value();
377 BaseImpl::AddArc(state, arc);
380 SetProperties(fst.Properties(kCopyProperties, false) | kStaticProperties);
384 VectorFstImpl<S> *VectorFstImpl<S>::Read(std::istream &strm,
385 const FstReadOptions &opts) {
386 std::unique_ptr<VectorFstImpl<S>> impl(new VectorFstImpl());
388 if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return nullptr;
389 impl->BaseImpl::SetStart(hdr.Start());
390 if (hdr.NumStates() != kNoStateId) impl->ReserveStates(hdr.NumStates());
392 for (; hdr.NumStates() == kNoStateId || state < hdr.NumStates(); ++state) {
394 if (!weight.Read(strm)) break;
395 impl->BaseImpl::AddState();
396 auto *vstate = impl->GetState(state);
397 vstate->SetFinal(weight);
399 ReadType(strm, &narcs);
401 LOG(ERROR) << "VectorFst::Read: Read failed: " << opts.source;
404 impl->ReserveArcs(state, narcs);
405 for (int64 i = 0; i < narcs; ++i) {
407 ReadType(strm, &arc.ilabel);
408 ReadType(strm, &arc.olabel);
409 arc.weight.Read(strm);
410 ReadType(strm, &arc.nextstate);
412 LOG(ERROR) << "VectorFst::Read: Read failed: " << opts.source;
415 impl->BaseImpl::AddArc(state, arc);
418 if (hdr.NumStates() != kNoStateId && state != hdr.NumStates()) {
419 LOG(ERROR) << "VectorFst::Read: Unexpected end of file: " << opts.source;
422 return impl.release();
425 } // namespace internal
427 // Simple concrete, mutable FST. This class attaches interface to implementation
428 // and handles reference counting, delegating most methods to ImplToMutableFst.
429 // Also supports ReserveStates and ReserveArcs methods (cf. STL vector methods).
430 // The second optional template argument gives the State definition.
431 template <class A, class S /* = VectorState<A> */>
432 class VectorFst : public ImplToMutableFst<internal::VectorFstImpl<S>> {
435 using StateId = typename Arc::StateId;
438 using Impl = internal::VectorFstImpl<State>;
440 friend class StateIterator<VectorFst<Arc, State>>;
441 friend class ArcIterator<VectorFst<Arc, State>>;
442 friend class MutableArcIterator<VectorFst<A, S>>;
444 template <class F, class G>
445 friend void Cast(const F &, G *);
447 VectorFst() : ImplToMutableFst<Impl>(std::make_shared<Impl>()) {}
449 explicit VectorFst(const Fst<Arc> &fst)
450 : ImplToMutableFst<Impl>(std::make_shared<Impl>(fst)) {}
452 VectorFst(const VectorFst<Arc, State> &fst, bool safe = false)
453 : ImplToMutableFst<Impl>(fst) {}
455 // Get a copy of this VectorFst. See Fst<>::Copy() for further doc.
456 VectorFst<Arc, State> *Copy(bool safe = false) const override {
457 return new VectorFst<Arc, State>(*this, safe);
460 VectorFst<Arc, State> &operator=(const VectorFst<Arc, State> &fst) {
461 SetImpl(fst.GetSharedImpl());
465 VectorFst<Arc, State> &operator=(const Fst<Arc> &fst) override {
466 if (this != &fst) SetImpl(std::make_shared<Impl>(fst));
470 // Reads a VectorFst from an input stream, returning nullptr on error.
471 static VectorFst<Arc, State> *Read(std::istream &strm,
472 const FstReadOptions &opts) {
473 auto *impl = Impl::Read(strm, opts);
474 return impl ? new VectorFst<Arc, State>(std::shared_ptr<Impl>(impl))
478 // Read a VectorFst from a file, returning nullptr on error; empty filename
479 // reads from standard input.
480 static VectorFst<Arc, State> *Read(const string &filename) {
481 auto *impl = ImplToExpandedFst<Impl, MutableFst<Arc>>::Read(filename);
482 return impl ? new VectorFst<Arc, State>(std::shared_ptr<Impl>(impl))
486 bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
487 return WriteFst(*this, strm, opts);
490 bool Write(const string &filename) const override {
491 return Fst<Arc>::WriteFile(filename);
495 static bool WriteFst(const FST &fst, std::ostream &strm,
496 const FstWriteOptions &opts);
498 void InitStateIterator(StateIteratorData<Arc> *data) const override {
499 GetImpl()->InitStateIterator(data);
502 void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
503 GetImpl()->InitArcIterator(s, data);
506 inline void InitMutableArcIterator(StateId s,
507 MutableArcIteratorData<Arc> *) override;
509 using ImplToMutableFst<Impl, MutableFst<Arc>>::ReserveArcs;
510 using ImplToMutableFst<Impl, MutableFst<Arc>>::ReserveStates;
513 using ImplToMutableFst<Impl, MutableFst<Arc>>::GetImpl;
514 using ImplToMutableFst<Impl, MutableFst<Arc>>::MutateCheck;
515 using ImplToMutableFst<Impl, MutableFst<Arc>>::SetImpl;
517 explicit VectorFst(std::shared_ptr<Impl> impl)
518 : ImplToMutableFst<Impl>(impl) {}
521 // Writes FST to file in Vector format, potentially with a pass over the machine
522 // before writing to compute number of states.
523 template <class Arc, class State>
525 bool VectorFst<Arc, State>::WriteFst(const FST &fst, std::ostream &strm,
526 const FstWriteOptions &opts) {
527 static constexpr int file_version = 2;
528 bool update_header = true;
530 hdr.SetStart(fst.Start());
531 hdr.SetNumStates(kNoStateId);
532 size_t start_offset = 0;
533 if (fst.Properties(kExpanded, false) || opts.stream_write ||
534 (start_offset = strm.tellp()) != -1) {
535 hdr.SetNumStates(CountStates(fst));
536 update_header = false;
538 const auto properties =
539 fst.Properties(kCopyProperties, false) | Impl::kStaticProperties;
540 internal::FstImpl<Arc>::WriteFstHeader(fst, strm, opts, file_version,
541 "vector", properties, &hdr);
542 StateId num_states = 0;
543 for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
544 const auto s = siter.Value();
545 fst.Final(s).Write(strm);
546 const int64 narcs = fst.NumArcs(s);
547 WriteType(strm, narcs);
548 for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
549 const auto &arc = aiter.Value();
550 WriteType(strm, arc.ilabel);
551 WriteType(strm, arc.olabel);
552 arc.weight.Write(strm);
553 WriteType(strm, arc.nextstate);
559 LOG(ERROR) << "VectorFst::Write: Write failed: " << opts.source;
563 hdr.SetNumStates(num_states);
564 return internal::FstImpl<Arc>::UpdateFstHeader(
565 fst, strm, opts, file_version, "vector", properties, &hdr,
568 if (num_states != hdr.NumStates()) {
569 LOG(ERROR) << "Inconsistent number of states observed during write";
576 // Specialization for VectorFst; see generic version in fst.h for sample usage
577 // (but use the VectorFst type instead). This version should inline.
578 template <class Arc, class State>
579 class StateIterator<VectorFst<Arc, State>> {
581 using StateId = typename Arc::StateId;
583 explicit StateIterator(const VectorFst<Arc, State> &fst)
584 : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
586 bool Done() const { return s_ >= nstates_; }
588 StateId Value() const { return s_; }
590 void Next() { ++s_; }
592 void Reset() { s_ = 0; }
595 const StateId nstates_;
599 // Specialization for VectorFst; see generic version in fst.h for sample usage
600 // (but use the VectorFst type instead). This version should inline.
601 template <class Arc, class State>
602 class ArcIterator<VectorFst<Arc, State>> {
604 using StateId = typename Arc::StateId;
606 ArcIterator(const VectorFst<Arc, State> &fst, StateId s)
607 : arcs_(fst.GetImpl()->GetState(s)->Arcs()),
608 narcs_(fst.GetImpl()->GetState(s)->NumArcs()),
611 bool Done() const { return i_ >= narcs_; }
613 const Arc &Value() const { return arcs_[i_]; }
615 void Next() { ++i_; }
617 void Reset() { i_ = 0; }
619 void Seek(size_t a) { i_ = a; }
621 size_t Position() const { return i_; }
623 constexpr uint32 Flags() const { return kArcValueFlags; }
625 void SetFlags(uint32, uint32) {}
633 // Specialization for VectorFst; see generic version in mutable-fst.h for sample
634 // usage (but use the VectorFst type instead). This version should inline.
635 template <class Arc, class State>
636 class MutableArcIterator<VectorFst<Arc, State>>
637 : public MutableArcIteratorBase<Arc> {
639 using StateId = typename Arc::StateId;
640 using Weight = typename Arc::Weight;
642 MutableArcIterator(VectorFst<Arc, State> *fst, StateId s) : i_(0) {
644 state_ = fst->GetMutableImpl()->GetState(s);
645 properties_ = &fst->GetImpl()->properties_;
648 bool Done() const final { return i_ >= state_->NumArcs(); }
650 const Arc &Value() const final { return state_->GetArc(i_); }
652 void Next() final { ++i_; }
654 size_t Position() const final { return i_; }
656 void Reset() final { i_ = 0; }
658 void Seek(size_t a) final { i_ = a; }
660 void SetValue(const Arc &arc) final {
661 const auto &oarc = state_->GetArc(i_);
662 if (oarc.ilabel != oarc.olabel) *properties_ &= ~kNotAcceptor;
663 if (oarc.ilabel == 0) {
664 *properties_ &= ~kIEpsilons;
665 if (oarc.olabel == 0) *properties_ &= ~kEpsilons;
667 if (oarc.olabel == 0) *properties_ &= ~kOEpsilons;
668 if (oarc.weight != Weight::Zero() && oarc.weight != Weight::One()) {
669 *properties_ &= ~kWeighted;
671 state_->SetArc(arc, i_);
672 if (arc.ilabel != arc.olabel) {
673 *properties_ |= kNotAcceptor;
674 *properties_ &= ~kAcceptor;
676 if (arc.ilabel == 0) {
677 *properties_ |= kIEpsilons;
678 *properties_ &= ~kNoIEpsilons;
679 if (arc.olabel == 0) {
680 *properties_ |= kEpsilons;
681 *properties_ &= ~kNoEpsilons;
684 if (arc.olabel == 0) {
685 *properties_ |= kOEpsilons;
686 *properties_ &= ~kNoOEpsilons;
688 if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) {
689 *properties_ |= kWeighted;
690 *properties_ &= ~kUnweighted;
692 *properties_ &= kSetArcProperties | kAcceptor | kNotAcceptor | kEpsilons |
693 kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
694 kNoOEpsilons | kWeighted | kUnweighted;
697 uint32 Flags() const final { return kArcValueFlags; }
699 void SetFlags(uint32, uint32) final {}
707 // Provides information needed for the generic mutable arc iterator.
708 template <class Arc, class State>
709 inline void VectorFst<Arc, State>::InitMutableArcIterator(
710 StateId s, MutableArcIteratorData<Arc> *data) {
711 data->base = new MutableArcIterator<VectorFst<Arc, State>>(this, s);
714 // A useful alias when using StdArc.
715 using StdVectorFst = VectorFst<StdArc>;
719 #endif // FST_VECTOR_FST_H_