1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
10 #include <fst/flags.h>
13 // Include these for registration.
14 #include <fst/compact-fst.h>
15 #include <fst/const-fst.h>
16 #include <fst/edit-fst.h>
17 #include <fst/matcher-fst.h>
18 #include <fst/vector-fst.h>
20 // FST flag definitions.
22 DEFINE_bool(fst_verify_properties, false,
23 "Verify FST properties queried by TestProperties");
25 DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");
27 DEFINE_int64(fst_default_cache_gc_limit, 1 << 20LL,
28 "Cache byte size that triggers garbage collection");
30 DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");
32 DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file");
33 DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file");
35 DEFINE_string(fst_read_mode, "read",
36 "Default file reading mode for mappable files");
40 // Registers VectorFst, ConstFst and EditFst for common arc types.
// Each REGISTER_FST line names one (FST class template, arc type) pair. The
// class templates come from the headers included at the top of this file
// "for registration".
// NOTE(review): REGISTER_FST is defined outside this file; what registration
// does at run time cannot be determined from here.
41 REGISTER_FST(VectorFst, StdArc);
42 REGISTER_FST(VectorFst, LogArc);
43 REGISTER_FST(VectorFst, Log64Arc);
44 REGISTER_FST(ConstFst, StdArc);
45 REGISTER_FST(ConstFst, LogArc);
46 REGISTER_FST(ConstFst, Log64Arc);
47 REGISTER_FST(EditFst, StdArc);
48 REGISTER_FST(EditFst, LogArc);
49 REGISTER_FST(EditFst, Log64Arc);
51 // Register CompactFst for common arc types with the default (uint32) size type.
// Only StdArc and LogArc are listed for the compact variants; unlike the
// three families above, no Log64Arc pair is registered here.
52 REGISTER_FST(CompactStringFst, StdArc);
53 REGISTER_FST(CompactStringFst, LogArc);
54 REGISTER_FST(CompactWeightedStringFst, StdArc);
55 REGISTER_FST(CompactWeightedStringFst, LogArc);
56 REGISTER_FST(CompactAcceptorFst, StdArc);
57 REGISTER_FST(CompactAcceptorFst, LogArc);
58 REGISTER_FST(CompactUnweightedFst, StdArc);
59 REGISTER_FST(CompactUnweightedFst, LogArc);
60 REGISTER_FST(CompactUnweightedAcceptorFst, StdArc);
61 REGISTER_FST(CompactUnweightedAcceptorFst, LogArc);
63 // FST type definitions for lookahead FSTs.
// One type-name string each for the arc, input-label and output-label
// lookahead variants.
// NOTE(review): presumably these are declared in <fst/matcher-fst.h>
// (included above) and defined here — confirm against that header.
64 const char arc_lookahead_fst_type[] = "arc_lookahead";
65 const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
66 const char olabel_lookahead_fst_type[] = "olabel_lookahead";
68 // Identifies stream data as an FST (and its endianness).
// The value is 0x7EB2FDD6. Its four bytes do not read the same in reverse,
// so a value read with the opposite byte order differs from this constant and
// fails the comparisons in IsFstHeader() and FstHeader::Read().
// FstHeader::Write() emits it as the first item of the header.
69 constexpr int32 kFstMagicNumber = 2125659606;
71 // Checks for the FST magic number in the stream, to indicate to the caller
72 // whether the stream content is an FST header.
//
// NOTE(review): this excerpt shows only the first part of the function; the
// handling of a mismatch, any repositioning of the stream and the return
// statement are not visible here. `source` is not used in the visible lines.
73 bool IsFstHeader(std::istream &strm, const string &source) {
// Remember the read position before the magic number is consumed.
// NOTE(review): presumably used to restore the stream afterwards — confirm.
74 int64 pos = strm.tellg();
// Initialised to 0 so a failed read leaves a value that cannot match.
76 int32 magic_number = 0;
77 ReadType(strm, &magic_number);
78 if (magic_number != kFstMagicNumber) {
85 // Checks the FST magic number and reads in the header; if rewind = true,
86 // the stream is repositioned to where it was before the call, if possible.
//
// strm:   stream the magic number and header fields are read from.
// source: name of the data source; in the visible code it appears only in
//         the error messages.
// rewind: when true, the starting position is recorded up front and the
//         stream is sought back to it on the paths shown below.
//
// NOTE(review): this excerpt omits several lines of the function (the
// declaration of `pos`, the return statements and the condition guarding the
// "Read failed" message), so the exact return values and whether the
// read-failure path also rewinds cannot be confirmed from here.
87 bool FstHeader::Read(std::istream &strm, const string &source, bool rewind) {
89 if (rewind) pos = strm.tellg();
// Initialised to 0 so a failed read leaves a value that cannot match.
90 int32 magic_number = 0;
91 ReadType(strm, &magic_number);
92 if (magic_number != kFstMagicNumber) {
93 LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
// Undo the consumption of the magic-number bytes before bailing out.
94 if (rewind) strm.seekg(pos);
// Fields are read in the same order in which FstHeader::Write() emits them.
97 ReadType(strm, &fsttype_);
98 ReadType(strm, &arctype_);
99 ReadType(strm, &version_);
100 ReadType(strm, &flags_);
101 ReadType(strm, &properties_);
102 ReadType(strm, &start_);
103 ReadType(strm, &numstates_);
104 ReadType(strm, &numarcs_);
106 LOG(ERROR) << "FstHeader::Read: Read failed: " << source;
// After the fields have been read, restore the starting position when asked.
109 if (rewind) strm.seekg(pos);
113 // Writes FST magic number and FST header.
//
// The magic number is emitted first, followed by the header fields in the
// same order in which FstHeader::Read() reads them back.
//
// NOTE(review): the end of the function (including its return statement) is
// not visible in this excerpt; `source` is not used in the visible lines.
114 bool FstHeader::Write(std::ostream &strm, const string &source) const {
115 WriteType(strm, kFstMagicNumber);
116 WriteType(strm, fsttype_);
117 WriteType(strm, arctype_);
118 WriteType(strm, version_);
119 WriteType(strm, flags_);
120 WriteType(strm, properties_);
121 WriteType(strm, start_);
122 WriteType(strm, numstates_);
123 WriteType(strm, numarcs_);
// Formats the header fields as a sequence of `name: "value"` pairs, in the
// order fsttype, arctype, version, flags, properties, start, numstates,
// numarcs.
//
// NOTE(review): the end of the streaming expression and the return statement
// are not visible in this excerpt.
127 string FstHeader::DebugString() const {
128 std::ostringstream ostrm;
// Each value is wrapped in double quotes; the closing quote of one field is
// emitted together with the label of the next.
129 ostrm << "fsttype: \"" << fsttype_ << "\" arctype: \"" << arctype_
130 << "\" version: \"" << version_ << "\" flags: \"" << flags_
131 << "\" properties: \"" << properties_ << "\" start: \"" << start_
132 << "\" numstates: \"" << numstates_ << "\" numarcs: \"" << numarcs_
// Constructs read options from a source name, an optional header and
// optional input/output symbol tables.
//
// NOTE(review): most of the member-initializer list is not visible in this
// excerpt; only the `read_osymbols(true)` initializer can be seen.
137 FstReadOptions::FstReadOptions(const string &source, const FstHeader *header,
138 const SymbolTable *isymbols,
139 const SymbolTable *osymbols)
145 read_osymbols(true) {
// The read mode is taken from the fst_read_mode flag, parsed by ReadMode().
146 mode = ReadMode(FLAGS_fst_read_mode);
// Overload of the constructor that takes no header argument.
//
// NOTE(review): most of the member-initializer list is not visible in this
// excerpt (including how the header member is initialised); only the
// `read_osymbols(true)` initializer can be seen.
149 FstReadOptions::FstReadOptions(const string &source,
150 const SymbolTable *isymbols,
151 const SymbolTable *osymbols)
157 read_osymbols(true) {
// The read mode is taken from the fst_read_mode flag, parsed by ReadMode().
158 mode = ReadMode(FLAGS_fst_read_mode);
// Converts a textual read mode to a FileReadMode value: "read" maps to READ
// and "map" maps to MAP. The comparison is an exact, case-sensitive string
// match. Any other string is reported through LOG(ERROR).
//
// NOTE(review): the value returned for an unknown mode is not visible in
// this excerpt.
161 FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) {
162 if (mode == "read") return READ;
163 if (mode == "map") return MAP;
164 LOG(ERROR) << "Unknown file read mode " << mode;
// Formats the options as a sequence of `name: "value"` pairs: source, mode,
// read_isymbols, read_osymbols, header, isymbols, osymbols.
//
// NOTE(review): the return statement is not visible in this excerpt.
168 string FstReadOptions::DebugString() const {
169 std::ostringstream ostrm;
// mode is printed as "READ" only when it equals READ; every other value is
// printed as "MAP".
170 ostrm << "source: \"" << source << "\" mode: \""
171 << (mode == READ ? "READ" : "MAP") << "\" read_isymbols: \""
172 << (read_isymbols ? "true" : "false") << "\" read_osymbols: \""
// The pointer members are reported only as "set" or "null"; what they point
// to is not printed.
173 << (read_osymbols ? "true" : "false") << "\" header: \""
174 << (header ? "set" : "null") << "\" isymbols: \""
175 << (isymbols ? "set" : "null") << "\" osymbols: \""
176 << (osymbols ? "set" : "null") << "\"";