1 #cython: nonecheck=True
2 # See www.openfst.org for extensive documentation on this weighted
3 # finite-state transducer library.
6 """Python interface to the FST scripting API.
8 Operations which construct new FSTs are implemented as traditional functions, as
9 are two-argument boolean functions like `equal` and `equivalent`. Destructive
10 operations---those that mutate an FST, in place---are instance methods, as is
11 `write`. Operator overloading is not used. The following example, based on
12 Mohri et al. 2002, shows the construction of an ASR system given a pronunciation
13 lexicon L, grammar G, a transducer from context-dependent phones to
14 context-independent phones C, and an HMM set H:
16 L = fst.Fst.read("L.fst")
17 G = fst.Fst.read("G.fst")
18 C = fst.Fst.read("C.fst")
19 H = fst.Fst.read("H.fst")
20 LG = fst.determinize(fst.compose(L, G))
21 CLG = fst.determinize(fst.compose(C, LG))
22 HCLG = fst.determinize(fst.compose(H, CLG))
23 HCLG.minimize() # NB: works in-place.
25 Python variables here use snake_case and constants are in all caps, minus the
29 # Overview of the file:
34 # * Weight and helpers
35 # * _SymbolTable, _EncodeMapperSymbolTable, _FstSymbolTable,
36 # _MutableFstSymbolTable, SymbolTable, and helpers
37 # * SymbolTableIterator
39 # * _Fst, _MutableFst, Fst, and helpers
41 # * Arc, ArcIterator, and MutableArcIterator
45 # * FarReader and FarWriter
46 # * Cleanup operations for module entrance and exit.
48 # TODO(kbg): Try breaking this apart into smaller pieces.
50 # A few of the more idiosyncratic choices made here are due to "impedance
51 # mismatches" between C++ and Python, as follows.
53 # Another issue is that due to differences in C++ and Python scope rules, most
54 # C++ class instances have to be heap-allocated. Since all are packed into
55 # Python class instances, Python destructors are used to semi-automatically
56 # free C++ instances. The one exception is the various `...Options` structs.
57 # All that is included here are the constructors; there is no need to include
58 # the names of the struct members. Cython does not draw any meaningful
59 # distinction between structs and C++ classes, so these look just like class
62 # Cython's type annotations (e.g., `string`) are used when the variables will
63 # be sent as arguments to C++ functions, but are not used for variables used
66 # Internal functions which may raise a Python error are not declared with a
67 # C++ return type, since doing so leads the C++ compiler to think that the
68 # resulting value could be used before it is populated.
74 from libc.stdint cimport INT32_MAX
75 from libc.stdint cimport SIZE_MAX
76 from posix.unistd cimport getpid
79 from libcpp cimport bool
80 from libcpp.cast cimport const_cast
81 from libcpp.cast cimport static_cast
84 from ios cimport ofstream
85 from memory cimport static_pointer_cast
87 # Cython operator workarounds.
88 from cython.operator cimport address as addr # &foo
89 from cython.operator cimport dereference as deref # *foo
90 from cython.operator cimport preincrement as inc # ++foo
99 # TODO(kbg): Figure out how to access static class variables so I don't have
104 ## Custom exceptions.
class FstError(Exception):
  """Base class for every exception raised by this module."""


class FstArgError(FstError, ValueError):
  """Raised when an argument is invalid (e.g., an unknown type name)."""


class FstBadWeightError(FstError, ValueError):
  """Raised when a weight cannot be constructed or is invalid."""


class FstDeletedConstructorError(FstError, RuntimeError):
  """Raised on attempts to construct a class with a "deleted" constructor."""


class FstIndexError(FstError, IndexError):
  """Raised for index-related errors; also catchable as IndexError."""


class FstIOError(FstError, IOError):
  """Raised when a read or write fails."""


class FstOpError(FstError, RuntimeError):
  """Raised when an operation fails."""
cdef string tostring(data, encoding="utf8") except *:
  """Converts a Python string to a bytestring.

  Bytestrings are passed through unchanged and Unicode strings are encoded
  with the requested encoding. Most Python string arguments go through this
  function before being handed to the lower-level library.

  Args:
    data: A Unicode string or bytestring.
    encoding: The encoding applied to Unicode input, defaulting to UTF-8.

  Returns:
    A bytestring.

  Raises:
    FstArgError: Cannot encode string.

  This function is not visible to Python users.
  """
  # Unicode text must be encoded explicitly.
  if isinstance(data, unicode):
    return data.encode(encoding)
  # A Python bytestring can be implicitly cast to a C++ string.
  if isinstance(data, bytes):
    return data
  raise FstArgError("Cannot encode as string: {!r}".format(data))
cdef string weighttostring(data, encoding="utf8") except *:
  """Converts a string or number to a bytestring.

  Bytestrings are passed through unchanged, Unicode strings are encoded, and
  numbers are rendered with `str` and then encoded. The result is used to
  construct Weight objects: not every weight type is underlyingly
  floating-point, so weights can only be constructed from a string.

  Args:
    data: A Unicode string, bytestring, or number.
    encoding: The encoding applied to non-bytes input, defaulting to UTF-8.

  Returns:
    A bytestring.

  Raises:
    FstArgError: Cannot encode string.

  This function is not visible to Python users.
  """
  # A Python bytestring can be implicitly cast to a C++ string.
  if isinstance(data, bytes):
    return data
  if isinstance(data, unicode):
    return data.encode(encoding)
  if isinstance(data, numbers.Number):
    # Numbers are passed to the library in their textual form.
    text = str(data)
    return text.encode(encoding)
  raise FstArgError("Cannot encode as string: {!r}".format(data))
cdef fst.ComposeFilter _get_compose_filter(
    const string &compose_filter) except *:
  """Looks up the ComposeFilter enum value named by a string.

  The returned value is used to initialize ComposeOptions instances, which
  are used by difference and intersection in addition to composition.

  Args:
    compose_filter: A string matching a known composition filter; one of:
        "alt_sequence", "auto", "match", "null", "sequence", "trivial".

  Returns:
    A ComposeFilter enum value.

  Raises:
    FstArgError: Unknown compose filter type.

  This function is not visible to Python users.
  """
  cdef fst.ComposeFilter matched
  # The library fills in `matched` and reports whether the name was known.
  if fst.GetComposeFilter(compose_filter, addr(matched)):
    return matched
  raise FstArgError("Unknown compose filter type: {!r}".format(
      compose_filter))
cdef fst.DeterminizeType _get_determinize_type(const string &det_type) except *:
  """Looks up the DeterminizeType enum value named by a string.

  Args:
    det_type: A string matching a known determinization type; one of:
        "functional", "nonfunctional", "disambiguate".

  Returns:
    A DeterminizeType enum value.

  Raises:
    FstArgError: Unknown determinization type.

  This function is not visible to Python users.
  """
  cdef fst.DeterminizeType matched
  # The library fills in `matched` and reports whether the name was known.
  if fst.GetDeterminizeType(det_type, addr(matched)):
    return matched
  raise FstArgError("Unknown determinization type: {!r}".format(det_type))
cdef fst.QueueType _get_queue_type(const string &queue_type) except *:
  """Looks up the QueueType enum value named by a string.

  The returned value is passed to the RmEpsilonOptions constructor.

  Args:
    queue_type: A string matching a known queue type; one of: "auto", "fifo",
        "lifo", "shortest", "state", "top".

  Returns:
    A QueueType enum value.

  Raises:
    FstArgError: Unknown queue type.

  This function is not visible to Python users.
  """
  cdef fst.QueueType matched
  # The library fills in `matched` and reports whether the name was known.
  if fst.GetQueueType(queue_type, addr(matched)):
    return matched
  raise FstArgError("Unknown queue type: {!r}".format(queue_type))
cdef fst.RandArcSelection _get_rand_arc_selection(
    const string &select) except *:
  """Looks up the RandArcSelection enum value named by a string.

  The returned value is passed to the RandGenOptions constructor.

  Args:
    select: A string matching a known random arc selection type; one of:
        "uniform", "log_prob", "fast_log_prob".

  Returns:
    A RandArcSelection enum value.

  Raises:
    FstArgError: Unknown random arc selection type.

  This function is not visible to Python users.
  """
  cdef fst.RandArcSelection matched
  # The library fills in `matched` and reports whether the name was known.
  if fst.GetRandArcSelection(select, addr(matched)):
    return matched
  raise FstArgError("Unknown random arc selection type: {!r}".format(select))
cdef fst.ReplaceLabelType _get_replace_label_type(
    const string &replace_label_type, bool epsilon_on_replace) except *:
  """Looks up the ReplaceLabelType enum value named by a string.

  The returned value is passed to the ReplaceOptions constructor.

  Args:
    replace_label_type: A string matching a known replace label type; one of:
        "neither", "input", "output", "both".
    epsilon_on_replace: Should call/return arcs be epsilon arcs?

  Returns:
    A ReplaceLabelType enum value.

  Raises:
    FstArgError: Unknown replace label type.

  This function is not visible to Python users.
  """
  cdef fst.ReplaceLabelType matched
  # The library fills in `matched` and reports whether the name was known.
  if fst.GetReplaceLabelType(replace_label_type, epsilon_on_replace,
                             addr(matched)):
    return matched
  raise FstArgError("Unknown replace label type: {!r}".format(
      replace_label_type))
331 ## Weight and helpers.
cdef class Weight(object):

  """
  Weight(weight_type, weight)

  FST weight class.

  An instance wraps one weight of the named weight type. When passed as an
  argument to an FST operation, it should have the weight type of the input
  FST(s) to said operation.

  Args:
    weight_type: A string indicating the weight type.
    weight: A string (or number) indicating the underlying weight.

  Raises:
    FstArgError: Weight type not found.
    FstBadWeightError: Invalid weight.
  """

  def __repr__(self):
    return "<{} Weight {} at 0x{:x}>".format(self.type(), self.to_string(),
                                             id(self))

  def __str__(self):
    return self.to_string()

  # Conversion goes through the string form, so ValueError is raised when
  # that form is not a valid float literal.

  def __float__(self):
    return float(self.to_string())

  def __init__(self, weight_type, weight):
    self._weight.reset(
        new fst.WeightClass(tostring(weight_type), weighttostring(weight)))
    self._check_weight()

  cdef void _check_weight(self) except *:
    # The library signals failures through sentinel type and string values.
    if self.type() == b"none":
      raise FstArgError("Weight type not found")
    if self.to_string() == b"BadNumber":
      raise FstBadWeightError("Invalid weight")

  cpdef Weight copy(self):
    """
    copy(self)

    Returns a copy of the Weight.
    """
    cdef Weight duplicate = Weight.__new__(Weight)
    duplicate._weight.reset(
        new fst.WeightClass(<fst.WeightClass> deref(self._weight)))
    return duplicate

  # Class methods cannot be declared cdef, so the C++ part of each of the
  # following constructors lives in a module-level helper.

  @classmethod
  def Zero(cls, weight_type):
    """
    Weight.Zero(weight_type)

    Constructs semiring zero.
    """
    return _Weight_Zero(weight_type)

  @classmethod
  def One(cls, weight_type):
    """
    Weight.One(weight_type)

    Constructs semiring One.
    """
    return _Weight_One(weight_type)

  @classmethod
  def NoWeight(cls, weight_type):
    """
    Weight.NoWeight(weight_type)

    Constructs a non-member weight in the semiring.
    """
    return _Weight_NoWeight(weight_type)

  def __richcmp__(Weight w1, Weight w2, int op):
    # TODO(kbg): Replace this with __eq__ once Cython 0.27 is widely available.
    # Only `==` (op 2) and `!=` (op 3) are supported.
    if op == 2:
      return fst.Eq(deref(w1._weight), deref(w2._weight))
    if op == 3:
      return fst.Ne(deref(w1._weight), deref(w2._weight))
    raise NotImplementedError("Invalid operator {!r}".format(op))

  cpdef string to_string(self):
    return self._weight.get().ToString()

  cpdef string type(self):
    """
    type(self)

    Returns a string indicating the weight type.
    """
    return self._weight.get().Type()
# Builds the (unchecked) sum of two weights; `plus` validates the result.
cdef Weight _plus(Weight lhs, Weight rhs):
  cdef Weight total = Weight.__new__(Weight)
  total._weight.reset(
      new fst.WeightClass(fst.Plus(deref(lhs._weight), deref(rhs._weight))))
  return total
def plus(Weight lhs, Weight rhs):
  r"""
  plus(lhs, rhs)

  Computes the sum of two Weights in the same semiring.

  This function computes lhs \oplus rhs, raising an exception if lhs and rhs
  are not in the same semiring.

  Args:
     lhs: Left-hand side Weight.
     rhs: Right-hand side Weight.

  Returns:
    A Weight object.

  Raises:
    FstArgError: Weight type not found (or not in same semiring).
    FstBadWeightError: Invalid weight.
  """
  cdef Weight total = _plus(lhs, rhs)
  # Validation is what turns a bad library result into a Python exception.
  total._check_weight()
  return total
# Builds the (unchecked) product of two weights; `times` validates the result.
cdef Weight _times(Weight lhs, Weight rhs):
  cdef Weight product = Weight.__new__(Weight)
  product._weight.reset(
      new fst.WeightClass(fst.Times(deref(lhs._weight), deref(rhs._weight))))
  return product
def times(Weight lhs, Weight rhs):
  r"""
  times(lhs, rhs)

  Computes the product of two Weights in the same semiring.

  This function computes lhs \otimes rhs, raising an exception if lhs and rhs
  are not in the same semiring.

  Args:
     lhs: Left-hand side Weight.
     rhs: Right-hand side Weight.

  Returns:
    A Weight object.

  Raises:
    FstArgError: Weight type not found (or not in same semiring).
    FstBadWeightError: Invalid weight.
  """
  cdef Weight product = _times(lhs, rhs)
  # Validation is what turns a bad library result into a Python exception.
  product._check_weight()
  return product
# Builds the (unchecked) quotient of two weights; `divide` validates it.
cdef Weight _divide(Weight lhs, Weight rhs):
  cdef Weight quotient = Weight.__new__(Weight)
  quotient._weight.reset(
      new fst.WeightClass(fst.Divide(deref(lhs._weight),
                                     deref(rhs._weight))))
  return quotient
def divide(Weight lhs, Weight rhs):
  r"""
  divide(lhs, rhs)

  Computes the quotient of two Weights in the same semiring.

  This function computes lhs \oslash rhs, raising an exception if lhs and rhs
  are not in the same semiring. As there is no way to specify whether to use
  left vs. right division, this assumes a commutative semiring in which these
  are equivalent operations.

  Args:
     lhs: Left-hand side Weight.
     rhs: Right-hand side Weight.

  Returns:
    A Weight object.

  Raises:
    FstArgError: Weight type not found (or not in same semiring).
    FstBadWeightError: Invalid weight.
  """
  cdef Weight quotient = _divide(lhs, rhs)
  # Validation is what turns a bad library result into a Python exception.
  quotient._check_weight()
  return quotient
# Builds the (unchecked) n-th power of a weight; `power` validates the result.
cdef Weight _power(Weight w, size_t n):
  cdef Weight raised = Weight.__new__(Weight)
  raised._weight.reset(new fst.WeightClass(fst.Power(deref(w._weight), n)))
  return raised
def power(Weight w, size_t n):
  """
  power(w, n)

  Computes the iterated product of a weight.

  Args:
     w: The weight.
     n: The power.

  Returns:
    A Weight object.

  Raises:
    FstArgError: Weight type not found (or not in same semiring).
    FstBadWeightError: Invalid weight.
  """
  cdef Weight raised = _power(w, n)
  # Validation is what turns a bad library result into a Python exception.
  raised._check_weight()
  return raised
cdef fst.WeightClass _get_WeightClass_or_Zero(const string &weight_type,
                                              weight) except *:
  """Converts a weight argument to a WeightClass, defaulting to Zero.

  This function constructs a WeightClass instance of the desired weight type.
  If `weight` is None, the result is semiring Zero; if it is already a Weight,
  the wrapped WeightClass is copied; otherwise it is parsed from its string
  form.

  Args:
    weight_type: A string denoting the desired weight type.
    weight: An object indicating the desired weight; if None, the weight is
        set to semiring Zero.

  Returns:
    A WeightClass object.

  Raises:
    FstBadWeightError: The weight could not be parsed.

  This function is not visible to Python users.
  """
  cdef fst.WeightClass wc
  if isinstance(weight, Weight):
    wc = deref(<fst.WeightClass *> (<Weight> weight)._weight.get())
  elif weight is None:
    wc = fst.WeightClass.Zero(weight_type)
  else:
    wc = fst.WeightClass(weight_type, weighttostring(weight))
    # Only freshly parsed weights are checked for the failure sentinel.
    if wc.ToString() == b"BadNumber":
      raise FstBadWeightError(weighttostring(weight))
  return wc
cdef fst.WeightClass _get_WeightClass_or_One(const string &weight_type,
                                             weight) except *:
  """Converts a weight argument to a WeightClass, defaulting to One.

  This function constructs a WeightClass instance of the desired weight type.
  If `weight` is None, the result is semiring One; if it is already a Weight,
  the wrapped WeightClass is copied; otherwise it is parsed from its string
  form.

  Args:
    weight_type: A string denoting the desired weight type.
    weight: An object indicating the desired weight; if None, the weight is
        set to semiring One.

  Returns:
    A WeightClass object.

  Raises:
    FstBadWeightError: The weight could not be parsed.

  This function is not visible to Python users.
  """
  cdef fst.WeightClass wc
  if isinstance(weight, Weight):
    wc = deref(<fst.WeightClass *> (<Weight> weight)._weight.get())
  elif weight is None:
    wc = fst.WeightClass.One(weight_type)
  else:
    wc = fst.WeightClass(weight_type, weighttostring(weight))
    # Only freshly parsed weights are checked for the failure sentinel.
    if wc.ToString() == b"BadNumber":
      raise FstBadWeightError(weighttostring(weight))
  return wc
# C++ half of Weight.Zero: builds semiring Zero for the named weight type,
# raising FstArgError if the library does not know the type.
cdef Weight _Weight_Zero(weight_type):
  cdef Weight zero = Weight.__new__(Weight)
  zero._weight.reset(
      new fst.WeightClass(fst.WeightClass.Zero(tostring(weight_type))))
  if zero._weight.get().Type() == b"none":
    raise FstArgError("Weight type not found")
  return zero
# C++ half of Weight.One: builds semiring One for the named weight type,
# raising FstArgError if the library does not know the type.
cdef Weight _Weight_One(weight_type):
  cdef Weight one = Weight.__new__(Weight)
  one._weight.reset(
      new fst.WeightClass(fst.WeightClass.One(tostring(weight_type))))
  if one._weight.get().Type() == b"none":
    raise FstArgError("Weight type not found")
  return one
# C++ half of Weight.NoWeight: builds the non-member weight for the named
# weight type. Unlike the Zero/One helpers, no type check is performed here.
cdef Weight _Weight_NoWeight(weight_type):
  cdef Weight no_weight = Weight.__new__(Weight)
  no_weight._weight.reset(
      new fst.WeightClass(fst.WeightClass.NoWeight(tostring(weight_type))))
  return no_weight
648 ## _SymbolTable, _MutableSymbolTable, _EncodeMapperSymbolTable, _FstSymbolTable,
649 ## _MutableFstSymbolTable, SymbolTable, and helpers.
651 # SymbolTable hierarchy:
653 # _SymbolTable: abstract base class; has-a SymbolTable*
654 # _EncodeMapperSymbolTable(_SymbolTable): constant symbol table returned by
655 # EncodeMapper.input_symbols/output_symbols
656 # _FstSymbolTable(_SymbolTable): constant symbol table returned by
657 # _Fst.input_symbols/output_symbols
659 # _MutableSymbolTable(_SymbolTable): abstract base class adding mutation methods
660 # _MutableFstSymbolTable(_MutableSymbolTable): mutable symbol table returned by
661 # _MutableFst.mutable_input_symbols/mutable_output_symbols
662 # SymbolTable(_MutableSymbolTable): adds constructor
cdef class _SymbolTable(object):

  """
  (No constructor.)

  Base class for the symbol table hierarchy.

  This class is the base class for SymbolTable. It has a "deleted" constructor
  and implements only the const (read-only) methods of the wrapped
  SymbolTable.
  """

  # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
  # Doing so will allow undefined behavior.

  def __init__(self):
    class_name = self.__class__.__name__
    raise FstDeletedConstructorError(
        "Cannot construct {}".format(class_name))

  def __iter__(self):
    return SymbolTableIterator(self)

  cpdef int64 available_key(self):
    """
    available_key(self)

    Returns an integer indicating the next available key index in the table.
    """
    return self._table.AvailableKey()

  cpdef string checksum(self):
    """
    checksum(self)

    Returns a string indicating the label-agnostic MD5 checksum for the table.
    """
    return self._table.CheckSum()

  cpdef SymbolTable copy(self):
    """
    copy(self)

    Returns a mutable copy of the SymbolTable.
    """
    return _init_SymbolTable(self._table.Copy())

  def find(self, key):
    """
    find(self, key)

    Given a symbol or index, finds the other one.

    This method returns the index associated with a symbol key, or the symbol
    associated with a index key.

    Args:
      key: Either a string or an index.

    Returns:
      If the key is a string, the associated index or NO_LABEL if not found; if
          the key is an integer, the associated symbol or an empty string if
          not found.
    """
    # Keys that cannot be converted to a string are treated as indices.
    try:
      return self._table.FindIndex(tostring(key))
    except FstArgError:
      return self._table.FindSymbol(key)

  cpdef int64 get_nth_key(self, ssize_t pos) except *:
    """
    get_nth_key(self, pos)

    Retrieves the integer index of the n-th key in the table.

    Args:
      pos: The n-th key to retrieve.

    Returns:
      The integer index of the n-th key, or NO_LABEL if not found.
    """
    return self._table.GetNthKey(pos)

  cpdef string labeled_checksum(self):
    """
    labeled_checksum(self)

    Returns a string indicating the label-dependent MD5 checksum for the table.
    """
    return self._table.LabeledCheckSum()

  cpdef bool member(self, key):
    """
    member(self, key)

    Given a symbol or index, returns whether it is found in the table.

    This method returns a boolean indicating whether the given symbol or index
    is present in the table. If one intends to perform a subsequent lookup, it
    is better to simply call the find method and inspect its result.

    Args:
      key: Either a string or an index.

    Returns:
      Whether or not the key is present (as a string or a index) in the table.
    """
    # Keys that cannot be converted to a string are treated as indices.
    try:
      return self._table.MemberSymbol(tostring(key))
    except FstArgError:
      return self._table.MemberIndex(key)

  def __contains__(self, key):
    return self.member(key)

  cpdef string name(self):
    """
    name(self)

    Returns the symbol table's name.
    """
    return self._table.Name()

  cpdef size_t num_symbols(self):
    """
    num_symbols(self)

    Returns the number of symbols in the symbol table.
    """
    return self._table.NumSymbols()

  cpdef void write(self, filename) except *:
    """
    write(self, filename)

    Serializes symbol table to a file.

    This method writes the SymbolTable to a file in binary format.

    Args:
      filename: The string location of the output file.

    Raises:
      FstIOError: Write failed.
    """
    cdef string path = tostring(filename)
    if not self._table.Write(path):
      raise FstIOError("Write failed: {!r}".format(filename))

  cpdef void write_text(self, filename) except *:
    """
    write_text(self, filename)

    Writes symbol table to text file.

    This method writes the SymbolTable to a file in human-readable format.

    Args:
      filename: The string location of the output file.

    Raises:
      FstIOError: Write failed.
    """
    cdef string path = tostring(filename)
    if not self._table.WriteText(path):
      raise FstIOError("Write failed: {!r}".format(filename))
cdef class _EncodeMapperSymbolTable(_SymbolTable):

  """
  (No constructor.)

  Immutable SymbolTable class for tables stored in an EncodeMapper.

  This class wraps a library const SymbolTable and exposes const methods of the
  wrapped object. Instances are only returned by methods; they are never
  constructed directly.
  """

  # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
  # Doing so will allow undefined behavior.

  def __repr__(self):
    return "<const EncodeMapper SymbolTable {!r} at 0x{:x}>".format(
        self.name(), id(self))
cdef class _FstSymbolTable(_SymbolTable):

  """
  (No constructor.)

  Immutable SymbolTable class for tables stored in an FST.

  This class wraps a library const SymbolTable and exposes const methods of the
  wrapped object. Instances are only returned by methods; they are never
  constructed directly.
  """

  # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
  # Doing so will allow undefined behavior.

  def __repr__(self):
    return "<const Fst SymbolTable {!r} at 0x{:x}>".format(
        self.name(), id(self))
cdef class _MutableSymbolTable(_SymbolTable):

  """
  (No constructor.)

  Base class for mutable symbol tables.

  This class is the base class for a mutable SymbolTable. It has a "deleted"
  constructor and adds the mutating methods of the wrapped SymbolTable to the
  read-only ones inherited from the base class.
  """

  cpdef int64 add_symbol(self, symbol, int64 key=kNoSymbol):
    """
    add_symbol(self, symbol, key=NO_SYMBOL)

    Adds a symbol to the table and returns the index.

    This method adds a symbol to the table. The caller can optionally
    specify a non-negative integer index for the key.

    Args:
      symbol: A symbol string.
      key: An index for the symbol; if not specified, the next index will be
          used.

    Returns:
      The integer key of the new symbol.
    """
    cdef string symbol_string = tostring(symbol)
    # The sentinel default means "let the table pick the key".
    if key == kNoSymbol:
      return self._table.AddSymbol(symbol_string)
    return self._table.AddSymbol(symbol_string, key)

  cpdef void add_table(self, _SymbolTable syms):
    """
    add_table(self, syms)

    Adds another SymbolTable to this table.

    This method merges another symbol table into the current table. All key
    values will be offset by the current available key.

    Args:
      syms: A SymbolTable to be merged with the current table.
    """
    self._table.AddTable(deref(syms._table))

  cpdef void set_name(self, new_name) except *:
    # Renames the wrapped table; `new_name` is converted to a bytestring.
    self._table.SetName(tostring(new_name))
cdef class _MutableFstSymbolTable(_MutableSymbolTable):

  """
  (No constructor.)

  Mutable SymbolTable assigned to an FST.
  """

  def __repr__(self):
    table_name = self.name()
    return "<Fst SymbolTable {!r} at 0x{:x}>".format(table_name, id(self))
cdef class SymbolTable(_MutableSymbolTable):

  """
  SymbolTable(name="<unspecified>")

  Mutable SymbolTable class.

  This class wraps the library SymbolTable and exposes both const (i.e.,
  access) and non-const (i.e., mutation) methods of wrapped object.

  Unlike other classes in the hierarchy, it has a working constructor and can be
  used to programmatically construct a SymbolTable in memory.

  Args:
    name: An optional string indicating the table's name.
  """

  def __repr__(self):
    return "<SymbolTable {!r} at 0x{:x}>".format(self.name(), id(self))

  def __init__(self, name=b"<unspecified>"):
    self._table = new fst.SymbolTable(tostring(name))
    # The smart pointer owns the table; `_table` is the working raw pointer.
    self._smart_table.reset(self._table)

  @classmethod
  def read(cls, filename):
    """
    SymbolTable.read(filename)

    Reads symbol table from binary file.

    This class method creates a new SymbolTable from a symbol table binary file.

    Args:
      filename: The string location of the input binary file.

    Returns:
      A new SymbolTable instance.

    Raises:
      FstIOError: Read failed.

    See also: `SymbolTable.read_fst`, `SymbolTable.read_text`.
    """
    cdef fst.SymbolTable *tsyms = fst.SymbolTable.Read(tostring(filename))
    if tsyms == NULL:
      raise FstIOError("Read failed: {!r}".format(filename))
    return _init_SymbolTable(tsyms)

  @classmethod
  def read_text(cls, filename, bool allow_negative_labels=False):
    """
    SymbolTable.read_text(filename, allow_negative_labels=False)

    Reads symbol table from text file.

    This class method creates a new SymbolTable from a symbol table text file.

    Args:
      filename: The string location of the input text file.
      allow_negative_labels: Should negative labels be allowed? (Not
          recommended; may cause conflicts).

    Returns:
      A new SymbolTable instance.

    Raises:
      FstIOError: Read failed.

    See also: `SymbolTable.read`, `SymbolTable.read_fst`.
    """
    cdef unique_ptr[fst.SymbolTableTextOptions] opts
    opts.reset(new fst.SymbolTableTextOptions(allow_negative_labels))
    cdef fst.SymbolTable *tsyms = fst.SymbolTable.ReadText(tostring(filename),
                                                           deref(opts))
    if tsyms == NULL:
      raise FstIOError("Read failed: {!r}".format(filename))
    return _init_SymbolTable(tsyms)

  @classmethod
  def read_fst(cls, filename, bool input_table):
    """
    SymbolTable.read_fst(filename, input_table)

    Reads symbol table from an FST file without loading the corresponding FST.

    This class method creates a new SymbolTable by reading either the input or
    output symbol table from an FST file, without loading the corresponding FST.

    Args:
      filename: The string location of the input FST file.
      input_table: Should the input table be read (True) or the output table
          (False)?

    Returns:
      A new SymbolTable instance.

    Raises:
      FstIOError: Read failed.

    See also: `SymbolTable.read`, `SymbolTable.read_text`.
    """
    # The filename goes through `tostring`, as in `read` and `read_text`, so
    # that Unicode filenames are encoded rather than rejected by the implicit
    # bytes-only conversion to a C++ string.
    cdef fst.SymbolTable *tsyms = fst.FstReadSymbols(tostring(filename),
                                                     input_table)
    if tsyms == NULL:
      raise FstIOError("Read failed: {!r}".format(filename))
    return _init_SymbolTable(tsyms)
# Wraps a table pointer obtained from an EncodeMapper. The encoder's shared
# pointer is stored on the wrapper alongside the raw table pointer.
cdef _EncodeMapperSymbolTable _init_EncodeMapperSymbolTable(
    fst.SymbolTable *table, shared_ptr[fst.EncodeMapperClass] encoder):
  cdef _EncodeMapperSymbolTable wrapper = (
      _EncodeMapperSymbolTable.__new__(_EncodeMapperSymbolTable))
  wrapper._encoder = encoder
  wrapper._table = table
  return wrapper
# Wraps a table pointer obtained from an FST. The FST's shared pointer is
# stored on the wrapper alongside the raw table pointer.
cdef _FstSymbolTable _init_FstSymbolTable(fst.SymbolTable *table,
                                          shared_ptr[fst.FstClass] ifst):
  cdef _FstSymbolTable wrapper = _FstSymbolTable.__new__(_FstSymbolTable)
  wrapper._fst = ifst
  wrapper._table = table
  return wrapper
# Wraps a table pointer obtained from a mutable FST. The FST's shared pointer
# is stored on the wrapper alongside the raw table pointer.
cdef _MutableFstSymbolTable _init_MutableFstSymbolTable(fst.SymbolTable *table,
    shared_ptr[fst.MutableFstClass] ifst):
  cdef _MutableFstSymbolTable wrapper = (
      _MutableFstSymbolTable.__new__(_MutableFstSymbolTable))
  wrapper._mfst = ifst
  wrapper._table = table
  return wrapper
# Wraps a freshly allocated table in a SymbolTable instance.
#
# The pointer is also handed to `_smart_table`, as SymbolTable.__init__ does,
# so that the table is freed with the Python object instead of leaking.
# NOTE(review): this assumes every caller passes a pointer it owns (e.g., the
# result of Read, ReadText, Copy, CompactSymbolTable or MergeSymbolTable);
# confirm for any callers outside this section.
cdef SymbolTable _init_SymbolTable(fst.SymbolTable *table):
  cdef SymbolTable result = SymbolTable.__new__(SymbolTable)
  result._table = table
  result._smart_table.reset(table)
  return result
1065 # Constructive SymbolTable operations.
cpdef SymbolTable compact_symbol_table(_SymbolTable syms):
  """
  compact_symbol_table(syms)

  Constructively relabels a SymbolTable to make it a contiguous mapping.

  Args:
    syms: Input SymbolTable.

  Returns:
    A new compacted SymbolTable.
  """
  cdef fst.SymbolTable *compacted = fst.CompactSymbolTable(deref(syms._table))
  return _init_SymbolTable(compacted)
cpdef SymbolTable merge_symbol_table(_SymbolTable lhs, _SymbolTable rhs):
  """
  merge_symbol_table(lhs, rhs)

  Merges all symbols from the left table into the right.

  This function creates a new SymbolTable which is the merger of the two input
  symbol Tables. Symbols in the right-hand table that conflict with those in the
  left-hand table will be assigned values from the left-hand table. Thus the
  returned table will never modify symbol assignments from the left-hand side,
  but may do so on the right.

  If the left-hand table is associated with an FST, it may be necessary to
  relabel it using the output table.

  Args:
    lhs: Left-hand side SymbolTable.
    rhs: Right-hand side SymbolTable.

  Returns:
    A new merged SymbolTable.

  See also: `relabel_symbols`.
  """
  # The final NULL argument is passed through to the library unchanged.
  cdef fst.SymbolTable *merged = fst.MergeSymbolTable(deref(lhs._table),
                                                      deref(rhs._table), NULL)
  return _init_SymbolTable(merged)
1111 ## SymbolTableIterator.
cdef class SymbolTableIterator(object):

  """
  SymbolTableIterator(syms)

  This class is used for iterating over a symbol table.

  It can be driven explicitly with `done`, `next`, `reset`, `symbol` and
  `value`, or used as a Python iterator yielding (index, symbol) pairs.
  """

  def __repr__(self):
    return "<SymbolTableIterator at 0x{:x}>".format(id(self))

  def __init__(self, _SymbolTable syms):
    self._siter.reset(new fst.SymbolTableIterator(deref(syms._table)))

  # This just registers this class as a possible iterator.
  def __iter__(self):
    return self

  # Magic method used to get a Pythonic API out of the C++ API.
  def __next__(self):
    cdef int64 index
    cdef string label
    if self.done():
      raise StopIteration
    # Read the current entry before advancing past it.
    index = self.value()
    label = self.symbol()
    self.next()
    return (index, label)

  cpdef bool done(self):
    """
    done(self)

    Indicates whether the iterator is exhausted or not.

    Returns:
      True if the iterator is exhausted, False otherwise.
    """
    return self._siter.get().Done()

  cpdef void next(self):
    """
    next(self)

    Advances the iterator.
    """
    self._siter.get().Next()

  cpdef void reset(self):
    """
    reset(self)

    Resets the iterator to the initial position.
    """
    self._siter.get().Reset()

  cpdef string symbol(self):
    """
    symbol(self)

    Returns the current symbol string.

    This method returns the current symbol string at this point in the table.

    Returns:
      A symbol string.
    """
    return self._siter.get().Symbol()

  cpdef int64 value(self):
    """
    value(self)

    Returns the current integer index of the symbol.

    Returns:
      An integer index.
    """
    return self._siter.get().Value()
1196 cdef class EncodeMapper(object):
1199 EncodeMapper(arc_type="standard", encode_labels=False, encode_weights=False)
1201 Arc encoder class, wrapping EncodeMapperClass.
1203 This class provides an object which can be used to encode or decode FST arcs.
1204 This is most useful to convert an FST to an unweighted acceptor, on which
1205 some FST operations are more efficient, and then decoding the FST afterwards.
1207 To use an instance of this class to encode or decode a mutable FST, pass it
1208 as the first argument to the FST instance methods `encode` and `decode`.
1210 For implementational reasons, it is not currently possible to use an encoder
1211 on disk to construct this class.
1214 arc_type: A string indicating the arc type.
1215 encode_labels: Should labels be encoded?
1216 encode_weights: Should weights be encoded?
1220 return "<EncodeMapper at 0x{:x}>".format(id(self))
1223 arc_type=b"standard",
1224 bool encode_labels=False,
1225 bool encode_weights=False):
1226 cdef uint32 flags = fst.GetEncodeFlags(encode_labels, encode_weights)
1227 self._encoder.reset(new fst.EncodeMapperClass(tostring(arc_type), flags,
1229 if not self._encoder:
1230 raise FstOpError("Unknown arc type: {!r}".format(arc_type))
1232 cpdef string arc_type(self):
1236 Returns a string indicating the arc type.
1238 return self._encoder.get().ArcType()
1240 # Python's equivalent to operator().
1242 def __call__(self, Arc arc):
1244 self(state, ilabel, olabel, weight, nextstate)
1246 Uses the encoder to encode an arc.
1249 ilabel: The integer index of the input label.
1250 olabel: The integer index of the output label.
1251 weight: A Weight or weight string indicating the desired final weight; if
1252 null, it is set to semiring One.
1253 nextstate: The integer index of the destination state.
1256 FstOpError: Incompatible or invalid weight.
1258 return _init_Arc(self._encoder.get().__call__(deref(arc._arc)))
1260 cpdef uint32 flags(self):
1264 Returns the encoder's flags.
1266 return self._encoder.get().Flags()
1268 cpdef _EncodeMapperSymbolTable input_symbols(self):
1272 Returns the encoder's input symbol table, or None if none is present.
1274 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1275 self._encoder.get().InputSymbols())
1278 return _init_EncodeMapperSymbolTable(syms, self._encoder)
1280 cpdef _EncodeMapperSymbolTable output_symbols(self):
1282 output_symbols(self)
1284 Returns the encoder's output symbol table, or None if none is present.
1286 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1287 self._encoder.get().OutputSymbols())
1290 return _init_EncodeMapperSymbolTable(syms, self._encoder)
cpdef uint64 properties(self, uint64 mask):
  """
  properties(self, mask)

  Provides property bits.

  This method gives the user access to the properties of the encoder.

  Args:
    mask: The property mask to be compared to the encoder's properties.

  Returns:
    A 64-bit bitmask representing the requested properties.
  """
  cdef uint64 bits = self._encoder.get().Properties(mask)
  return bits
cpdef void set_input_symbols(self, _SymbolTable syms) except *:
  """
  set_input_symbols(self, syms)

  Sets the encoder's input symbol table.

  Args:
    syms: A SymbolTable.

  See also: `set_output_symbols`.
  """
  cdef fst.SymbolTable *table = syms._table
  self._encoder.get().SetInputSymbols(table)
cpdef void set_output_symbols(self, _SymbolTable syms) except *:
  """
  set_output_symbols(self, syms)

  Sets the encoder's output symbol table.

  Args:
    syms: A SymbolTable.

  See also: `set_input_symbols`.
  """
  cdef fst.SymbolTable *table = syms._table
  self._encoder.get().SetOutputSymbols(table)
cpdef string weight_type(self):
  """
  weight_type(self)

  Returns a string indicating the weight type of the encoder.
  """
  cdef string result = self._encoder.get().WeightType()
  return result
1343 ## _Fst, _MutableFst, Fst, and helpers.
1347 # _Fst: base class; has-a FstClass*.
1348 # _MutableFst(_Fst): adds mutable methods.
1349 # Fst(filename): pseudo-constructor.
1352 cdef class _Fst(object):
1357 Immutable FST class, wrapping FstClass.
1359 This class is the basic user-facing FST object. It does not itself support any
1360 mutation operations.
# IPython notebook magic to produce an SVG of the FST.
def _repr_svg_(self):
  """IPython notebook magic to produce an SVG of the FST using GraphViz.

  This method produces an SVG of the internal graph. Users wishing to create
  publication-quality graphs should instead use the method `draw`, which
  exposes additional parameters.

  Returns:
    The output of `dot -Tsvg`, decoded as UTF-8.

  Raises:
    OSError: Cannot locate the `dot` executable.
    subprocess.CalledProcessError: `dot` returned non-zero exit code.

  See also: `draw`, `text`.
  """
  # The command is bound to a name so that the very same value is reported by
  # the error raised below; the failure path previously referenced an
  # attribute that this class does not define.
  dot_command = ["dot", "-Tsvg"]
  # Throws OSError if the dot executable is not found.
  proc = subprocess.Popen(dot_command, stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  cdef stringstream sstrm
  # Renders the FST in dot format into an in-memory stream, in acceptor
  # format whenever the FST has the acceptor property.
  fst.DrawFst(deref(self._fst), self._fst.get().InputSymbols(),
              self._fst.get().OutputSymbols(), NULL,
              self._fst.get().Properties(fst.kAcceptor, True) ==
              fst.kAcceptor,
              b"", 8.5, 11, True, False, 0.4, 0.25, 14, 5, b"g", False,
              addr(sstrm), b"_repr_svg")
  (sout, _) = proc.communicate(sstrm.str())
  if proc.returncode != 0:  # Just to be explicit.
    raise subprocess.CalledProcessError(proc.returncode, dot_command)
  return sout.decode("utf8")
def __repr__(self):
  # Combines the FST type name with the object's identity in hexadecimal.
  kind = self.fst_type()
  return "<{} Fst at 0x{:x}>".format(kind, id(self))
def __init__(self):
  # Direct construction is refused; the error names the concrete class that
  # was requested.
  class_name = self.__class__.__name__
  raise FstDeletedConstructorError("Cannot construct {}".format(class_name))
# Other magic methods.

def __str__(self):
  # Acceptor formatting and the printing of semiring One weights are selected
  # from the FST's own acceptor and weighted property bits.
  cdef bool is_acceptor = (
      self._fst.get().Properties(fst.kAcceptor, True) == fst.kAcceptor)
  cdef bool is_weighted = (
      self._fst.get().Properties(fst.kWeighted, True) == fst.kWeighted)
  return self.text(acceptor=is_acceptor, show_weight_one=is_weighted)
cpdef string arc_type(self):
  """
  arc_type(self)

  Returns a string indicating the arc type of the FST.
  """
  cdef string result = self._fst.get().ArcType()
  return result
cpdef ArcIterator arcs(self, int64 state):
  """
  arcs(self, state)

  Returns an iterator over arcs leaving the specified state.

  Args:
    state: The source state ID.

  Returns:
    An ArcIterator.

  See also: `mutable_arcs`, `states`.
  """
  cdef ArcIterator iterator = ArcIterator(self, state)
  return iterator
cpdef _Fst copy(self):
  """
  copy(self)

  Makes a copy of the FST.

  Returns:
    A new FST object built from a copy of the wrapped FstClass.
  """
  # The freshly allocated FstClass is handed over to the wrapper returned by
  # _init_XFst.
  return _init_XFst(new fst.FstClass(deref(self._fst)))
cpdef void draw(self,
                filename,
                _SymbolTable isymbols=None,
                _SymbolTable osymbols=None,
                SymbolTable ssymbols=None,
                bool acceptor=False,
                title=b"",
                double width=8.5,
                double height=11,
                bool portrait=False,
                bool vertical=False,
                double ranksep=0.4,
                double nodesep=0.25,
                int32 fontsize=14,
                int32 precision=5,
                float_format=b"g",
                bool show_weight_one=False):
  """
  draw(self, filename, isymbols=None, osymbols=None, ssymbols=None,
       acceptor=False, title="", width=8.5, height=11, portrait=False,
       vertical=False, ranksep=0.4, nodesep=0.25, fontsize=14,
       precision=5, float_format="g", show_weight_one=False):

  Writes out the FST in Graphviz text format.

  This method writes out the FST in the dot graph description language. The
  graph can be rendered using the `dot` executable provided by Graphviz.

  Args:
    filename: The string location of the output dot/Graphviz file.
    isymbols: An optional symbol table used to label input symbols; when
        omitted, the FST's own input symbol table is used.
    osymbols: An optional symbol table used to label output symbols; when
        omitted, the FST's own output symbol table is used.
    ssymbols: An optional symbol table used to label states.
    acceptor: Should the figure be rendered in acceptor format if possible?
    title: An optional string indicating the figure title.
    width: The figure width, in inches.
    height: The figure height, in inches.
    portrait: Should the figure be rendered in portrait rather than
        landscape?
    vertical: Should the figure be rendered bottom-to-top rather than
        left-to-right?
    ranksep: The minimum separation between ranks, in inches.
    nodesep: The minimum separation between nodes, in inches.
    fontsize: Font size, in points.
    precision: Numeric precision for floats, in number of chars.
    float_format: One of: 'e', 'f' or 'g'.
    show_weight_one: Should weights equivalent to semiring One be printed?

  See also: `text`.
  """
  # The state symbol table is optional and is passed as NULL when absent.
  cdef fst.SymbolTable *ssymbols_ptr = NULL
  if ssymbols is not None:
    ssymbols_ptr = ssymbols._table
  # The output stream is owned by a unique_ptr for the duration of the call.
  cdef string filename_string = tostring(filename)
  cdef unique_ptr[ofstream] ostrm
  ostrm.reset(new ofstream(filename_string))
  fst.DrawFst(
      deref(self._fst),
      self._fst.get().InputSymbols() if isymbols is None
      else isymbols._table,
      self._fst.get().OutputSymbols() if osymbols is None
      else osymbols._table,
      ssymbols_ptr,
      acceptor,
      tostring(title),
      width,
      height,
      portrait,
      vertical,
      ranksep,
      nodesep,
      fontsize,
      precision,
      tostring(float_format),
      show_weight_one,
      ostrm.get(),
      filename_string)
cpdef Weight final(self, int64 state):
  """
  final(self, state)

  Returns the final weight of a state.

  Args:
    state: The integer index of a state.

  Returns:
    The final Weight of that state.

  Raises:
    FstIndexError: State index out of range.
  """
  # Enforces the documented contract: an invalid index is reported to the
  # caller rather than being forwarded to the wrapped FST.
  if not self._fst.get().ValidStateId(state):
    raise FstIndexError("State index out of range")
  # The Weight wrapper is allocated without running its constructor and is
  # then pointed at a copy of the state's final weight.
  cdef Weight weight = Weight.__new__(Weight)
  weight._weight.reset(new fst.WeightClass(self._fst.get().Final(state)))
  return weight
cpdef string fst_type(self):
  """
  fst_type(self)

  Returns a string indicating the FST type.
  """
  cdef string result = self._fst.get().FstType()
  return result
cpdef _FstSymbolTable input_symbols(self):
  """
  input_symbols(self)

  Returns the FST's input symbol table, or None if none is present.

  See also: `output_symbols`.
  """
  # The FST hands back a const pointer; constness is cast away so the table
  # can be wrapped.
  cdef fst.SymbolTable *table = const_cast[SymbolTable_ptr](
      self._fst.get().InputSymbols())
  if table != NULL:
    return _init_FstSymbolTable(table, self._fst)
  return None
cpdef size_t num_arcs(self, int64 state) except *:
  """
  num_arcs(self, state)

  Returns the number of arcs leaving a state.

  Args:
    state: The integer index of a state.

  Returns:
    The number of arcs leaving that state.

  Raises:
    FstIndexError: State index out of range.

  See also: `num_states`.
  """
  cdef size_t count = self._fst.get().NumArcs(state)
  # SIZE_MAX is treated as the out-of-range sentinel.
  if count != SIZE_MAX:
    return count
  raise FstIndexError("State index out of range")
cpdef size_t num_input_epsilons(self, int64 state) except *:
  """
  num_input_epsilons(self, state)

  Returns the number of arcs with epsilon input labels leaving a state.

  Args:
    state: The integer index of a state.

  Returns:
    The number of epsilon-input-labeled arcs leaving that state.

  Raises:
    FstIndexError: State index out of range.

  See also: `num_output_epsilons`.
  """
  cdef size_t count = self._fst.get().NumInputEpsilons(state)
  # SIZE_MAX is treated as the out-of-range sentinel.
  if count != SIZE_MAX:
    return count
  raise FstIndexError("State index out of range")
cpdef size_t num_output_epsilons(self, int64 state) except *:
  """
  num_output_epsilons(self, state)

  Returns the number of arcs with epsilon output labels leaving a state.

  Args:
    state: The integer index of a state.

  Returns:
    The number of epsilon-output-labeled arcs leaving that state.

  Raises:
    FstIndexError: State index out of range.

  See also: `num_input_epsilons`.
  """
  cdef size_t count = self._fst.get().NumOutputEpsilons(state)
  # SIZE_MAX is treated as the out-of-range sentinel.
  if count != SIZE_MAX:
    return count
  raise FstIndexError("State index out of range")
cpdef _FstSymbolTable output_symbols(self):
  """
  output_symbols(self)

  Returns the FST's output symbol table, or None if none is present.

  See also: `input_symbols`.
  """
  # The FST hands back a const pointer; constness is cast away so the table
  # can be wrapped.
  cdef fst.SymbolTable *table = const_cast[SymbolTable_ptr](
      self._fst.get().OutputSymbols())
  if table != NULL:
    return _init_FstSymbolTable(table, self._fst)
  return None
cpdef uint64 properties(self, uint64 mask, bool test):
  """
  properties(self, mask, test)

  Provides property bits.

  This method gives the user access to the property attributes of the FST.
  The resulting value is a long integer, but when it is cast to a boolean,
  it represents whether or not the FST has the `mask` property.

  Args:
    mask: The property mask to be compared to the FST's properties.
    test: Should any unknown values be computed before comparing against
        the mask?

  Returns:
    A 64-bit bitmask representing the requested properties.
  """
  cdef uint64 bits = self._fst.get().Properties(mask, test)
  return bits
cpdef int64 start(self):
  """
  start(self)

  Returns the start state.
  """
  cdef int64 state = self._fst.get().Start()
  return state
cpdef StateIterator states(self):
  """
  states(self)

  Returns an iterator over all states in the FST.

  Returns:
    A StateIterator object for the FST.

  See also: `arcs`, `mutable_arcs`.
  """
  cdef StateIterator iterator = StateIterator(self)
  return iterator
cpdef string text(self, _SymbolTable isymbols=None,
    _SymbolTable osymbols=None, _SymbolTable ssymbols=None,
    bool acceptor=False, bool show_weight_one=False, missing_sym=b""):
  """
  text(self, isymbols=None, osymbols=None, ssymbols=None, acceptor=False,
       show_weight_one=False, missing_sym="")

  Produces a human-readable string representation of the FST.

  This method generates a human-readable string representation of the FST.
  The caller may optionally specify SymbolTables used to label input labels,
  output labels, or state labels, respectively.

  Args:
    isymbols: An optional symbol table used to label input symbols; when
        omitted, the FST's own input symbol table is used.
    osymbols: An optional symbol table used to label output symbols; when
        omitted, the FST's own output symbol table is used.
    ssymbols: An optional symbol table used to label states.
    acceptor: Should the FST be rendered in acceptor format if possible?
    show_weight_one: Should weights equivalent to semiring One be printed?
    missing_sym: The string to be printed when symbol table lookup fails.

  Returns:
    A formatted string representing the machine.
  """
  # Prints FST to stringstream, then returns resulting string.
  cdef stringstream sstrm
  # The state symbol table is optional and is passed as NULL when absent.
  cdef fst.SymbolTable *ssymbols_ptr = NULL
  if ssymbols is not None:
    ssymbols_ptr = ssymbols._table
  fst.PrintFst(
      deref(self._fst),
      sstrm,
      b"<pywrapfst>",
      self._fst.get().InputSymbols() if isymbols is None
      else isymbols._table,
      self._fst.get().OutputSymbols() if osymbols is None
      else osymbols._table,
      ssymbols_ptr,
      acceptor,
      show_weight_one,
      tostring(missing_sym))
  return sstrm.str()
cpdef bool verify(self):
  """
  verify(self)

  Verifies that an FST's contents are sane.

  Returns:
    True if the contents are sane, False otherwise.
  """
  cdef bool sane = fst.Verify(deref(self._fst))
  return sane
cpdef string weight_type(self):
  """
  weight_type(self)

  Provides the FST's weight type.

  Returns:
    A string representing the weight type.
  """
  cdef string result = self._fst.get().WeightType()
  return result
cpdef void write(self, filename) except *:
  """
  write(self, filename)

  Serializes FST to a file.

  This method writes the FST to a file in a binary format.

  Args:
    filename: The string location of the output file.

  Raises:
    FstIOError: Write failed.
  """
  cdef string path = tostring(filename)
  if self._fst.get().Write(path):
    return
  # The error echoes the filename exactly as the caller supplied it.
  raise FstIOError("Write failed: {!r}".format(filename))
cpdef string write_to_string(self):
  """
  write_to_string(self)

  Serializes FST to a string.

  Returns:
    A string.

  See also: `read_from_string`.
  """
  cdef string serialized = self._fst.get().WriteToString()
  return serialized
1757 cdef class _MutableFst(_Fst):
1762 Mutable FST class, wrapping MutableFstClass.
1764 This class extends _Fst by adding mutation operations.
cdef void _check_mutating_imethod(self) except *:
  """Checks whether an operation mutating the FST has produced an error.

  The FST's error property is queried and, when it is set, FstOpError is
  raised. This function is not visible to Python users.
  """
  cdef uint64 error_bits = self._fst.get().Properties(fst.kError, True)
  if error_bits == fst.kError:
    raise FstOpError("Operation failed")
cdef void _add_arc(self, int64 state, Arc arc) except *:
  """Adds an arc leaving `state`, validating the state index first."""
  cdef bool known_state = self._fst.get().ValidStateId(state)
  if not known_state:
    raise FstIndexError("State index out of range")
  # A false return from AddArc is reported as a weight-type problem.
  cdef bool added = self._mfst.get().AddArc(state, deref(arc._arc))
  if not added:
    raise FstOpError("Incompatible or invalid weight type")
  self._check_mutating_imethod()
def add_arc(self, int64 state, Arc arc):
  """
  add_arc(self, state, arc)

  Adds a new arc to the FST and returns self.

  Args:
    state: The integer index of the source state.
    arc: The arc to add.

  Returns:
    self.

  Raises:
    FstIndexError: State index out of range.
    FstOpError: Incompatible or invalid weight type.

  See also: `add_state`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._add_arc(state, arc)
  return self
cpdef int64 add_state(self) except *:
  """
  add_state(self)

  Adds a new state to the FST and returns the state ID.

  Returns:
    The integer index of the new state.

  See also: `add_arc`, `set_start`, `set_final`.
  """
  cdef int64 new_state = self._mfst.get().AddState()
  # The FST's error bit is checked before the new ID is handed back.
  self._check_mutating_imethod()
  return new_state
cdef void _arcsort(self, sort_type=b"ilabel") except *:
  """Sorts arcs in place after resolving the sort type by name."""
  cdef fst.ArcSortType sort_type_enum
  # GetArcSortType fills in the enum and reports whether the name was known.
  cdef bool recognized = fst.GetArcSortType(tostring(sort_type),
                                            addr(sort_type_enum))
  if not recognized:
    raise FstArgError("Unknown sort type {!r}".format(sort_type))
  fst.ArcSort(self._mfst.get(), sort_type_enum)
  self._check_mutating_imethod()
def arcsort(self, sort_type=b"ilabel"):
  """
  arcsort(self, sort_type="ilabel")

  Sorts arcs leaving each state of the FST.

  This operation destructively sorts arcs leaving each state using either
  input or output labels.

  Args:
    sort_type: Either "ilabel" (sort arcs according to input labels) or
        "olabel" (sort arcs according to output labels).

  Returns:
    self.

  Raises:
    FstArgError: Unknown sort type.

  See also: `topsort`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._arcsort(sort_type)
  return self
cdef void _closure(self, bool closure_plus=False) except *:
  """Applies concatenative closure in place, then checks for errors."""
  # The boolean is translated into the closure type taken by fst.Closure.
  fst.Closure(self._mfst.get(), fst.GetClosureType(closure_plus))
  self._check_mutating_imethod()
def closure(self, bool closure_plus=False):
  """
  closure(self, closure_plus=False)

  Computes concatenative closure.

  This operation destructively converts the FST to its concatenative closure.
  If A transduces string x to y with weight a, then the closure transduces x
  to y with weight a, xx to yy with weight a \otimes a, xxx to yyy with weight
  a \otimes a \otimes a, and so on. The empty string is also transduced to
  itself with semiring One if `closure_plus` is False.

  Args:
    closure_plus: If True, do not accept the empty string.

  Returns:
    self.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._closure(closure_plus)
  return self
cdef void _concat(self, _Fst ifst) except *:
  """Concatenates `ifst` onto this FST in place, then checks for errors."""
  fst.Concat(self._mfst.get(), deref(ifst._fst))
  self._check_mutating_imethod()
def concat(self, _Fst ifst):
  """
  concat(self, ifst)

  Computes the concatenation (product) of two FSTs.

  This operation destructively concatenates the FST with a second FST. If A
  transduces string x to y with weight a and B transduces string w to v with
  weight b, then their concatenation transduces string xw to yv with weight a
  \otimes b.

  Args:
    ifst: The second input FST.

  Returns:
    self.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._concat(ifst)
  return self
cdef void _connect(self) except *:
  """Trims the FST in place, then checks for errors."""
  fst.Connect(self._mfst.get())
  self._check_mutating_imethod()
def connect(self):
  """
  connect(self)

  Removes unsuccessful paths.

  This operation destructively trims the FST, removing states and arcs that
  are not part of any successful path.

  Returns:
    self.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._connect()
  return self
cdef void _decode(self, EncodeMapper encoder) except *:
  """Decodes the FST in place with `encoder`, then checks for errors."""
  fst.Decode(self._mfst.get(), deref(encoder._encoder))
  self._check_mutating_imethod()
def decode(self, EncodeMapper encoder):
  """
  decode(self, encoder)

  Decodes encoded labels and/or weights.

  This operation reverses the encoding performed by `encode`.

  Args:
    encoder: An EncodeMapper object used to encode the FST.

  Returns:
    self.

  See also: `encode`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._decode(encoder)
  return self
cdef void _delete_arcs(self, int64 state, size_t n=0) except *:
  """Deletes arcs leaving `state`; a zero `n` selects the delete-all form."""
  cdef bool deleted
  if n:
    deleted = self._mfst.get().DeleteArcs(state, n)
  else:
    deleted = self._mfst.get().DeleteArcs(state)
  if not deleted:
    raise FstIndexError("State index out of range")
  self._check_mutating_imethod()
def delete_arcs(self, int64 state, size_t n=0):
  """
  delete_arcs(self, state, n=0)

  Deletes arcs leaving a particular state.

  Args:
    state: The integer index of a state.
    n: An optional argument indicating how many arcs to be deleted. If this
        argument is omitted or passed as zero, all arcs from this state are
        deleted.

  Returns:
    self.

  Raises:
    FstIndexError: State index out of range.

  See also: `delete_states`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._delete_arcs(state, n)
  return self
cdef void _delete_states(self, states=None) except *:
  """Deletes the listed states, or every state when none are listed."""
  # Only the form taking a list of states has a possible indexing failure.
  # NOTE(review): the branch appears to be chosen by truthiness, so an empty
  # collection would also select the delete-all form -- confirm intent.
  if not states:
    self._mfst.get().DeleteStates()
  elif not self._mfst.get().DeleteStates(<const vector[int64]> states):
    raise FstIndexError("State index out of range")
  self._check_mutating_imethod()
def delete_states(self, states=None):
  """
  delete_states(self, states=None)

  Deletes states.

  Args:
    states: An optional iterable of integer indices of the states to be
        deleted. If this argument is omitted, all states are deleted.

  Returns:
    self.

  Raises:
    FstIndexError: State index out of range.

  See also: `delete_arcs`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._delete_states(states)
  return self
cdef void _encode(self, EncodeMapper encoder) except *:
  """Encodes the FST in place with `encoder`, then checks for errors."""
  # The encoder is passed by pointer, unlike in _decode.
  fst.Encode(self._mfst.get(), encoder._encoder.get())
  self._check_mutating_imethod()
def encode(self, EncodeMapper encoder):
  """
  encode(self, encoder)

  Encodes labels and/or weights.

  This operation allows for the representation of a weighted transducer as a
  weighted acceptor, an unweighted transducer, or an unweighted acceptor by
  considering the pair (input label, output label), the pair (input label,
  weight), or the triple (input label, output label, weight) as a single
  label. Applying this operation mutates the EncodeMapper argument, which
  can then be used to decode.

  Args:
    encoder: An EncodeMapper object to be used as the encoder.

  Returns:
    self.

  See also: `decode`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._encode(encoder)
  return self
cdef void _invert(self) except *:
  """Inverts the FST in place, then checks for errors."""
  fst.Invert(self._mfst.get())
  self._check_mutating_imethod()
def invert(self):
  """
  invert(self)

  Inverts the FST's transduction.

  This operation destructively inverts the FST's transduction by exchanging
  input and output labels.

  Returns:
    self.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._invert()
  return self
cdef void _minimize(self, float delta=fst.kDelta,
                    bool allow_nondet=False) except *:
  """Minimizes the FST in place, then checks for errors."""
  # This runs in-place when the second argument is null.
  fst.Minimize(self._mfst.get(), NULL, delta, allow_nondet)
  self._check_mutating_imethod()
def minimize(self, float delta=fst.kDelta, bool allow_nondet=False):
  """
  minimize(self, delta=0.0009765625, allow_nondet=False)

  Minimizes the FST.

  This operation destructively performs the minimization of deterministic
  weighted automata and transducers. If the input FST A is an acceptor, this
  operation produces the minimal acceptor B equivalent to A, i.e. the
  acceptor with a minimal number of states that is equivalent to A. If the
  input FST A is a transducer, this operation internally builds an equivalent
  transducer with a minimal number of states. However, this minimality is
  obtained by allowing transitions to have strings of symbols as output
  labels; this is known in the literature as a real-time transducer. Such
  transducers are not directly supported by the library. This function will
  convert such a transducer by expanding each string-labeled transition into
  a sequence of transitions. This will result in the creation of new states,
  hence losing the minimality property.

  Args:
    delta: Comparison/quantization delta.
    allow_nondet: Attempt minimization of non-deterministic FST?

  Returns:
    self.
  """
  # Both options are forwarded; previously `allow_nondet` was accepted but
  # dropped here, so the helper always ran with its default of False.
  self._minimize(delta, allow_nondet)
  return self
cpdef MutableArcIterator mutable_arcs(self, int64 state):
  """
  mutable_arcs(self, state)

  Returns a mutable iterator over arcs leaving the specified state.

  Args:
    state: The source state ID.

  Returns:
    A MutableArcIterator.

  See also: `arcs`, `states`.
  """
  cdef MutableArcIterator iterator = MutableArcIterator(self, state)
  return iterator
def mutable_input_symbols(self):
  """
  mutable_input_symbols(self)

  Returns the FST's (mutable) input symbol table, or None if none is present.
  """
  cdef fst.SymbolTable *table = self._mfst.get().MutableInputSymbols()
  if table != NULL:
    return _init_MutableFstSymbolTable(table, self._mfst)
  return None
def mutable_output_symbols(self):
  """
  mutable_output_symbols(self)

  Returns the FST's (mutable) output symbol table, or None if none is present.
  """
  cdef fst.SymbolTable *table = self._mfst.get().MutableOutputSymbols()
  if table != NULL:
    return _init_MutableFstSymbolTable(table, self._mfst)
  return None
cpdef int64 num_states(self):
  """
  num_states(self)

  Returns the number of states.
  """
  cdef int64 count = self._mfst.get().NumStates()
  return count
cdef void _project(self, bool project_output=False) except *:
  """Projects the FST in place, then checks for errors."""
  # The boolean is translated into the project type taken by fst.Project.
  fst.Project(self._mfst.get(), fst.GetProjectType(project_output))
  self._check_mutating_imethod()
def project(self, bool project_output=False):
  """
  project(self, project_output=False)

  Converts the FST to an acceptor using input or output labels.

  This operation destructively projects an FST onto its domain or range by
  either copying each arc's input label to its output label (the default) or
  vice versa.

  Args:
    project_output: Should the output labels be projected?

  Returns:
    self.

  See also: `decode`, `encode`, `relabel_pairs`, `relabel_tables`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._project(project_output)
  return self
cdef void _prune(self, float delta=fst.kDelta, int64 nstate=fst.kNoStateId,
                 weight=None) except *:
  """Prunes the FST in place, then checks for errors."""
  # Threshold is set to semiring Zero (no pruning) if no weight is specified.
  cdef fst.WeightClass threshold = _get_WeightClass_or_Zero(
      self.weight_type(), weight)
  fst.Prune(self._mfst.get(), threshold, nstate, delta)
  self._check_mutating_imethod()
def prune(self,
          float delta=fst.kDelta,
          int64 nstate=fst.kNoStateId,
          weight=None):
  """
  prune(self, delta=0.0009765625, nstate=NO_STATE_ID, weight=None)

  Removes paths with weights below a certain threshold.

  This operation deletes states and arcs in the input FST that do not belong
  to a successful path whose weight is no more (w.r.t the natural semiring
  order) than the threshold t \otimes-times the weight of the shortest path in
  the input FST. Weights must be commutative and have the path property.

  Args:
    delta: Comparison/quantization delta.
    nstate: State number threshold.
    weight: A Weight or weight string indicating the desired weight threshold
        below which paths are pruned; if omitted, no paths are pruned.

  Returns:
    self.

  See also: The constructive variant.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._prune(delta, nstate, weight)
  return self
cdef void _push(self,
                float delta=fst.kDelta,
                bool remove_total_weight=False,
                bool to_final=False) except *:
  """Pushes weights in place, then checks for errors."""
  # The boolean direction is translated into the reweight type taken by
  # fst.Push.
  fst.Push(self._mfst.get(),
           fst.GetReweightType(to_final),
           delta,
           remove_total_weight)
  self._check_mutating_imethod()
def push(self,
         float delta=fst.kDelta,
         bool remove_total_weight=False,
         bool to_final=False):
  """
  push(self, delta=0.0009765625, remove_total_weight=False, to_final=False)

  Pushes weights towards the initial or final states.

  This operation destructively produces an equivalent transducer by pushing
  the weights towards the initial state or toward the final states. When
  pushing weights towards the initial state, the sum of the weight of the
  outgoing transitions and final weight at any non-initial state is equal to
  one in the resulting machine. When pushing weights towards the final states,
  the sum of the weight of the incoming transitions at any state is equal to
  one. Weights need to be left distributive when pushing towards the initial
  state and right distributive when pushing towards the final states.

  Args:
    delta: Comparison/quantization delta.
    remove_total_weight: If pushing weights, should the total weight be
        removed?
    to_final: Push towards final states?

  Returns:
    self.

  See also: The constructive variant, which also supports label pushing.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._push(delta, remove_total_weight, to_final)
  return self
cdef void _relabel_pairs(self, ipairs=None, opairs=None) except *:
  """Relabels the FST in place from (old, new) label pairs."""
  cdef unique_ptr[vector[fst.LabelPair]] input_pairs
  cdef unique_ptr[vector[fst.LabelPair]] output_pairs
  cdef int64 before
  cdef int64 after
  input_pairs.reset(new vector[fst.LabelPair]())
  output_pairs.reset(new vector[fst.LabelPair]())
  # Each Python pair is unpacked into typed integers before being stored.
  if ipairs:
    for (before, after) in ipairs:
      input_pairs.get().push_back(fst.LabelPair(before, after))
  if opairs:
    for (before, after) in opairs:
      output_pairs.get().push_back(fst.LabelPair(before, after))
  # At least one of the two lists must end up non-empty.
  if input_pairs.get().empty() and output_pairs.get().empty():
    raise FstArgError("No relabeling pairs specified.")
  fst.Relabel(self._mfst.get(), deref(input_pairs), deref(output_pairs))
  self._check_mutating_imethod()
def relabel_pairs(self, ipairs=None, opairs=None):
  """
  relabel_pairs(self, ipairs=None, opairs=None)

  Replaces input and/or output labels using pairs of labels.

  This operation destructively relabels the input and/or output labels of the
  FST using pairs of the form (old_ID, new_ID); omitted indices are
  identity-mapped.

  Args:
    ipairs: An iterable containing (older index, newer index) integer pairs.
    opairs: An iterable containing (older index, newer index) integer pairs.

  Returns:
    self.

  Raises:
    FstArgError: No relabeling pairs specified.

  See also: `decode`, `encode`, `project`, `relabel_tables`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._relabel_pairs(ipairs, opairs)
  return self
cdef void _relabel_tables(self,
                          _SymbolTable old_isymbols=None,
                          _SymbolTable new_isymbols=None,
                          unknown_isymbol=b"",
                          bool attach_new_isymbols=True,
                          _SymbolTable old_osymbols=None,
                          _SymbolTable new_osymbols=None,
                          unknown_osymbol=b"",
                          bool attach_new_osymbols=True) except *:
  """Relabels the FST in place from old/new symbol tables."""
  # At least one replacement table is required.
  if new_isymbols is None and new_osymbols is None:
    raise FstArgError("No new SymbolTables specified")
  # A missing replacement table is passed through as NULL.
  cdef fst.SymbolTable *new_isymbols_ptr = NULL
  cdef fst.SymbolTable *new_osymbols_ptr = NULL
  if new_isymbols is not None:
    new_isymbols_ptr = new_isymbols._table
  if new_osymbols is not None:
    new_osymbols_ptr = new_osymbols._table
  # A missing old table falls back to the table attached to the FST.
  fst.Relabel(
      self._mfst.get(),
      self._fst.get().InputSymbols() if old_isymbols is None else
      old_isymbols._table,
      new_isymbols_ptr,
      tostring(unknown_isymbol),
      attach_new_isymbols,
      self._fst.get().OutputSymbols() if old_osymbols is None else
      old_osymbols._table,
      new_osymbols_ptr,
      tostring(unknown_osymbol),
      attach_new_osymbols)
  self._check_mutating_imethod()
def relabel_tables(self,
                   _SymbolTable old_isymbols=None,
                   _SymbolTable new_isymbols=None,
                   unknown_isymbol=b"",
                   bool attach_new_isymbols=True,
                   _SymbolTable old_osymbols=None,
                   _SymbolTable new_osymbols=None,
                   unknown_osymbol=b"",
                   bool attach_new_osymbols=True):
  """
  relabel_tables(self, old_isymbols=None, new_isymbols=None,
                 unknown_isymbol="", attach_new_isymbols=True,
                 old_osymbols=None, new_osymbols=None,
                 unknown_osymbol="", attach_new_osymbols=True)

  Replaces input and/or output labels using SymbolTables.

  This operation destructively relabels the input and/or output labels of the
  FST using user-specified symbol tables; omitted symbols are identity-mapped.

  Args:
    old_isymbols: The old SymbolTable for input labels, defaulting to the
        FST's input symbol table.
    new_isymbols: A SymbolTable used to relabel the input labels.
    unknown_isymbol: Input symbol to use to relabel OOVs (if empty,
        OOVs raise an exception).
    attach_new_isymbols: Should new_isymbols be made the FST's input symbol
        table?
    old_osymbols: The old SymbolTable for output labels, defaulting to the
        FST's output symbol table.
    new_osymbols: A SymbolTable used to relabel the output labels.
    unknown_osymbol: Output symbol to use to relabel OOVs (if empty,
        OOVs raise an exception).
    attach_new_osymbols: Should new_osymbols be made the FST's output symbol
        table?

  Returns:
    self.

  Raises:
    FstArgError: No SymbolTable specified.

  See also: `decode`, `encode`, `project`, `relabel_pairs`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._relabel_tables(old_isymbols,
                       new_isymbols,
                       unknown_isymbol,
                       attach_new_isymbols,
                       old_osymbols,
                       new_osymbols,
                       unknown_osymbol,
                       attach_new_osymbols)
  return self
cdef void _reserve_arcs(self, int64 state, size_t n) except *:
  """Reserves `n` arcs at `state`, then checks for errors."""
  cdef bool reserved = self._mfst.get().ReserveArcs(state, n)
  if not reserved:
    raise FstIndexError("State index out of range")
  self._check_mutating_imethod()
def reserve_arcs(self, int64 state, size_t n):
  """
  reserve_arcs(self, state, n)

  Reserve n arcs at a particular state (best effort).

  Args:
    state: The integer index of a state.
    n: The number of arcs to reserve.

  Returns:
    self.

  Raises:
    FstIndexError: State index out of range.

  See also: `reserve_states`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._reserve_arcs(state, n)
  return self
cdef void _reserve_states(self, int64 n) except *:
  """Reserves `n` states, then checks for errors."""
  self._mfst.get().ReserveStates(n)
  self._check_mutating_imethod()
def reserve_states(self, int64 n):
  """
  reserve_states(self, n)

  Reserve n states (best effort).

  Args:
    n: The number of states to reserve.

  Returns:
    self.

  See also: `reserve_arcs`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._reserve_states(n)
  return self
cdef void _reweight(self, potentials, bool to_final=False) except *:
  """Reweights the FST in place from an iterable of potentials."""
  cdef unique_ptr[vector[fst.WeightClass]] _potentials
  _potentials.reset(new vector[fst.WeightClass]())
  # The weight type does not change during the loop, so it is looked up once
  # and reused; the local was previously computed but never read.
  cdef string weight_type = self.weight_type()
  for weight in potentials:
    _potentials.get().push_back(_get_WeightClass_or_One(weight_type, weight))
  fst.Reweight(self._mfst.get(), deref(_potentials),
               fst.GetReweightType(to_final))
  self._check_mutating_imethod()
def reweight(self, potentials, bool to_final=False):
  """
  reweight(self, potentials, to_final=False)

  Reweights an FST using an iterable of potentials.

  This operation destructively reweights an FST according to the potentials
  and in the direction specified by the user. An arc of weight w, with an
  origin state of potential p and destination state of potential q, is
  reweighted by p^{-1} \otimes (w \otimes q) when reweighting towards the
  initial state, and by (p \otimes w) \otimes q^{-1} when reweighting towards
  the final states. The weights must be left distributive when reweighting
  towards the initial state and right distributive when reweighting towards
  the final states (e.g., TropicalWeight and LogWeight).

  Args:
    potentials: An iterable of Weight or weight strings.
    to_final: Push towards final states?

  Returns:
    self.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._reweight(potentials, to_final)
  return self
cdef void _rmepsilon(self,
                     queue_type=b"auto",
                     bool connect=True,
                     weight=None,
                     int64 nstate=fst.kNoStateId,
                     float delta=fst.kDelta) except *:
  """Removes epsilon transitions in place, then checks for errors."""
  # A missing weight yields semiring Zero as the threshold.
  cdef fst.WeightClass threshold = _get_WeightClass_or_Zero(
      self.weight_type(), weight)
  # The options struct is heap-allocated and owned by a unique_ptr.
  cdef unique_ptr[fst.RmEpsilonOptions] options
  options.reset(
      new fst.RmEpsilonOptions(_get_queue_type(tostring(queue_type)),
                               connect, threshold, nstate, delta))
  fst.RmEpsilon(self._mfst.get(), deref(options))
  self._check_mutating_imethod()
def rmepsilon(self,
              queue_type=b"auto",
              bool connect=True,
              weight=None,
              int64 nstate=fst.kNoStateId,
              float delta=fst.kDelta):
  """
  rmepsilon(self, queue_type="auto", connect=True, weight=None,
            nstate=NO_STATE_ID, delta=0.0009765625):

  Removes epsilon transitions.

  This operation destructively removes epsilon transitions (i.e., those where
  both input and output labels are epsilon) from an FST.

  Args:
    queue_type: A string matching a known queue type; one of: "auto", "fifo",
        "lifo", "shortest", "state", "top".
    connect: Should output be trimmed?
    weight: A Weight or weight string indicating the desired weight threshold
        below which paths are pruned; if omitted, no paths are pruned.
    nstate: State number threshold.
    delta: Comparison/quantization delta.

  Returns:
    self.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._rmepsilon(queue_type, connect, weight, nstate, delta)
  return self
cdef void _set_final(self, int64 state, weight=None) except *:
  """Sets the final weight of `state`, validating the index first."""
  cdef bool known_state = self._mfst.get().ValidStateId(state)
  if not known_state:
    raise FstIndexError("State index out of range")
  # A missing weight is replaced by semiring One.
  cdef fst.WeightClass final_weight = _get_WeightClass_or_One(
      self.weight_type(), weight)
  cdef bool accepted = self._mfst.get().SetFinal(state, final_weight)
  if not accepted:
    raise FstOpError("Incompatible or invalid weight")
  self._check_mutating_imethod()
def set_final(self, int64 state, weight=None):
  """
  set_final(self, state, weight)

  Sets the final weight for a state.

  Args:
    state: The integer index of a state.
    weight: A Weight or weight string indicating the desired final weight; if
        omitted, it is set to semiring One.

  Returns:
    self.

  Raises:
    FstIndexError: State index out of range.
    FstOpError: Incompatible or invalid weight.

  See also: `set_start`.
  """
  # All the work happens in the cdef helper; self is returned so that calls
  # can be chained.
  self._set_final(state, weight)
  return self
cdef void _set_input_symbols(self, _SymbolTable syms) except *:
  # A None table is forwarded as NULL to the wrapped FST.
  if syms is not None:
    self._mfst.get().SetInputSymbols(syms._table)
    self._check_mutating_imethod()
  else:
    self._mfst.get().SetInputSymbols(NULL)
def set_input_symbols(self, _SymbolTable syms):
  """
  set_input_symbols(self, syms)

  Attaches an input symbol table to the FST.

  Passing None as a value will delete the input symbol table.

  Args:
    syms: A SymbolTable.

  Returns:
    self.

  See also: `set_output_symbols`.
  """
  self._set_input_symbols(syms)
  return self
cdef void _set_output_symbols(self, _SymbolTable syms) except *:
  # A None table is forwarded as NULL to the wrapped FST.
  if syms is not None:
    self._mfst.get().SetOutputSymbols(syms._table)
    self._check_mutating_imethod()
  else:
    self._mfst.get().SetOutputSymbols(NULL)
def set_output_symbols(self, _SymbolTable syms):
  """
  set_output_symbols(self, syms)

  Attaches an output symbol table to the FST.

  Passing None as a value will delete the output symbol table.

  Args:
    syms: A SymbolTable.

  Returns:
    self.

  See also: `set_input_symbols`.
  """
  self._set_output_symbols(syms)
  return self
cdef void _set_properties(self, uint64 props, uint64 mask):
  # Plain pass-through to the wrapped mutable FST; no error check follows.
  cdef MutableFstClass_ptr mfst = self._mfst.get()
  mfst.SetProperties(props, mask)
def set_properties(self, uint64 props, uint64 mask):
  """
  set_properties(self, props, mask)

  Sets the properties bits.

  Args:
    props: The properties to be set.
    mask: A mask to be applied to the `props` argument before setting them.

  Returns:
    self.
  """
  self._set_properties(props, mask)
  return self
cdef void _set_start(self, int64 state) except *:
  # SetStart signals a rejected state index by returning False.
  cdef bool accepted = self._mfst.get().SetStart(state)
  if not accepted:
    raise FstIndexError("State index out of range")
  self._check_mutating_imethod()
def set_start(self, int64 state):
  """
  set_start(self, state)

  Sets a state to be the initial state.

  Args:
    state: The integer index of a state.

  Returns:
    self.

  Raises:
    FstIndexError: State index out of range.

  See also: `set_final`.
  """
  self._set_start(state)
  return self
cdef void _topsort(self) except *:
  # A False result from TopSort means the FST is cyclic and was not sorted;
  # this is reported as a warning rather than an exception.
  cdef bool was_sorted = fst.TopSort(self._mfst.get())
  if not was_sorted:
    logging.warning("Cannot topsort cyclic FST.")
  self._check_mutating_imethod()
2614 Sorts transitions by state IDs.
2616 This operation destructively topologically sorts the FST, if it is acyclic;
2617 otherwise it remains unchanged. Once sorted, all transitions are from lower
2618 state IDs to higher state IDs
2623 See also: `arcsort`.
cdef void _union(self, _Fst ifst) except *:
  # Merges `ifst` into this FST in place, then verifies the result.
  cdef MutableFstClass_ptr target = self._mfst.get()
  fst.Union(target, deref(ifst._fst))
  self._check_mutating_imethod()
def union(self, _Fst ifst):
  """
  union(self, ifst)

  Computes the union (sum) of two FSTs.

  This operation destructively computes the union (sum) of this FST and
  `ifst`. If A transduces string x to y with weight a and B transduces string
  w to v with weight b, then their union transduces x to y with weight a and
  w to v with weight b.

  Args:
    ifst: The second input FST.

  Returns:
    self.
  """
  self._union(ifst)
  return self
2652 # Pseudo-constructors for _Fst and _MutableFst.
2654 # _init_Fst and _init_MutableFst use an FstClass pointer to instantiate _Fst
2655 # and _MutableFst objects, respectively. The latter function is only safe to
2656 # call when the FST being wrapped is known to be kMutable. The caller can
2657 # safely use it when they have either checked this bit (e.g., by using
2658 # _init_XFst) or have themselves constructed a mutable container for the
2659 # FstClass pointer they're passing (e.g., most of the constructive operations,
2660 # storing their results in a VectorFstClass, a derivative of MutableFstClass).
2662 # _create_Fst constructs an empty VectorFstClass of a user-specified arc type,
2663 # and passes this pointer to _init_MutableFst.
2665 # _read_Fst reads an FST from disk, performing FST conversion if requested, and
2666 # then passes this pointer to _init_XFst.
2668 # The Python class Fst provides a wrapper for these two operations. The former
2669 # can be accessed by calling Fst(...), which acts like a class method, and the
2670 # latter via Fst.read(...), which acts like a static method. This is a bit
2671 # nasty, but totally hidden from the Python user.
cdef _Fst _init_Fst(FstClass_ptr tfst):
  # Wraps a raw FstClass pointer in a new _Fst instance.
  #
  # Ownership is taken *before* the error check: callers hand over a released
  # raw pointer, so raising first would leak the underlying FST. Once the
  # pointer is held by `ofst._fst`, discarding `ofst` on the error path frees
  # it.
  #
  # Raises:
  #   FstOpError: The FST has its error property set.
  cdef _Fst ofst = _Fst.__new__(_Fst)
  ofst._fst.reset(<FstClass_ptr> tfst)
  if tfst.Properties(fst.kError, True):
    raise FstOpError("Operation failed")
  return ofst
cdef _MutableFst _init_MutableFst(MutableFstClass_ptr tfst):
  # Wraps a raw MutableFstClass pointer in a new _MutableFst instance. Only
  # safe when the wrapped FST is known to be mutable.
  #
  # Ownership is taken *before* the error check: callers hand over a released
  # raw pointer, so raising first would leak the underlying FST. Once the
  # pointer is held by `ofst._fst`, discarding `ofst` on the error path frees
  # it.
  #
  # Raises:
  #   FstOpError: The FST has its error property set.
  cdef _MutableFst ofst = _MutableFst.__new__(_MutableFst)
  ofst._fst.reset(<MutableFstClass_ptr> tfst)
  if tfst.Properties(fst.kError, True):
    raise FstOpError("Operation failed")
  # Second handle to the same object, typed as the derived (mutable) class.
  ofst._mfst = static_pointer_cast[fst.MutableFstClass, fst.FstClass](ofst._fst)
  return ofst
cdef _Fst _init_XFst(FstClass_ptr tfst):
  # Dispatches on the kMutable property bit: immutable FSTs get the plain
  # wrapper, everything else is downcast and wrapped as a _MutableFst.
  if not tfst.Properties(fst.kMutable, True):
    return _init_Fst(tfst)
  return _init_MutableFst(static_cast[MutableFstClass_ptr](tfst))
cdef _MutableFst _create_Fst(arc_type=b"standard"):
  # Builds an empty VectorFstClass of the requested arc type and wraps it.
  cdef unique_ptr[fst.VectorFstClass] empty_fst
  cdef string arc_type_string = tostring(arc_type)
  empty_fst.reset(new fst.VectorFstClass(arc_type_string))
  if empty_fst.get() == NULL:
    raise FstOpError("Unknown arc type: {!r}".format(arc_type))
  # Ownership passes to the wrapper, which also checks the error property.
  return _init_MutableFst(empty_fst.release())
cdef _Fst _read_Fst(filename, fst_type=None):
  # Reads an FST from `filename`, optionally converting it to `fst_type`.
  cdef unique_ptr[fst.FstClass] loaded
  cdef string fst_type_string
  loaded.reset(fst.FstClass.Read(tostring(filename)))
  if loaded.get() == NULL:
    raise FstIOError("Read failed: {!r}".format(filename))
  # No conversion requested: wrap what was read.
  if fst_type is None:
    return _init_XFst(loaded.release())
  fst_type_string = tostring(fst_type)
  # Conversion is skipped when the FST already has the requested type.
  if fst_type_string != loaded.get().FstType():
    loaded.reset(fst.Convert(deref(loaded), fst_type_string))
    if loaded.get() == NULL:
      raise FstOpError("Conversion to {!r} failed.".format(fst_type))
  return _init_XFst(loaded.release())
cdef _Fst _deserialize_Fst(fst_string, fst_type=None):
  # Reads an FST from a serialized string, optionally converting it to
  # `fst_type`.
  #
  # Raises:
  #   FstIOError: The string could not be read as an FST.
  #   FstOpError: Conversion to the requested FST type failed.
  cdef unique_ptr[fst.FstClass] ofst
  cdef string fst_type_string
  ofst.reset(fst.FstClass.ReadFromString(fst_string))
  # A failed read yields a null pointer; without this guard the code below
  # would dereference it.
  if ofst.get() == NULL:
    raise FstIOError("Read failed")
  if fst_type is not None:
    fst_type_string = tostring(fst_type)
    # Conversion is skipped when the FST already has the requested type.
    if fst_type_string != ofst.get().FstType():
      ofst.reset(fst.Convert(deref(ofst), fst_type_string))
      if ofst.get() == NULL:
        raise FstOpError("Conversion to {!r} failed.".format(fst_type))
  return _init_XFst(ofst.release())
2738 Fst(arc_type="standard")
2740 Constructs an empty FST.
2743 arc_type: A string indicating the arc type.
2746 FstError: Unknown arc type.
2749 FstOpError: operation failed.
def __new__(cls, arc_type=b"standard"):
  # Calling the class hands back whatever _create_Fst builds for this arc
  # type, rather than an instance of `cls` itself.
  new_fst = _create_Fst(arc_type)
  return new_fst
def read(filename, fst_type=None):
  """
  read(filename, fst_type=None)

  Loads an FST from a file.

  Args:
    filename: The string location of the input file.
    fst_type: A string indicating the FST type to convert to; no conversion
        is performed if omitted or if the FST is already of the desired type.

  Returns:
    The FST that was read, after any requested conversion.

  Raises:
    FstIOError: Read failed.
    FstOpError: Read-time conversion failed.
  """
  loaded = _read_Fst(filename, fst_type)
  return loaded
def read_from_string(fst_string, fst_type=None):
  """
  read_from_string(fst_string, fst_type=None)

  Loads an FST from a serialized string.

  Args:
    fst_string: The string containing the serialized FST.
    fst_type: A string indicating the FST type to convert to; no conversion
        is performed if omitted or if the FST is already of the desired type.

  Returns:
    The FST that was read, after any requested conversion.

  Raises:
    FstIOError: Read failed.
    FstOpError: Read-time conversion failed.

  See also: `write_to_string`.
  """
  loaded = _deserialize_Fst(fst_string, fst_type)
  return loaded
# Sentinel constants re-exported under Python-style (all caps) names.
NO_LABEL = fst.kNoLabel
NO_STATE_ID = fst.kNoStateId
# TODO(kbg): Figure out how to access static class variables so I don't have
# to do it this way.
NO_SYMBOL = kNoSymbol
# FST property constants, re-exported one-for-one from the `fst` namespace
# under Python-style (all caps) names. Values of this kind are what
# `set_properties` takes as its `props` and `mask` arguments.
EXPANDED = fst.kExpanded
MUTABLE = fst.kMutable
ACCEPTOR = fst.kAcceptor
NOT_ACCEPTOR = fst.kNotAcceptor
I_DETERMINISTIC = fst.kIDeterministic
NON_I_DETERMINISTIC = fst.kNonIDeterministic
O_DETERMINISTIC = fst.kODeterministic
NON_O_DETERMINISTIC = fst.kNonODeterministic
EPSILONS = fst.kEpsilons
NO_EPSILONS = fst.kNoEpsilons
I_EPSILONS = fst.kIEpsilons
NO_I_EPSILONS = fst.kNoIEpsilons
O_EPSILONS = fst.kOEpsilons
NO_O_EPSILONS = fst.kNoOEpsilons
I_LABEL_SORTED = fst.kILabelSorted
NOT_I_LABEL_SORTED = fst.kNotILabelSorted
O_LABEL_SORTED = fst.kOLabelSorted
NOT_O_LABEL_SORTED = fst.kNotOLabelSorted
WEIGHTED = fst.kWeighted
UNWEIGHTED = fst.kUnweighted
CYCLIC = fst.kCyclic
ACYCLIC = fst.kAcyclic
INITIAL_CYCLIC = fst.kInitialCyclic
INITIAL_ACYCLIC = fst.kInitialAcyclic
TOP_SORTED = fst.kTopSorted
NOT_TOP_SORTED = fst.kNotTopSorted
ACCESSIBLE = fst.kAccessible
NOT_ACCESSIBLE = fst.kNotAccessible
COACCESSIBLE = fst.kCoAccessible
NOT_COACCESSIBLE = fst.kNotCoAccessible
STRING = fst.kString
NOT_STRING = fst.kNotString
WEIGHTED_CYCLES = fst.kWeightedCycles
UNWEIGHTED_CYCLES = fst.kUnweightedCycles
# Composite property constants (names ending in _PROPERTIES).
NULL_PROPERTIES = fst.kNullProperties
COPY_PROPERTIES = fst.kCopyProperties
INTRINSIC_PROPERTIES = fst.kIntrinsicProperties
EXTRINSIC_PROPERTIES = fst.kExtrinsicProperties
SET_START_PROPERTIES = fst.kSetStartProperties
SET_FINAL_PROPERTIES = fst.kSetFinalProperties
ADD_STATE_PROPERTIES = fst.kAddStateProperties
ADD_ARC_PROPERTIES = fst.kAddArcProperties
SET_ARC_PROPERTIES = fst.kSetArcProperties
DELETE_STATE_PROPERTIES = fst.kDeleteStatesProperties
DELETE_ARC_PROPERTIES = fst.kDeleteArcsProperties
STATE_SORT_PROPERTIES = fst.kStateSortProperties
ARC_SORT_PROPERTIES = fst.kArcSortProperties
I_LABEL_INVARIANT_PROPERTIES = fst.kILabelInvariantProperties
O_LABEL_INVARIANT_PROPERTIES = fst.kOLabelInvariantProperties
WEIGHT_INVARIANT_PROPERTIES = fst.kWeightInvariantProperties
ADD_SUPERFINAL_PROPERTIES = fst.kAddSuperFinalProperties
RM_SUPERFINAL_PROPERTIES = fst.kRmSuperFinalProperties
BINARY_PROPERTIES = fst.kBinaryProperties
TRINARY_PROPERTIES = fst.kTrinaryProperties
POS_TRINARY_PROPERTIES = fst.kPosTrinaryProperties
NEG_TRINARY_PROPERTIES = fst.kNegTrinaryProperties
FST_PROPERTIES = fst.kFstProperties
## Arc iterator properties.
# Flag constants for arc iterators; see the `flags` and `set_flags` methods of
# ArcIterator and MutableArcIterator.
ARC_I_LABEL_VALUE = fst.kArcILabelValue
ARC_O_LABEL_VALUE = fst.kArcOLabelValue
ARC_WEIGHT_VALUE = fst.kArcWeightValue
ARC_NEXT_STATE_VALUE = fst.kArcNextStateValue
ARC_NO_CACHE = fst.kArcNoCache
ARC_VALUE_FLAGS = fst.kArcValueFlags
ARC_FLAGS = fst.kArcFlags
## EncodeMapper properties.
# Flag constants re-exported from the `fst` namespace for use with the
# encode mapper.
ENCODE_LABELS = fst.kEncodeLabels
ENCODE_WEIGHTS = fst.kEncodeWeights
ENCODE_FLAGS = fst.kEncodeFlags
2893 ## Arc, ArcIterator, and MutableArcIterator.
cdef class Arc(object):

  """
  Arc(ilabel, olabel, weight, nextstate)

  A single arc, represented without committing to the underlying arc type.
  Attributes of the arc can be accessed or mutated, and the arc can be
  copied.

  Attributes:
    ilabel: The input label.
    olabel: The output label.
    weight: The arc weight.
    nextstate: The destination state for the arc.
  """

  def __repr__(self):
    return "<Arc at 0x{:x}>".format(id(self))

  def __init__(self, int64 ilabel, int64 olabel, weight, int64 nextstate):
    # The weight is always resolved against the "tropical" weight type here.
    cdef fst.WeightClass arc_weight = _get_WeightClass_or_One(b"tropical",
                                                              weight)
    self._arc.reset(new fst.ArcClass(ilabel, olabel, arc_weight, nextstate))

  cpdef Arc copy(self):
    # Built through the property getters, so the copy receives a freshly
    # constructed Weight rather than sharing this arc's storage.
    return Arc(self.ilabel, self.olabel, self.weight, self.nextstate)

  property ilabel:

    def __get__(self):
      return deref(self._arc).ilabel

    def __set__(self, int64 value):
      deref(self._arc).ilabel = value

  property olabel:

    def __get__(self):
      return deref(self._arc).olabel

    def __set__(self, int64 value):
      deref(self._arc).olabel = value

  property weight:

    def __get__(self):
      # Hands back a new Weight wrapping a copy of the stored weight.
      cdef Weight wrapped = Weight.__new__(Weight)
      wrapped._weight.reset(new fst.WeightClass(deref(self._arc).weight))
      return wrapped

    def __set__(self, weight):
      deref(self._arc).weight = _get_WeightClass_or_One(b"tropical", weight)

  property nextstate:

    def __get__(self):
      return deref(self._arc).nextstate

    def __set__(self, int64 value):
      deref(self._arc).nextstate = value
cdef Arc _init_Arc(const fst.ArcClass &arc):
  # Converts a C++ ArcClass into a Python-level Arc, copying its weight into
  # a new Weight wrapper first.
  cdef Weight wrapped = Weight.__new__(Weight)
  wrapped._weight.reset(new fst.WeightClass(arc.weight))
  return Arc(<int64> arc.ilabel,
             <int64> arc.olabel,
             wrapped,
             <int64> arc.nextstate)
cdef class ArcIterator(object):

  """
  ArcIterator(ifst, state)

  Iterates over the arcs leaving a given state of an FST.
  """

  def __repr__(self):
    return "<ArcIterator at 0x{:x}>".format(id(self))

  def __init__(self, _Fst ifst, int64 state):
    if not ifst._fst.get().ValidStateId(state):
      raise FstIndexError("State index out of range")
    # Holding our own shared_ptr copy keeps the FST alive for as long as this
    # iterator exists.
    self._fst = ifst._fst
    self._aiter.reset(new fst.ArcIteratorClass(deref(self._fst), state))

  # Registers this class as an iterator.
  def __iter__(self):
    return self

  # Adapts the C++ done/value/next protocol to Python's iterator protocol.
  def __next__(self):
    if self.done():
      raise StopIteration
    current = self.value()
    self.next()
    return current

  cpdef bool done(self):
    """
    done(self)

    Reports whether the iterator has been exhausted.

    Returns:
      True if the iterator is exhausted, False otherwise.
    """
    return self._aiter.get().Done()

  cpdef uint32 flags(self):
    """
    flags(self)

    Returns the current iterator behavioral flags.

    Returns:
      The current iterator behavioral flags as an integer.
    """
    return self._aiter.get().Flags()

  cpdef void next(self):
    """
    next(self)

    Advances the iterator.
    """
    self._aiter.get().Next()

  cpdef size_t position(self):
    """
    position(self)

    Returns the position of the iterator.

    Returns:
      The iterator's position, expressed as an integer.
    """
    return self._aiter.get().Position()

  cpdef void reset(self):
    """
    reset(self)

    Resets the iterator to the initial position.
    """
    self._aiter.get().Reset()

  cpdef void seek(self, size_t a):
    """
    seek(self, a)

    Moves the iterator to a new position.

    Args:
      a: The position to seek to.
    """
    self._aiter.get().Seek(a)

  cpdef void set_flags(self, uint32 flags, uint32 mask):
    """
    set_flags(self, flags, mask)

    Sets the current iterator behavioral flags.

    Args:
      flags: The properties to be set.
      mask: A mask to be applied to the `flags` argument before setting them.
    """
    self._aiter.get().SetFlags(flags, mask)

  cpdef object value(self):
    """
    value(self)

    Returns the current arc.
    """
    return _init_Arc(self._aiter.get().Value())
cdef class MutableArcIterator(object):

  """
  MutableArcIterator(ifst, state)

  Iterates over the arcs leaving a given state of an FST, and additionally
  allows the current arc to be replaced.
  """

  def __repr__(self):
    return "<MutableArcIterator at 0x{:x}>".format(id(self))

  def __init__(self, _MutableFst ifst, int64 state):
    if not ifst._fst.get().ValidStateId(state):
      raise FstIndexError("State index out of range")
    # Holding our own shared_ptr copy keeps the FST alive for as long as this
    # iterator exists.
    self._mfst = ifst._mfst
    self._aiter.reset(new fst.MutableArcIteratorClass(ifst._mfst.get(), state))

  cpdef bool done(self):
    """
    done(self)

    Reports whether the iterator has been exhausted.

    Returns:
      True if the iterator is exhausted, False otherwise.
    """
    return self._aiter.get().Done()

  cpdef uint32 flags(self):
    """
    flags(self)

    Returns the current iterator behavioral flags.

    Returns:
      The current iterator behavioral flags as an integer.
    """
    return self._aiter.get().Flags()

  cpdef void next(self):
    """
    next(self)

    Advances the iterator.
    """
    self._aiter.get().Next()

  cpdef size_t position(self):
    """
    position(self)

    Returns the position of the iterator.

    Returns:
      The iterator's position, expressed as an integer.
    """
    return self._aiter.get().Position()

  cpdef void reset(self):
    """
    reset(self)

    Resets the iterator to the initial position.
    """
    self._aiter.get().Reset()

  cpdef void seek(self, size_t a):
    """
    seek(self, a)

    Moves the iterator to a new position.

    Args:
      a: The position to seek to.
    """
    self._aiter.get().Seek(a)

  cpdef void set_flags(self, uint32 flags, uint32 mask):
    """
    set_flags(self, flags, mask)

    Sets the current iterator behavioral flags.

    Args:
      flags: The properties to be set.
      mask: A mask to be applied to the `flags` argument before setting them.
    """
    self._aiter.get().SetFlags(flags, mask)

  cpdef void set_value(self, Arc arc):
    """
    set_value(self, arc)

    Replaces the current arc with a new arc.

    Args:
      arc: The arc to replace the current arc with.
    """
    self._aiter.get().SetValue(deref(arc._arc))

  cpdef object value(self):
    """
    value(self)

    Returns the current arc.
    """
    return _init_Arc(self._aiter.get().Value())
cdef class StateIterator(object):

  """
  StateIterator(ifst)

  Iterates over the states of an FST.
  """

  def __repr__(self):
    return "<StateIterator at 0x{:x}>".format(id(self))

  def __init__(self, _Fst ifst):
    # Holding our own shared_ptr copy keeps the FST alive for as long as this
    # iterator exists.
    self._fst = ifst._fst
    self._siter.reset(new fst.StateIteratorClass(deref(self._fst)))

  # Registers this class as an iterator.
  def __iter__(self):
    return self

  # Adapts the C++ done/value/next protocol to Python's iterator protocol.
  def __next__(self):
    if self.done():
      raise StopIteration
    cdef int64 current = self.value()
    self.next()
    return current

  cpdef bool done(self):
    """
    done(self)

    Reports whether the iterator has been exhausted.

    Returns:
      True if the iterator is exhausted, False otherwise.
    """
    return self._siter.get().Done()

  cpdef void next(self):
    """
    next(self)

    Advances the iterator.
    """
    self._siter.get().Next()

  cpdef void reset(self):
    """
    reset(self)

    Resets the iterator to the initial position.
    """
    self._siter.get().Reset()

  cpdef int64 value(self):
    """
    value(self)

    Returns the current state index.
    """
    return self._siter.get().Value()
cdef _Fst _map(_Fst ifst,
               float delta=fst.kDelta,
               map_type=b"identity",
               weight=None):
  # Shared implementation behind `arcmap`: resolves the mapper name, picks the
  # default weight appropriate to that mapper, and applies the map.
  cdef fst.MapType mapper
  cdef fst.WeightClass map_weight
  if not fst.GetMapType(tostring(map_type), addr(mapper)):
    raise FstArgError("Unknown map type: {!r}".format(map_type))
  # The times mapper defaults to One; every other mapper defaults to Zero.
  if mapper == fst.TIMES_MAPPER:
    map_weight = _get_WeightClass_or_One(ifst.weight_type(), weight)
  else:
    map_weight = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
  return _init_XFst(fst.Map(deref(ifst._fst), mapper, delta, map_weight))
cpdef _Fst arcmap(_Fst ifst,
                  float delta=fst.kDelta,
                  map_type=b"identity",
                  weight=None):
  """
  arcmap(ifst, delta=0.0009765625, map_type="identity", weight=None)

  Constructively applies a transform to all arcs and final states.

  Every arc and final state of the input FST is transformed using one of the
  following mappers:

    * identity: maps to self.
    * input_epsilon: replaces all input labels with epsilon.
    * invert: reciprocates all non-Zero weights.
    * output_epsilon: replaces all output labels with epsilon.
    * plus: adds a constant to all weights.
    * quantize: quantizes weights.
    * rmweight: replaces all non-Zero weights with 1.
    * superfinal: redirects final states to a new superfinal state.
    * times: right-multiplies a constant to all weights.
    * to_log: converts weights to the log semiring.
    * to_log64: converts weights to the log64 semiring.
    * to_standard: converts weights to the tropical ("standard") semiring.

  Args:
    ifst: The input FST.
    delta: Comparison/quantization delta (ignored unless `map_type` is
        `quantize`).
    map_type: A string matching a known mapping operation (see above).
    weight: A Weight or weight string passed to the arc-mapper; ignored unless
        `map_type` is `plus` (in which case it defaults to semiring Zero) or
        `times` (in which case it defaults to semiring One).

  Returns:
    An FST with arcs and final states remapped.

  Raises:
    FstArgError: Unknown map type.

  See also: `statemap`.
  """
  return _map(ifst, delta, map_type, weight)
cpdef _MutableFst compose(_Fst ifst1,
                          _Fst ifst2,
                          compose_filter=b"auto",
                          bool connect=True):
  """
  compose(ifst1, ifst2, compose_filter="auto", connect=True)

  Constructively composes two FSTs.

  If A transduces string x to y with weight a and B transduces y to z with
  weight b, then their composition transduces string x to z with weight
  a \otimes b. The output labels of the first transducer or the input labels
  of the second transducer must be sorted (or otherwise support appropriate
  matchers).

  Args:
    ifst1: The first input FST.
    ifst2: The second input FST.
    compose_filter: A string matching a known composition filter; one of:
        "alt_sequence", "auto", "match", "null", "sequence", "trivial".
    connect: Should output be trimmed?

  Returns:
    The composition, as a new FST with the arc type of `ifst1`.

  See also: `arcsort`.
  """
  cdef unique_ptr[fst.VectorFstClass] result
  cdef unique_ptr[fst.ComposeOptions] options
  # The output container takes its arc type from the first input.
  result.reset(new fst.VectorFstClass(ifst1.arc_type()))
  options.reset(
      new fst.ComposeOptions(connect,
                             _get_compose_filter(tostring(compose_filter))))
  fst.Compose(deref(ifst1._fst),
              deref(ifst2._fst),
              result.get(),
              deref(options))
  return _init_MutableFst(result.release())
cpdef _Fst convert(_Fst ifst, fst_type=None):
  """
  convert(ifst, fst_type=None)

  Constructively converts an FST to a new internal representation.

  Args:
    ifst: The input FST.
    fst_type: A string indicating the FST type to convert to, or None if
        no conversion is desired.

  Returns:
    An equivalent Fst converted to the desired FST type.

  Raises:
    FstOpError: Conversion failed.
  """
  cdef unique_ptr[fst.FstClass] converted
  cdef string fst_type_string
  # None is passed on to Convert as the empty string.
  if fst_type is None:
    fst_type_string = b""
  else:
    fst_type_string = tostring(fst_type)
  converted.reset(fst.Convert(deref(ifst._fst), fst_type_string))
  # Script-land Convert returns the null pointer to signal failure.
  if converted.get() == NULL:
    raise FstOpError("Conversion to {!r} failed".format(fst_type))
  return _init_XFst(converted.release())
cpdef _MutableFst determinize(_Fst ifst,
                              float delta=fst.kDelta,
                              det_type=b"functional",
                              int64 nstate=fst.kNoStateId,
                              int64 subsequential_label=0,
                              weight=None,
                              bool increment_subsequential_label=False):
  """
  determinize(ifst, delta=0.0009765625, det_type="functional",
              nstate=NO_STATE_ID, subsequential_label=0, weight=None,
              increment_subsequential_label=False)

  Constructively determinizes a weighted FST.

  This operation creates an equivalent FST that has the property that no
  state has two transitions with the same input label. For this algorithm,
  epsilon transitions are treated as regular symbols (cf. `rmepsilon`).

  Args:
    ifst: The input FST.
    delta: Comparison/quantization delta.
    det_type: Type of determinization; one of: "functional" (input transducer
        is functional), "nonfunctional" (input transducer is not functional)
        and "disambiguate" (input transducer is not functional but only keep
        the min of ambiguous outputs).
    nstate: State number threshold.
    subsequential_label: Input label of arc corresponding to residual final
        output when producing a subsequential transducer.
    weight: A Weight or weight string indicating the desired weight threshold
        below which paths are pruned; if omitted, no paths are pruned.
    increment_subsequential_label: Increment subsequential when creating
        several arcs for the residual final output at a given state.

  Returns:
    An equivalent deterministic FST.

  Raises:
    FstArgError: Unknown determinization type.

  See also: `disambiguate`, `rmepsilon`.
  """
  cdef unique_ptr[fst.VectorFstClass] result
  cdef fst.WeightClass threshold
  cdef fst.DeterminizeType det_type_enum
  cdef unique_ptr[fst.DeterminizeOptions] options
  result.reset(new fst.VectorFstClass(ifst.arc_type()))
  # Threshold is set to semiring Zero (no pruning) if weight unspecified.
  threshold = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
  if not fst.GetDeterminizeType(tostring(det_type), addr(det_type_enum)):
    raise FstArgError("Unknown determinization type: {!r}".format(det_type))
  options.reset(new fst.DeterminizeOptions(delta,
                                           threshold,
                                           nstate,
                                           subsequential_label,
                                           det_type_enum,
                                           increment_subsequential_label))
  fst.Determinize(deref(ifst._fst), result.get(), deref(options))
  return _init_MutableFst(result.release())
cpdef _MutableFst difference(_Fst ifst1,
                             _Fst ifst2,
                             compose_filter=b"auto",
                             bool connect=True):
  """
  difference(ifst1, ifst2, compose_filter="auto", connect=True)

  Constructively computes the difference of two FSTs.

  This operation computes the difference between two FSAs. Only strings that
  are in the first automaton but not in second are retained in the result. The
  first argument must be an acceptor; the second argument must be an
  unweighted, epsilon-free, deterministic acceptor. The output labels of the
  first transducer or the input labels of the second transducer must be sorted
  (or otherwise support appropriate matchers).

  Args:
    ifst1: The first input FST.
    ifst2: The second input FST.
    compose_filter: A string matching a known composition filter; one of:
        "alt_sequence", "auto", "match", "null", "sequence", "trivial".
    connect: Should the output FST be trimmed?

  Returns:
    An FST representing the difference of the FSTs.
  """
  cdef unique_ptr[fst.VectorFstClass] result
  cdef unique_ptr[fst.ComposeOptions] options
  # The output container takes its arc type from the first input.
  result.reset(new fst.VectorFstClass(ifst1.arc_type()))
  options.reset(
      new fst.ComposeOptions(connect,
                             _get_compose_filter(tostring(compose_filter))))
  fst.Difference(deref(ifst1._fst),
                 deref(ifst2._fst),
                 result.get(),
                 deref(options))
  return _init_MutableFst(result.release())
cpdef _MutableFst disambiguate(_Fst ifst,
                               float delta=fst.kDelta,
                               int64 nstate=fst.kNoStateId,
                               int64 subsequential_label=0,
                               weight=None):
  """
  disambiguate(ifst, delta=0.0009765625, nstate=NO_STATE_ID,
               subsequential_label=0, weight=None)

  Constructively disambiguates a weighted transducer.

  The result will be an equivalent FST that has the property that no two
  successful paths have the same input labeling. For this algorithm, epsilon
  transitions are treated as regular symbols (cf. `rmepsilon`).

  Args:
    ifst: The input FST.
    delta: Comparison/quantization delta.
    nstate: State number threshold.
    subsequential_label: Input label of arc corresponding to residual final
        output when producing a subsequential transducer.
    weight: A Weight or weight string indicating the desired weight threshold
        below which paths are pruned; if omitted, no paths are pruned.

  Returns:
    An equivalent disambiguated FST.

  See also: `determinize`, `rmepsilon`.
  """
  cdef unique_ptr[fst.VectorFstClass] result
  cdef fst.WeightClass threshold
  cdef unique_ptr[fst.DisambiguateOptions] options
  result.reset(new fst.VectorFstClass(ifst.arc_type()))
  # Threshold is set to semiring Zero (no pruning) if no weight is specified.
  threshold = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
  options.reset(new fst.DisambiguateOptions(delta,
                                            threshold,
                                            nstate,
                                            subsequential_label))
  fst.Disambiguate(deref(ifst._fst), result.get(), deref(options))
  return _init_MutableFst(result.release())
cpdef _MutableFst epsnormalize(_Fst ifst, bool eps_norm_output=False):
  """
  epsnormalize(ifst, eps_norm_output=False)

  Constructively epsilon-normalizes an FST.

  This operation creates an equivalent FST that is epsilon-normalized. An
  acceptor is epsilon-normalized if it is epsilon-removed (cf. `rmepsilon`).
  A transducer is input epsilon-normalized if, in addition, along any path,
  all arcs with epsilon input labels follow all arcs with non-epsilon input
  labels. Output epsilon-normalized is defined similarly. The input FST must
  be functional.

  Args:
    ifst: The input FST.
    eps_norm_output: Should the FST be output epsilon-normalized?

  Returns:
    An equivalent epsilon-normalized FST.

  See also: `rmepsilon`.
  """
  cdef unique_ptr[fst.VectorFstClass] result
  result.reset(new fst.VectorFstClass(ifst.arc_type()))
  # The flag selects between output and input normalization.
  fst.EpsNormalize(
      deref(ifst._fst),
      result.get(),
      fst.EPS_NORM_OUTPUT if eps_norm_output else fst.EPS_NORM_INPUT)
  return _init_MutableFst(result.release())
cpdef bool equal(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta):
  """
  equal(ifst1, ifst2, delta=0.0009765625)

  Are two FSTs equal?

  Tests whether two FSTs have the same states with the same numbering and the
  same transitions with the same labels and weights in the same order.

  Args:
    ifst1: The first input FST.
    ifst2: The second input FST.
    delta: Comparison/quantization delta.

  Returns:
    True if the FSTs satisfy the above condition, else False.

  See also: `equivalent`, `isomorphic`, `randequivalent`.
  """
  cdef bool same = fst.Equal(deref(ifst1._fst), deref(ifst2._fst), delta)
  return same
cpdef bool equivalent(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta) except *:
  """
  equivalent(ifst1, ifst2, delta=0.0009765625)

  Are the two acceptors equivalent?

  Tests whether two epsilon-free deterministic weighted acceptors are
  equivalent, that is if they accept the same strings with the same weights.

  Args:
    ifst1: The first input FST.
    ifst2: The second input FST.
    delta: Comparison/quantization delta.

  Returns:
    True if the FSTs satisfy the above condition, else False.

  See also: `equal`, `isomorphic`, `randequivalent`.
  """
  cdef bool same = fst.Equivalent(deref(ifst1._fst), deref(ifst2._fst), delta)
  return same
cpdef _MutableFst intersect(_Fst ifst1,
                            _Fst ifst2,
                            compose_filter=b"auto",
                            bool connect=True):
  """
  intersect(ifst1, ifst2, compose_filter="auto", connect=True)

  Constructively intersects two FSTs.

  This operation computes the intersection (Hadamard product) of two FSTs.
  Only strings that are in both automata are retained in the result. The two
  arguments must be acceptors. One of the arguments must be label-sorted (or
  otherwise support appropriate matchers).

  Args:
    ifst1: The first input FST.
    ifst2: The second input FST.
    compose_filter: A string matching a known composition filter; one of:
        "alt_sequence", "auto", "match", "null", "sequence", "trivial".
    connect: Should output be trimmed?

  Returns:
    The intersection, as a new FST with the arc type of `ifst1`.
  """
  cdef unique_ptr[fst.VectorFstClass] result
  cdef unique_ptr[fst.ComposeOptions] options
  # The output container takes its arc type from the first input.
  result.reset(new fst.VectorFstClass(ifst1.arc_type()))
  options.reset(
      new fst.ComposeOptions(connect,
                             _get_compose_filter(tostring(compose_filter))))
  fst.Intersect(deref(ifst1._fst),
                deref(ifst2._fst),
                result.get(),
                deref(options))
  return _init_MutableFst(result.release())
cpdef bool isomorphic(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta):
  """
  isomorphic(ifst1, ifst2, delta=0.0009765625)

  Are the two acceptors isomorphic?

  Determines whether two transducers with a certain required determinism have
  the same states, irrespective of numbering, and the same transitions with
  the same labels and weights, irrespective of ordering. In other words, FSTs
  A, B are isomorphic if and only if the states of A can be renumbered and the
  transitions leaving each state reordered so the two are equal (according to
  the definition given in `equal`).

  Args:
    ifst1: The first input FST.
    ifst2: The second input FST.
    delta: Comparison/quantization delta.

  Returns:
    True if the two transducers satisfy the above condition, else False.

  See also: `equal`, `equivalent`, `randequivalent`.
  """
  cdef bool same = fst.Isomorphic(deref(ifst1._fst), deref(ifst2._fst), delta)
  return same
cpdef _MutableFst prune(_Fst ifst,
                        float delta=fst.kDelta,
                        int64 nstate=fst.kNoStateId,
                        weight=None):
  """
  prune(ifst, delta=0.0009765625, nstate=NO_STATE_ID, weight=None)

  Constructively removes paths with weights below a certain threshold.

  This operation deletes states and arcs in the input FST that do not belong
  to a successful path whose weight is no more (w.r.t the natural semiring
  order) than the threshold t \otimes-times the weight of the shortest path in
  the input FST. Weights must be commutative and have the path property.

  Args:
    ifst: The input FST.
    delta: Comparison/quantization delta.
    nstate: State number threshold.
    weight: A Weight or weight string indicating the desired weight threshold
        below which paths are pruned; if omitted, no paths are pruned.

  Returns:
    The pruned result, as a new FST with the arc type of `ifst`.

  See also: The destructive variant.
  """
  cdef unique_ptr[fst.VectorFstClass] result
  cdef fst.WeightClass threshold
  result.reset(new fst.VectorFstClass(ifst.arc_type()))
  threshold = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
  fst.Prune(deref(ifst._fst), result.get(), threshold, nstate, delta)
  return _init_MutableFst(result.release())
cpdef _MutableFst push(_Fst ifst,
                       float delta=fst.kDelta,
                       bool push_weights=False,
                       bool push_labels=False,
                       bool remove_common_affix=False,
                       bool remove_total_weight=False,
                       bool to_final=False):
  """
  push(ifst, delta=0.0009765625, push_weights=False, push_labels=False,
       remove_common_affix=False, remove_total_weight=False, to_final=False)

  Constructively pushes weights/labels towards initial or final states.

  The result is a new transducer, equivalent to the input, in which the
  weights and/or the labels have been moved towards the initial state or
  towards the final states.

  When pushing weights towards the initial state, the sum of the weight of the
  outgoing transitions and final weight at any non-initial state is equal to 1
  in the resulting machine. When pushing weights towards the final states, the
  sum of the weight of the incoming transitions at any state is equal to 1.
  Weights need to be left distributive when pushing towards the initial state
  and right distributive when pushing towards the final states.

  Pushing labels towards the initial state consists in minimizing at every
  state the length of the longest common prefix of the output labels of the
  outgoing paths. Pushing labels towards the final states consists in
  minimizing at every state the length of the longest common suffix of the
  output labels of the incoming paths.

  Args:
    ifst: The input FST.
    delta: Comparison/quantization delta.
    push_weights: Should weights be pushed?
    push_labels: Should labels be pushed?
    remove_common_affix: If pushing labels, should common prefix/suffix be
        removed?
    remove_total_weight: If pushing weights, should total weight be removed?
    to_final: Push towards final states?

  Returns:
    An equivalent pushed FST.

  See also: The destructive variant.
  """
  # This is copied, almost verbatim, from ./fstpush.cc.
  cdef unique_ptr[fst.VectorFstClass] pushed
  pushed.reset(new fst.VectorFstClass(ifst.arc_type()))
  # The four boolean switches are folded into a single bit mask.
  cdef uint32 push_flags = fst.GetPushFlags(push_weights,
                                            push_labels,
                                            remove_common_affix,
                                            remove_total_weight)
  fst.Push(deref(ifst._fst),
           pushed.get(),
           push_flags,
           fst.GetReweightType(to_final),
           delta)
  return _init_MutableFst(pushed.release())
cpdef bool randequivalent(_Fst ifst1,
                          _Fst ifst2,
                          int32 npath=1,
                          float delta=fst.kDelta,
                          time_t seed=0,
                          select=b"uniform",
                          int32 max_length=INT32_MAX) except *:
  """
  randequivalent(ifst1, ifst2, npath=1, delta=0.0009765625, seed=0,
                 select="uniform", max_length=2147483647)

  Are two acceptors stochastically equivalent?

  This operation tests whether two FSTs are equivalent by randomly generating
  paths alternately in each of the two FSTs. For each randomly generated path,
  the algorithm computes for each of the two FSTs the sum of the weights of all
  the successful paths sharing the same input and output labels as the randomly
  generated path and checks that these two values are within `delta`.

  Args:
    ifst1: The first input FST.
    ifst2: The second input FST.
    npath: The number of random paths to generate.
    delta: Comparison/quantization delta.
    seed: An optional seed value for random path generation; if zero, the
        current time and process ID is used.
    select: A string matching a known random arc selection type; one of:
        "uniform", "log_prob", "fast_log_prob".
    max_length: The maximum length of each random path.

  Returns:
    True if the two transducers satisfy the above condition, else False.

  See also: `equal`, `equivalent`, `isomorphic`, `randgen`.
  """
  cdef fst.RandArcSelection ras = _get_rand_arc_selection(tostring(select))
  cdef unique_ptr[fst.RandGenOptions[fst.RandArcSelection]] opts
  # The three trailing options will be ignored by RandEquivalent.
  opts.reset(new fst.RandGenOptions[fst.RandArcSelection](ras, max_length,
                                                           1, False, False))
  # A zero seed means "pick one": derive it from the clock and the process ID.
  if seed == 0:
    seed = time(NULL) + getpid()
  return fst.RandEquivalent(deref(ifst1._fst), deref(ifst2._fst), npath, delta,
                            seed, deref(opts))
cpdef _MutableFst randgen(_Fst ifst,
                          int32 npath=1,
                          time_t seed=0,
                          select=b"uniform",
                          int32 max_length=INT32_MAX,
                          bool weighted=False,
                          bool remove_total_weight=False):
  """
  randgen(ifst, npath=1, seed=0, select="uniform", max_length=2147483647,
          weighted=False, remove_total_weight=False)

  Randomly generate successful paths in an FST.

  This operation randomly generates a set of successful paths in the input FST.
  This relies on a mechanism for selecting arcs, specified using the `select`
  argument. The default selector, "uniform", randomly selects a transition
  using a uniform distribution. The "log_prob" selector randomly selects a
  transition w.r.t. the weights treated as negative log probabilities after
  normalizing for the total weight leaving the state. In all cases, finality is
  treated as a transition to a super-final state.

  Args:
    ifst: The input FST.
    npath: The number of random paths to generate.
    seed: An optional seed value for random path generation; if zero, the
        current time and process ID is used.
    select: A string matching a known random arc selection type; one of:
        "uniform", "log_prob", "fast_log_prob".
    max_length: The maximum length of each random path.
    weighted: Should the output be weighted by path count?
    remove_total_weight: Should the total weight be removed (ignored when
        `weighted` is False)?

  Returns:
    An FST containing one or more random paths.

  See also: `randequivalent`.
  """
  cdef fst.RandArcSelection ras = _get_rand_arc_selection(tostring(select))
  cdef unique_ptr[fst.RandGenOptions[fst.RandArcSelection]] opts
  opts.reset(new fst.RandGenOptions[fst.RandArcSelection](ras, max_length,
                                                           npath, weighted,
                                                           remove_total_weight))
  # The output container uses the same arc type as the input.
  cdef unique_ptr[fst.VectorFstClass] tfst
  tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
  # A zero seed means "pick one": derive it from the clock and the process ID.
  if seed == 0:
    seed = time(NULL) + getpid()
  fst.RandGen(deref(ifst._fst), tfst.get(), seed, deref(opts))
  return _init_MutableFst(tfst.release())
cpdef _MutableFst replace(pairs,
                          call_arc_labeling=b"input",
                          return_arc_labeling=b"neither",
                          bool epsilon_on_replace=False,
                          int64 return_label=0):
  """
  replace(pairs, call_arc_labeling="input", return_arc_labeling="neither",
          epsilon_on_replace=False, return_label=0)

  Recursively replaces arcs in the FST with other FST(s).

  This operation performs the dynamic replacement of arcs in one FST with
  another FST, allowing the definition of FSTs analogous to RTNs. It takes as
  input a set of pairs formed by a non-terminal label and its corresponding
  FST; the first pair yielded by `pairs` identifies the root FST in that set.
  The resulting FST is obtained by taking the root FST and recursively replacing
  each arc having a nonterminal as output label by its corresponding FST. More
  precisely, an arc from state s to state d with (nonterminal) output label n in
  this FST is replaced by redirecting this "call" arc to the initial state of a
  copy F of the FST for n, and adding "return" arcs from each final state of F
  to d. Optional arguments control how the call and return arcs are labeled; by
  default, the only non-epsilon label is placed on the call arc.

  Args:
    pairs: An iterable of (nonterminal label, FST) pairs, where the former is an
        unsigned integer and the latter is an Fst instance. The first pair
        supplies the root label and the arc type of the result.
    call_arc_labeling: A string indicating which call arc labels should be
        non-epsilon. One of: "input" (default), "output", "both", "neither".
        This value is set to "neither" if epsilon_on_replace is True.
    return_arc_labeling: A string indicating which return arc labels should be
        non-epsilon. One of: "input", "output", "both", "neither" (default).
        This value is set to "neither" if epsilon_on_replace is True.
    epsilon_on_replace: Should call and return arcs be epsilon arcs? If True,
        this effectively overrides call_arc_labeling and return_arc_labeling,
        setting both to "neither".
    return_label: The integer label for return arcs.

  Returns:
    An FST resulting from expanding the input RTN.
  """
  cdef vector[fst.LabelFstClassPair] _pairs
  cdef int64 root_label
  cdef int64 label
  cdef _Fst ifst
  it = iter(pairs)
  # The first pair is consumed separately: its label becomes the root label and
  # its FST determines the arc type of the output.
  (root_label, ifst) = next(it)
  _pairs.push_back(fst.LabelFstClassPair(root_label, ifst._fst.get()))
  cdef unique_ptr[fst.VectorFstClass] tfst
  tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
  # Remaining pairs are appended as-is; only raw pointers to the underlying
  # FSTs are stored in the vector.
  for (label, ifst) in it:
    _pairs.push_back(fst.LabelFstClassPair(label, ifst._fst.get()))
  cdef fst.ReplaceLabelType cal = _get_replace_label_type(
      tostring(call_arc_labeling), epsilon_on_replace)
  cdef fst.ReplaceLabelType ral = _get_replace_label_type(
      tostring(return_arc_labeling), epsilon_on_replace)
  cdef unique_ptr[fst.ReplaceOptions] opts
  opts.reset(new fst.ReplaceOptions(root_label, cal, ral, return_label))
  fst.Replace(_pairs, tfst.get(), deref(opts))
  return _init_MutableFst(tfst.release())
cpdef _MutableFst reverse(_Fst ifst, bool require_superinitial=True):
  """
  reverse(ifst, require_superinitial=True)

  Constructively reverses an FST's transduction.

  The input FST is left untouched and its reversal is returned as a new FST.
  If A transduces string x to y with weight a, then the reverse of A
  transduces the reverse of x to the reverse of y with weight a.Reverse().
  (Typically, a = a.Reverse() and Arc = RevArc, e.g., TropicalWeight and
  LogWeight.) In general, e.g., when the weights only form a left or right
  semiring, the output arc type must match the input arc type.

  Args:
    ifst: The input FST.
    require_superinitial: Should a superinitial state be created?

  Returns:
    A reversed FST.
  """
  # The output container uses the same arc type as the input.
  cdef unique_ptr[fst.VectorFstClass] reversed_fst
  reversed_fst.reset(new fst.VectorFstClass(ifst.arc_type()))
  fst.Reverse(deref(ifst._fst), reversed_fst.get(), require_superinitial)
  return _init_MutableFst(reversed_fst.release())
3917 # Pure C++ helper for shortestdistance.
cdef vector[fst.WeightClass] *_shortestdistance(_Fst ifst,
    float delta=fst.kDelta, int64 nstate=fst.kNoStateId, queue_type=b"auto",
    bool reverse=False) except *:
  # Computes the per-state shortest distances for `ifst` and returns them as a
  # heap-allocated vector. The pointer is released from its unique_ptr before
  # returning, so the caller takes ownership and must free it.
  cdef unique_ptr[vector[fst.WeightClass]] distance
  distance.reset(new vector[fst.WeightClass]())
  # For scoping reasons, these have to be declared here even though they may
  # not be used in all cases.
  cdef unique_ptr[fst.ShortestDistanceOptions] opts
  if reverse:
    # Only the simpler signature supports shortest distance to final states;
    # `nstate` and `queue_type` arguments are ignored.
    fst.ShortestDistance(deref(ifst._fst), distance.get(), True, delta)
  else:
    # Forward direction: use the options-based overload so that the queue
    # type and state threshold are honored.
    opts.reset(new fst.ShortestDistanceOptions(
        _get_queue_type(tostring(queue_type)), fst.ANY_ARC_FILTER, nstate,
        delta))
    fst.ShortestDistance(deref(ifst._fst), distance.get(), deref(opts))
  return distance.release()
def shortestdistance(_Fst ifst,
                     float delta=fst.kDelta,
                     int64 nstate=fst.kNoStateId,
                     queue_type=b"auto",
                     bool reverse=False):
  """
  shortestdistance(ifst, delta=0.0009765625, nstate=NO_STATE_ID,
                   queue_type="auto", reverse=False)

  Compute the shortest distance from the initial or final state.

  This operation computes the shortest distance from the initial state (when
  `reverse` is False) or from every state to the final state (when `reverse` is
  True). The shortest distance from p to q is the \oplus-sum of the weights of
  all the paths between p and q. The weights must be right (if `reverse` is
  False) or left (if `reverse` is True) distributive, and k-closed (i.e., 1
  \oplus x \oplus x^2 \oplus ... \oplus x^{k + 1} = 1 \oplus x \oplus x^2
  \oplus ... \oplus x^k; e.g., TropicalWeight).

  Args:
    ifst: The input FST.
    delta: Comparison/quantization delta.
    nstate: State number threshold (ignored if `reverse` is True).
    queue_type: A string matching a known queue type; one of: "auto", "fifo",
        "lifo", "shortest", "state", "top" (ignored if `reverse` is True).
    reverse: Should the reverse distance (from each state to the final state)
        be computed?

  Returns:
    A list of Weight objects representing the shortest distance for each state.
  """
  # Take ownership of the vector allocated by the C++ helper so that it is
  # freed when this function returns.
  cdef unique_ptr[vector[fst.WeightClass]] distance
  distance.reset(_shortestdistance(ifst, delta, nstate, queue_type, reverse))
  cdef string weight_type = ifst.weight_type()
  # Each C++ weight is converted to a Python Weight via its string form.
  return [Weight(weight_type, weight.ToString()) for weight in deref(distance)]
cpdef _MutableFst shortestpath(_Fst ifst,
                               float delta=fst.kDelta,
                               int32 nshortest=1,
                               int64 nstate=fst.kNoStateId,
                               queue_type=b"auto",
                               bool unique=False,
                               weight=None):
  """
  shortestpath(ifst, delta=0.0009765625, nshortest=1, nstate=NO_STATE_ID,
               queue_type="auto", unique=False, weight=None)

  Construct an FST containing the shortest path(s) in the input FST.

  The returned FST holds the n-shortest paths of the input FST, i.e., the
  n-lowest weight paths w.r.t. the natural semiring order. The single path that
  can be read from the ith of at most n transitions leaving the initial state
  of the resulting FST is the ith shortest path. The weights need to be right
  distributive and have the path property. They also need to be left
  distributive for n-shortest with n > 1 (e.g., TropicalWeight).

  Args:
    ifst: The input FST.
    delta: Comparison/quantization delta.
    nshortest: The number of paths to return.
    nstate: State number threshold.
    queue_type: A string matching a known queue type; one of: "auto", "fifo",
        "lifo", "shortest", "state", "top".
    unique: Should the resulting FST only contain distinct paths? (Requires
        the input FST to be an acceptor; epsilons are treated as if they are
        non-epsilon labels.)
    weight: A Weight or weight string indicating the desired weight threshold
        below which paths are pruned; if omitted, no paths are pruned.

  Returns:
    An FST containing the n-shortest paths.
  """
  cdef unique_ptr[fst.VectorFstClass] paths
  paths.reset(new fst.VectorFstClass(ifst.arc_type()))
  # Threshold is set to semiring Zero (no pruning) if no weight is specified.
  cdef fst.WeightClass threshold = _get_WeightClass_or_Zero(
      ifst.weight_type(), weight)
  cdef unique_ptr[fst.ShortestPathOptions] path_opts
  path_opts.reset(new fst.ShortestPathOptions(
      _get_queue_type(tostring(queue_type)),
      nshortest,
      unique,
      delta,
      threshold,
      nstate))
  fst.ShortestPath(deref(ifst._fst), paths.get(), deref(path_opts))
  return _init_MutableFst(paths.release())
cpdef _Fst statemap(_Fst ifst, map_type):
  """
  statemap(ifst, map_type)

  Constructively applies a transform to all states.

  Every state of the input is transformed according to the requested map type,
  and the result is returned as a new FST. Only the map types listed below are
  currently supported.

  Args:
    ifst: The input FST.
    map_type: A string matching a known mapping operation; one of: "arc_sum"
        (sum weights of identically-labeled multi-arcs), "arc_unique" (deletes
        non-unique identically-labeled multi-arcs).

  Returns:
    An FST with states remapped.

  Raises:
    FstArgError: Unknown map type.

  See also: `arcmap`.
  """
  # State mapping takes no delta or weight argument of its own, so the shared
  # mapping helper is called with the default delta and no weight.
  cdef _Fst mapped = _map(ifst, fst.kDelta, map_type, None)
  return mapped
cpdef _MutableFst synchronize(_Fst ifst):
  """
  synchronize(ifst)

  Constructively synchronizes an FST.

  The input transducer is left untouched; an equivalent FST is returned that
  has the property that during the traversal of a path, the delay is either
  zero or strictly increasing, where the delay is the difference between the
  number of non-epsilon output labels and input labels along the path. For
  the algorithm to terminate, the input transducer must have bounded delay,
  i.e., the delay of every cycle must be zero.

  Args:
    ifst: The input FST.

  Returns:
    An equivalent synchronized FST.
  """
  # The output container uses the same arc type as the input.
  cdef unique_ptr[fst.VectorFstClass] synced
  synced.reset(new fst.VectorFstClass(ifst.arc_type()))
  fst.Synchronize(deref(ifst._fst), synced.get())
  return _init_MutableFst(synced.release())
cdef class Compiler(object):

  """
  Compiler(fst_type="vector", arc_type="standard", isymbols=None,
           osymbols=None, ssymbols=None, acceptor=False, keep_isymbols=False,
           keep_osymbols=False, keep_state_numbering=False,
           allow_negative_labels=False)

  Class used to compile FSTs from strings.

  This class is used to compile FSTs specified using the AT&T FSM library
  format described here:

  http://web.eecs.umich.edu/~radev/NLP-fall2015/resources/fsm_archive/fsm.5.html

  This is the same format used by the `fstcompile` executable.

  Compiler options (symbol tables, etc.) are set at construction time.

      compiler = fst.Compiler(isymbols=ascii_syms, osymbols=ascii_syms)

  Once constructed, Compiler instances behave like a file handle opened for
  writing:

      # /ba+/
      print >> compiler, "0 1 50 50"
      print >> compiler, "1 2 49 49"
      print >> compiler, "2 2 49 49"
      print >> compiler, "2"

  The `compile` method returns an actual FST instance:

      sheep_machine = compiler.compile()

  Compilation flushes the internal buffer, so the compiler instance can be
  reused to compile new machines with the same symbol tables (etc.)

  Args:
    fst_type: A string indicating the container type for the compiled FST.
    arc_type: A string indicating the arc type for the compiled FST.
    isymbols: An optional SymbolTable used to label input symbols.
    osymbols: An optional SymbolTable used to label output symbols.
    ssymbols: An optional SymbolTable used to label states.
    acceptor: Should the FST be rendered in acceptor format if possible?
    keep_isymbols: Should the input symbol table be stored in the FST?
    keep_osymbols: Should the output symbol table be stored in the FST?
    keep_state_numbering: Should the state numbering be preserved?
    allow_negative_labels: Should negative labels be allowed? (Not
        recommended; may cause conflicts).
  """

  def __cinit__(self,
                string fst_type=b"vector",
                string arc_type=b"standard",
                SymbolTable isymbols=None,
                SymbolTable osymbols=None,
                SymbolTable ssymbols=None,
                bool acceptor=False,
                bool keep_isymbols=False,
                bool keep_osymbols=False,
                bool keep_state_numbering=False,
                bool allow_negative_labels=False):
    # Text written to the compiler accumulates in this stream until `compile`
    # is called.
    self._sstrm.reset(new stringstream())
    self._fst_type = tostring(fst_type)
    self._arc_type = tostring(arc_type)
    # Each symbol table slot stays NULL unless a table was supplied.
    # NOTE(review): the table pointer is stored as-is rather than copied;
    # presumably the caller must keep the SymbolTable alive for as long as
    # this Compiler is used -- confirm.
    self._isymbols = NULL
    if isymbols is not None:
      self._isymbols = isymbols._table
    self._osymbols = NULL
    if osymbols is not None:
      self._osymbols = osymbols._table
    self._ssymbols = NULL
    if ssymbols is not None:
      self._ssymbols = ssymbols._table
    self._acceptor = acceptor
    self._keep_isymbols = keep_isymbols
    self._keep_osymbols = keep_osymbols
    self._keep_state_numbering = keep_state_numbering
    self._allow_negative_labels = allow_negative_labels

  cpdef _Fst compile(self):
    """
    compile()

    Compiles the FST in the compiler string buffer.

    This method compiles the FST and returns the resulting machine. The string
    buffer is emptied whether or not compilation succeeds.

    Returns:
      The FST described by the compiler string buffer.

    Raises:
      FstOpError: Compilation failed.
    """
    cdef unique_ptr[fst.FstClass] tfst
    tfst.reset(fst.CompileFstInternal(deref(self._sstrm),
        b"<pywrapfst>", self._fst_type, self._arc_type, self._isymbols,
        self._osymbols, self._ssymbols, self._acceptor, self._keep_isymbols,
        self._keep_osymbols, self._keep_state_numbering,
        self._allow_negative_labels))
    # The buffer is replaced before the failure check, so the instance can be
    # reused even after a failed compilation.
    self._sstrm.reset(new stringstream())
    if tfst.get() == NULL:
      raise FstOpError("Compilation failed")
    return _init_XFst(tfst.release())

  cpdef void write(self, expression):
    """
    write(expression)

    Writes a string into the compiler string buffer.

    This method adds a line to the compiler string buffer. It is normally
    invoked using the right shift operator, like so:

        compiler = fst.Compiler()
        print >> compiler, "0 0 49 49"
        print >> compiler, "0"

    Args:
      expression: A string expression to add to compiler string buffer.
    """
    deref(self._sstrm) << tostring(expression)
4203 ## FarReader and FarWriter.
cdef class FarReader(object):

  """
  (No constructor.)

  FAR ("Fst ARchive") reader object.

  This class is used to read a FAR from disk. FARs contain one or more FSTs (of
  the same arc type) indexed by a unique string key. To construct a FarReader
  object, use the `open` class method.

  A FarReader can be iterated over, yielding (key, FST) pairs, and indexed by
  key like a dictionary.

  Attributes:
    arc_type: A string indicating the arc type.
    far_type: A string indicating the FAR type.
  """

  def __init__(self):
    # Direct construction is disallowed; instances come from `open`.
    raise FstDeletedConstructorError(
        "Cannot construct {}".format(self.__class__.__name__))

  def __repr__(self):
    return "<{} FarReader at 0x{:x}>".format(self.far_type(), id(self))

  @classmethod
  def open(cls, *filenames):
    """
    FarReader.open(*filenames)

    Creates a FarReader object.

    This class method creates a FarReader given the string location of one or
    more FAR files on disk.

    Args:
      *filenames: The string location of one or more input FAR files.

    Returns:
      A new FarReader instance.

    Raises:
      FstIOError: Read failed.
    """
    filenames = [tostring(filename) for filename in filenames]
    cdef unique_ptr[fst.FarReaderClass] tfar
    tfar.reset(fst.FarReaderClass.Open(filenames))
    if tfar.get() == NULL:
      raise FstIOError("Read failed: {!r}".format(filenames))
    # __new__ is used because __init__ always raises.
    cdef FarReader result = FarReader.__new__(FarReader)
    result._reader.reset(tfar.release())
    return result

  # This just registers this class as a possible iterator.
  def __iter__(self):
    return self

  # Magic method used to get a Pythonic API out of the C++ API.
  def __next__(self):
    if self.done():
      raise StopIteration
    # Read the current entry before advancing the underlying reader.
    cdef string k = self.get_key()
    cdef _Fst f = self.get_fst()
    self.next()
    return (k, f)

  cpdef string arc_type(self):
    """
    arc_type(self)

    Returns a string indicating the arc type.
    """
    return self._reader.get().ArcType()

  cpdef bool done(self):
    """
    done(self)

    Indicates whether the iterator is exhausted or not.

    Returns:
      True if the iterator is exhausted, False otherwise.
    """
    return self._reader.get().Done()

  cpdef bool error(self):
    """
    error(self)

    Indicates whether the FarReader has encountered an error.

    Returns:
      True if the FarReader is in an errorful state, False otherwise.
    """
    return self._reader.get().Error()

  cpdef string far_type(self):
    """
    far_type(self)

    Returns a string indicating the FAR type.
    """
    return fst.GetFarTypeString(self._reader.get().Type())

  cpdef bool find(self, key):
    """
    find(self, key)

    Sets the current position to the first entry greater than or equal to the
    key (a string) and indicates whether or not a match was found.

    Args:
      key: The string key to search for.

    Returns:
      True if the key was found, False otherwise.
    """
    return self._reader.get().Find(tostring(key))

  cpdef _Fst get_fst(self):
    """
    get_fst(self)

    Returns the FST at the current position.

    Returns:
      A copy of the FST at the current position.
    """
    return _init_XFst(new fst.FstClass(
        deref(self._reader.get().GetFstClass())))

  cpdef string get_key(self):
    """
    get_key(self)

    Returns the string key at the current position.

    Returns:
      The string key at the current position.
    """
    return self._reader.get().GetKey()

  cpdef void next(self):
    """
    next(self)

    Advances the iterator.
    """
    self._reader.get().Next()

  cpdef void reset(self):
    """
    reset(self)

    Resets the iterator to the initial position.
    """
    self._reader.get().Reset()

  # Dictionary-like access by combining `find` and `get_fst`.
  # Note that a lookup moves the reader's current position.
  def __getitem__(self, key):
    if not self.find(key):
      raise KeyError(key)
    return self.get_fst()
cdef class FarWriter(object):

  """
  (No constructor.)

  FAR ("Fst ARchive") writer object.

  This class is used to write FSTs (of the same arc type) to a FAR on disk. To
  construct a FarWriter, use the `create` class method.

  Note that the data is not guaranteed to flush to disk until the FarWriter
  is garbage-collected. If a FarWriter has been assigned to only one variable,
  then calling `del` on that variable should decrement the object's reference
  count from 1 to 0, triggering a flush to disk on the next GC cycle.

  Attributes:
    arc_type: A string indicating the arc type.
    far_type: A string indicating the FAR type.
  """

  def __init__(self):
    # Direct construction is disallowed; instances come from `create`.
    raise FstDeletedConstructorError(
        "Cannot construct {}".format(self.__class__.__name__))

  def __repr__(self):
    return "<{} FarWriter at 0x{:x}>".format(self.far_type(), id(self))

  @classmethod
  def create(cls, filename, arc_type=b"standard", far_type=b"default"):
    """
    FarWriter.create(filename, arc_type="standard", far_type="default")

    Creates a FarWriter object.

    This class method creates a FarWriter given the desired output location,
    arc type, and FAR type.

    Args:
      filename: The string location for the output FAR files.
      arc_type: A string indicating the arc type.
      far_type: A string indicating the FAR type; one of: "fst", "stlist",
          "sttable", "sstable", "default".

    Returns:
      A new FarWriter instance.

    Raises:
      FstIOError: Open failed.
    """
    cdef fst.FarType ft = fst.GetFarType(tostring(far_type))
    cdef fst.FarWriterClass *tfar = fst.FarWriterClass.Create(
        tostring(filename), tostring(arc_type), ft)
    if tfar == NULL:
      raise FstIOError("Open failed: {!r}".format(filename))
    # __new__ is used because __init__ always raises.
    cdef FarWriter result = FarWriter.__new__(FarWriter)
    result._writer.reset(tfar)
    return result

  # NB: Invoking this method is DANGEROUS: calling any other method on the
  # instance after this is invoked may result in a null dereference.
  cdef void _close(self):
    self._writer.reset()

  cpdef void add(self, key, _Fst ifst) except *:
    """
    add(self, key, ifst)

    Adds an FST to the FAR.

    This method adds an FST to the FAR which can be retrieved with the
    specified string key.

    Args:
      key: The string used to key the input FST.
      ifst: The FST to write to the FAR.

    Raises:
      FstArgError: Key out of order.
      FstOpError: Incompatible or invalid arc type.
    """
    # Failure here results from passing an FST with a different arc type than
    # the FAR was initialized to use.
    if not self._writer.get().Add(tostring(key), deref(ifst._fst)):
      raise FstOpError("Incompatible or invalid arc type")
    # An error here usually indicates a key out of order.
    if self._writer.get().Error():
      raise FstArgError("Key out of order")

  cpdef string arc_type(self):
    """
    arc_type(self)

    Returns a string indicating the arc type.
    """
    return self._writer.get().ArcType()

  cpdef bool error(self):
    """
    error(self)

    Indicates whether the FarWriter has encountered an error.

    Returns:
      True if the FarWriter is in an errorful state, False otherwise.
    """
    return self._writer.get().Error()

  cpdef string far_type(self):
    """
    far_type(self)

    Returns a string indicating the FAR type.
    """
    return fst.GetFarTypeString(self._writer.get().Type())

  # Dictionary-like assignment.
  # Within this method the `fst` parameter shadows the `fst` module name.
  def __setitem__(self, key, _Fst fst):
    self.add(key, fst)
4484 ## Cleanup operations for module entrance and exit.
4487 # Masks fst_error_fatal flags while this module is running, returning to the
4488 # previous state upon module exit.
# Save the flag's value at import time so that it can be restored later, then
# switch fatal errors off for the lifetime of the module.
_fst_error_fatal_old = fst.FLAGS_fst_error_fatal
fst.FLAGS_fst_error_fatal = False


@atexit.register
def _reset_fst_error_fatal():
  # Puts the flag back to the value saved above.
  fst.FLAGS_fst_error_fatal = _fst_error_fatal_old