1 #cython: nonecheck=True
2 # See www.openfst.org for extensive documentation on this weighted
3 # finite-state transducer library.
6 """Python interface to the FST scripting API.
8 Operations which construct new FSTs are implemented as traditional functions, as
9 are two-argument boolean functions like `equal` and `equivalent`. Destructive
10 operations---those that mutate an FST, in place---are instance methods, as is
11 `write`. Operator overloading is not used. The following example, based on
12 Mohri et al. 2002, shows the construction of an ASR system given a pronunciation
13 lexicon L, grammar G, a transducer from context-dependent phones to
14 context-independent phones C, and an HMM set H:
16 L = fst.Fst.read("L.fst")
17 G = fst.Fst.read("G.fst")
18 C = fst.Fst.read("C.fst")
19 H = fst.Fst.read("H.fst")
20 LG = fst.determinize(fst.compose(L, G))
21 CLG = fst.determinize(fst.compose(C, LG))
22 HCLG = fst.determinize(fst.compose(H, CLG))
23 HCLG.minimize() # NB: works in-place.
25 Python variables here use snake_case and constants are in all caps, minus the
29 # Overview of the file:
34 # * Weight and helpers
35 # * _SymbolTable, _EncodeMapperSymbolTable, _FstSymbolTable,
36 # _MutableFstSymbolTable, SymbolTable, and helpers
37 # * SymbolTableIterator
39 # * _Fst, _MutableFst, Fst, and helpers
41 # * Arc, ArcIterator, and MutableArcIterator
45 # * FarReader and FarWriter
46 # * Cleanup operations for module entrance and exit.
48 # TODO(kbg): Try breaking this apart into smaller pieces.
50 # A few of the more idiosyncratic choices made here are due to "impedance
51 # mismatches" between C++ and Python, as follows.
53 # Another issue is that due to differences in C++ and Python scope rules, most
54 # C++ class instances have to be heap-allocated. Since all are packed into
55 # Python class instances, Python destructors are used to semi-automatically
56 # free C++ instances. The one exception are the various `...Options` structs.
57 # All that is included here are the constructors; there is no need to include
58 # the names of the struct members. Cython does not draw any meaningful
59 # distinction between structs and C++ classes, so these look just like class
62 # Cython's type annotations (e.g., `string`) are used when the variables will
63 # be sent as arguments to C++ functions, but are not used for variables used
66 # Internal functions which may raise a Python error do not have a C++ return
67 # type simply because this leads the C++ compiler to think that the resulting
68 # value could be used before it is populated.
74 from libc.stdint cimport INT32_MAX
75 from libc.stdint cimport SIZE_MAX
76 from posix.unistd cimport getpid
79 from libcpp cimport bool
80 from libcpp.cast cimport const_cast
81 from libcpp.cast cimport static_cast
84 from ios cimport ofstream
85 from memory cimport static_pointer_cast
87 # Cython operator workarounds.
88 from cython.operator cimport address as addr # &foo
89 from cython.operator cimport dereference as deref # *foo
90 from cython.operator cimport preincrement as inc # ++foo
99 # TODO(kbg): Figure out how to access static class variables so I don't have
104 ## Custom exceptions.
107 class FstError(Exception):
112 class FstArgError(FstError, ValueError):
117 class FstBadWeightError(FstError, ValueError):
122 class FstDeletedConstructorError(FstError, RuntimeError):
127 class FstIndexError(FstError, IndexError):
132 class FstIOError(FstError, IOError):
137 class FstOpError(FstError, RuntimeError):
145 cdef string tostring(data, encoding="utf8") except *:
146 """Converts strings to bytestrings.
148 This function converts Python bytestrings and Unicode strings to bytestrings
149 encoded in UTF-8. It is used to process most Python string arguments before
150 passing them to the lower-level library.
153 data: A Unicode string or bytestring.
154 encoding: The desired encoding, defaulting to UTF-8.
160 FstArgError: Cannot encode string.
163 This function is not visible to Python users.
165 # A Python bytestring can be implicitly cast to a C++ string.
166 if isinstance(data, bytes):
168 elif isinstance(data, unicode):
169 return data.encode(encoding)
170 raise FstArgError("Cannot encode as string: {!r}".format(data))
173 cdef string weight_tostring(data, encoding="utf8") except *:
174 """Converts strings or numerics to bytestrings.
176 This function converts Python bytestrings, Unicode strings, and numerics
177 which can be cast to floats to bytestrings encoded in UTF-8. It is used to
178 process Python string arguments so they can be used to construct Weight
179 objects. In most cases, weights are underlyingly floating-point, but since
180 not all weights are, they can only be constructed using a string.
183 data: A Unicode string, bytestring, or type which can be converted to a
190 FstArgError: Cannot encode string.
191 ValueError: Invalid literal for float.
194 This function is not visible to Python users.
196 # A Python bytestring can be implicitly cast to a C++ string.
197 if isinstance(data, bytes):
199 elif isinstance(data, unicode):
200 return data.encode(encoding)
201 elif isinstance(data, numbers.Number):
202 return str(data).encode(encoding)
203 raise FstArgError("Cannot encode as string: {!r}".format(data))
206 cdef fst.ComposeFilter _get_compose_filter(
207 const string &compose_filter) except *:
208 """Matches string with the appropriate ComposeFilter enum value.
210 This function takes a string argument and returns the matching ComposeFilter
211 enum value used to initialize ComposeOptions instances. ComposeOptions is used
212 by difference and intersection in addition to composition.
215 compose_filter: A string matching a known composition filter; one of:
216 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
219 A ComposeFilter enum value.
222 FstArgError: Unknown compose filter type.
224 This function is not visible to Python users.
226 cdef fst.ComposeFilter compose_filter_enum
227 if not fst.GetComposeFilter(compose_filter, addr(compose_filter_enum)):
228 raise FstArgError("Unknown compose filter type: {!r}".format(
230 return compose_filter_enum
233 cdef fst.DeterminizeType _get_determinize_type(const string &det_type) except *:
234 """Matches string with the appropriate DeterminizeType enum value.
237 det_type: A string matching a known determinization type; one of:
238 "functional", "nonfunctional", "disambiguate".
241 A DeterminizeType enum value.
244 FstArgError: Unknown determinization type.
246 This function is not visible to Python users.
248 cdef fst.DeterminizeType det_type_enum
249 if not fst.GetDeterminizeType(det_type, addr(det_type_enum)):
250 raise FstArgError("Unknown determinization type: {!r}".format(det_type))
254 cdef fst.QueueType _get_queue_type(const string &queue_type) except *:
255 """Matches string with the appropriate QueueType enum value.
257 This function takes a string argument and returns the matching QueueType enum
258 value passed to the RmEpsilonOptions constructor.
261 queue_type: A string matching a known queue type; one of: "auto", "fifo",
262 "lifo", "shortest", "state", "top".
265 A QueueType enum value.
268 FstArgError: Unknown queue type.
270 This function is not visible to Python users.
272 cdef fst.QueueType queue_type_enum
273 if not fst.GetQueueType(queue_type, addr(queue_type_enum)):
274 raise FstArgError("Unknown queue type: {!r}".format(queue_type))
275 return queue_type_enum
278 cdef fst.RandArcSelection _get_rand_arc_selection(
279 const string &select) except *:
280 """Matches string with the appropriate RandArcSelection enum value.
282 This function takes a string argument and returns the matching
283 RandArcSelection enum value passed to the RandGenOptions constructor.
286 select: A string matching a known random arc selection type; one of:
287 "uniform", "log_prob", "fast_log_prob".
290 A RandArcSelection enum value.
293 FstArgError: Unknown random arc selection type.
295 This function is not visible to Python users.
297 cdef fst.RandArcSelection select_enum
298 if not fst.GetRandArcSelection(select, addr(select_enum)):
299 raise FstArgError("Unknown random arc selection type: {!r}".format(select))
303 cdef fst.ReplaceLabelType _get_replace_label_type(
304 const string &replace_label_type, bool epsilon_on_replace) except *:
305 """Matches string with the appropriate ReplaceLabelType enum value.
307 This function takes a string argument and returns the matching
308 ReplaceLabelType enum value passed to the ReplaceOptions constructor.
311 replace_label_type: A string matching a known replace label type; one of:
312 "neither", "input", "output", "both".
313 epsilon_on_replace: Should call/return arcs be epsilon arcs?
316 A ReplaceLabelType enum value.
319 FstArgError: Unknown replace label type.
321 This function is not visible to Python users.
323 cdef fst.ReplaceLabelType replace_label_type_enum
324 if not fst.GetReplaceLabelType(replace_label_type, epsilon_on_replace,
325 addr(replace_label_type_enum)):
326 raise FstArgError("Unknown replace label type: {!r}".format(
328 return replace_label_type_enum
331 ## Weight and helpers.
334 cdef class Weight(object):
337 Weight(weight_type, weight_string)
341 This class represents an FST weight. When passed as an argument to an FST
342 operation, it should have the weight type of the input FST(s) to said
346 weight_type: A string indicating the weight type.
347 weight_string: A string indicating the underlying weight.
350 FstArgError: Weight type not found.
351 FstBadWeightError: Invalid weight.
355 return "<{} Weight {} at 0x{:x}>".format(self.type(), self.to_string(),
359 return self.to_string()
361 # This attempts to convert the string form into a float, raising
362 # ValueError when that is not appropriate.
365 return float(self.to_string())
367 def __init__(self, weight_type, weight):
368 self._weight.reset(new fst.WeightClass(tostring(weight_type),
369 weight_tostring(weight)))
372 cdef void _check_weight(self) except *:
373 if self.type() == b"none":
374 raise FstArgError("Weight type not found")
375 if self.to_string() == b"BadNumber":
376 raise FstBadWeightError("Invalid weight")
378 cpdef Weight copy(self):
382 Returns a copy of the Weight.
384 cdef Weight result = Weight.__new__(Weight)
385 result._weight.reset(new fst.WeightClass(deref(self._weight)))
388 # To get around the inability to declare cdef class methods, we define the
389 # C++ part out-of-class and then call it from within.
392 def Zero(cls, weight_type):
394 Weight.Zero(weight_type)
396 Constructs semiring zero.
398 return _Zero(weight_type)
401 def One(cls, weight_type):
403 Weight.One(weight_type)
405 Constructs semiring One.
407 return _One(weight_type)
410 def NoWeight(cls, weight_type):
412 Weight.NoWeight(weight_type)
414 Constructs a non-member weight in the semiring.
416 return _NoWeight(weight_type)
418 def __eq__(Weight w1, Weight w2):
419 return fst.Eq(deref(w1._weight), deref(w2._weight))
421 def __ne__(Weight w1, Weight w2):
424 cpdef string to_string(self):
425 return self._weight.get().ToString()
427 cpdef string type(self):
430 Returns a string indicating the weight type.
432 return self._weight.get().Type()
435 cdef Weight _plus(Weight lhs, Weight rhs):
436 cdef Weight result = Weight.__new__(Weight)
437 result._weight.reset(new fst.WeightClass(fst.Plus(deref(lhs._weight),
438 deref(rhs._weight))))
442 def plus(Weight lhs, Weight rhs):
446 Computes the sum of two Weights in the same semiring.
448 This function computes lhs \oplus rhs, raising an exception if lhs and rhs
449 are not in the same semiring.
452 lhs: Left-hand side Weight.
453 rhs: Right-hand side Weight.
459 FstArgError: Weight type not found (or not in same semiring).
460 FstBadWeightError: invalid weight.
462 cdef Weight result = _plus(lhs, rhs)
463 result._check_weight()
467 cdef Weight _times(Weight lhs, Weight rhs):
468 cdef Weight result = Weight.__new__(Weight)
469 result._weight.reset(new fst.WeightClass(fst.Times(deref(lhs._weight),
470 deref(rhs._weight))))
474 def times(Weight lhs, Weight rhs):
478 Computes the product of two Weights in the same semiring.
480 This function computes lhs \otimes rhs, raising an exception if lhs and rhs
481 are not in the same semiring.
484 lhs: Left-hand side Weight.
485 rhs: Right-hand side Weight.
491 FstArgError: Weight type not found (or not in same semiring).
492 FstBadWeightError: Invalid weight.
494 cdef Weight result = _times(lhs, rhs)
495 result._check_weight()
499 cdef Weight _divide(Weight lhs, Weight rhs):
500 cdef Weight result = Weight.__new__(Weight)
501 result._weight.reset(new fst.WeightClass(fst.Divide(deref(lhs._weight),
502 deref(rhs._weight))))
506 def divide(Weight lhs, Weight rhs):
510 Computes the quotient of two Weights in the same semiring.
512 This function computes lhs \oslash rhs, raising an exception if lhs and rhs
513 are not in the same semiring. As there is no way to specify whether to use
514 left vs. right division, this assumes a commutative semiring in which these
515 are equivalent operations.
518 lhs: Left-hand side Weight.
519 rhs: Right-hand side Weight.
525 FstArgError: Weight type not found (or not in same semiring).
526 FstBadWeightError: Invalid weight.
528 cdef Weight result = _divide(lhs, rhs)
529 result._check_weight()
533 cdef Weight _power(Weight w, size_t n):
534 cdef Weight result = Weight.__new__(Weight)
535 result._weight.reset(new fst.WeightClass(fst.Power(deref(w._weight), n)))
539 def power(Weight w, size_t n):
543 Computes the iterated product of a weight.
553 FstArgError: Weight type not found (or not in same semiring).
554 FstBadWeightError: Invalid weight.
556 cdef Weight result = _power(w, n)
557 result._check_weight()
561 cdef fst.WeightClass _get_WeightClass_or_Zero(const string &weight_type,
563 """Converts weight string to a WeightClass.
565 This function constructs a WeightClass instance of the desired weight type.
566 If the first argument is null, the weight is set to semiring Zero.
569 weight_type: A string denoting the desired weight type.
570 weight: A object indicating the desired weight; if omitted, the weight is
571 set to semiring Zero.
574 A WeightClass object.
576 This function is not visible to Python users.
578 cdef fst.WeightClass result
580 result = fst.WeightClass.Zero(weight_type)
581 elif isinstance(weight, Weight):
582 result = deref(<fst.WeightClass *> (<Weight> weight)._weight.get())
584 result = fst.WeightClass(weight_type, weight_tostring(weight))
585 if result.ToString() == b"BadNumber":
586 raise FstBadWeightError(weight_tostring(weight))
590 cdef fst.WeightClass _get_WeightClass_or_One(const string &weight_type,
592 """Converts weight string to a WeightClass.
594 This function constructs a WeightClass instance of the desired weight type.
595 If the first argument is null, the weight is set to semiring One.
598 weight_type: A string denoting the desired weight type.
599 weight: A object indicating the desired weight; if omitted, the weight is
603 A WeightClass object.
605 This function is not visible to Python users.
607 cdef fst.WeightClass result
609 result = fst.WeightClass.One(weight_type)
610 elif isinstance(weight, Weight):
611 result = deref(<fst.WeightClass *> (<Weight> weight)._weight.get())
613 result = fst.WeightClass(weight_type, weight_tostring(weight))
614 if result.ToString() == b"BadNumber":
615 raise FstBadWeightError(weight_tostring(weight))
619 cdef Weight _Zero(weight_type):
620 cdef Weight result = Weight.__new__(Weight)
621 result._weight.reset(new fst.WeightClass(fst.WeightClass.Zero(
622 tostring(weight_type))))
623 if result._weight.get().Type() == b"none":
624 raise FstArgError("Weight type not found")
628 cdef Weight _One(weight_type):
629 cdef Weight result = Weight.__new__(Weight)
630 result._weight.reset(new fst.WeightClass(
631 fst.WeightClass.One(tostring(weight_type))))
632 if result._weight.get().Type() == b"none":
633 raise FstArgError("Weight type not found")
637 cdef Weight _NoWeight(weight_type):
638 cdef Weight result = Weight.__new__(Weight)
639 result._weight.reset(new fst.WeightClass(
640 fst.WeightClass.NoWeight(tostring(weight_type))))
644 ## _SymbolTable, _MutableSymbolTable, _EncodeMapperSymbolTable, _FstSymbolTable,
645 ## _MutableFstSymbolTable, SymbolTable, and helpers.
647 # SymbolTable hierarchy:
649 # _SymbolTable: abstract base class; has-a SymbolTable*
650 # _EncodeMapperSymbolTable(_SymbolTable): constant symbol table returned by
651 # EncodeMapper.input_symbols/output_symbols
652 # _FstSymbolTable(_SymbolTable): constant symbol table returned by
653 # _Fst.input_symbols/output_symbols
655 # _MutableSymbolTable(_SymbolTable): abstract base class adding mutation methods
656 # _MutableFstSymbolTable(_MutableSymbolTable): mutable symbol table returned by
657 # _MutableFst.mutable_input_symbols/mutable_output_symbols
658 # SymbolTable(_MutableSymbolTable): adds constructor
661 cdef class _SymbolTable(object):
666 Base class for the symbol table hierarchy.
668 This class is the base class for SymbolTable. It has a "deleted" constructor
669 and implementations for the const methods of the wrapped SymbolTable.
672 # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
673 # Doing so will allow undefined behavior.
676 raise FstDeletedConstructorError(
677 "Cannot construct {}".format(self.__class__.__name__))
680 return SymbolTableIterator(self)
682 cpdef int64 available_key(self):
686 Returns an integer indicating the next available key index in the table.
688 return self._table.AvailableKey()
690 cpdef string checksum(self):
694 Returns a string indicating the label-agnostic MD5 checksum for the table.
696 return self._table.CheckSum()
698 cpdef SymbolTable copy(self):
702 Returns a mutable copy of the SymbolTable.
704 return _init_SymbolTable(self._table.Copy())
710 Given a symbol or index, finds the other one.
712 This method returns the index associated with a symbol key, or the symbol
713 associated with a index key.
716 key: Either a string or an index.
719 If the key is a string, the associated index or NO_LABEL if not found; if
720 the key is an integer, the associated symbol or an empty string if
724 return self._table.FindIndex(tostring(key))
726 return self._table.FindSymbol(key)
728 cpdef int64 get_nth_key(self, ssize_t pos) except *:
730 get_nth_key(self, pos)
732 Retrieves the integer index of the n-th key in the table.
735 pos: The n-th key to retrieve.
738 The integer index of the n-th key, or NO_LABEL if not found.
740 return self._table.GetNthKey(pos)
742 cpdef string labeled_checksum(self):
744 labeled_checksum(self)
746 Returns a string indicating the label-dependent MD5 checksum for the table.
748 return self._table.LabeledCheckSum()
750 cpdef bool member(self, key):
754 Given a symbol or index, returns whether it is found in the table.
756 This method returns a boolean indicating whether the given symbol or index
757 is present in the table. If one intends to perform subsequent lookup, it is
758 better to simply call the find method, catching the KeyError.
761 key: Either a string or an index.
764 Whether or not the key is present (as a string or a index) in the table.
767 return self._table.MemberSymbol(tostring(key))
769 return self._table.MemberIndex(key)
771 def __contains__(self, key):
772 return self.member(key)
774 cpdef string name(self):
778 Returns the symbol table's name.
780 return self._table.Name()
782 cpdef size_t num_symbols(self):
786 Returns the number of symbols in the symbol table.
788 return self._table.NumSymbols()
790 cpdef void write(self, filename) except *:
792 write(self, filename)
794 Serializes symbol table to a file.
796 This methods writes the SymbolTable to a file in binary format.
799 filename: The string location of the output file.
802 FstIOError: Write failed.
804 if not self._table.Write(tostring(filename)):
805 raise FstIOError("Write failed: {!r}".format(filename))
807 cpdef void write_text(self, filename) except *:
809 write_text(self, filename)
811 Writes symbol table to text file.
813 This method writes the SymbolTable to a file in human-readable format.
816 filename: The string location of the output file.
819 FstIOError: Write failed.
821 if not self._table.WriteText(tostring(filename)):
822 raise FstIOError("Write failed: {!r}".format(filename))
825 cdef class _EncodeMapperSymbolTable(_SymbolTable):
830 Immutable SymbolTable class for tables stored in an EncodeMapper.
832 This class wraps a library const SymbolTable and exposes const methods of the
833 wrapped object. It is only to be returned by method, never constructed
837 # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
838 # Doing so will allow undefined behavior.
841 return "<const EncodeMapper SymbolTable {!r} at 0x{:x}>".format(self.name(),
845 cdef class _FstSymbolTable(_SymbolTable):
850 Mutable SymbolTable class for tables stored in a mutable FST.
852 This class wraps a library SymbolTable and exposes methods of the wrapped
853 object. It is only to be returned by method, never constructed directly.
856 # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
857 # Doing so will allow undefined behavior.
860 return "<const Fst SymbolTable {!r} at 0x{:x}>".format(self.name(),
864 cdef class _MutableSymbolTable(_SymbolTable):
869 Base class for mutable symbol tables.
871 This class is the base class for a mutable SymbolTable. It has a "deleted"
872 constructor and implementations of all methods of the wrapped SymbolTable.
875 cpdef int64 add_symbol(self, symbol, int64 key=kNoSymbol):
877 add_symbol(self, symbol, key=NO_SYMBOL)
879 Adds a symbol to the table and returns the index.
881 This method adds a symbol to the table. The caller can optionally
882 specify a non-negative integer index for the key.
885 symbol: A symbol string.
886 key: An index for the symbol; if not specified, the next index will be
890 The integer key of the new symbol.
892 cdef string symbol_string = tostring(symbol)
894 return self._table.AddSymbol(symbol_string, key)
896 return self._table.AddSymbol(symbol_string)
898 cpdef void add_table(self, _SymbolTable syms):
900 add_table(self, syms)
902 Adds another SymbolTable to this table.
904 This method merges another symbol table into the current table. All key
905 values will be offset by the current available key.
908 syms: A SymbolTable to be merged with the current table.
910 self._table.AddTable(deref(syms._table))
912 cpdef void set_name(self, new_name) except *:
913 self._table.SetName(tostring(new_name))
916 cdef class _MutableFstSymbolTable(_MutableSymbolTable):
920 Mutable SymbolTable assigned to an FST.
924 return "<Fst SymbolTable {!r} at 0x{:x}>".format(self.name(), id(self))
927 cdef class SymbolTable(_MutableSymbolTable):
930 SymbolTable(name="<unspecified>")
932 Mutable SymbolTable class.
934 This class wraps the library SymbolTable and exposes both const (i.e.,
935 access) and non-const (i.e., mutation) methods of wrapped object.
937 Unlike other classes in the hierarchy, it has a working constructor and can be
938 used to programmatically construct a SymbolTable in memory.
941 name: An optional string indicating the table's name.
945 return "<SymbolTable {!r} at 0x{:x}>".format(self.name(), id(self))
947 def __init__(self, name=b"<unspecified>"):
948 self._table = new fst.SymbolTable(tostring(name))
949 self._smart_table.reset(self._table)
952 def read(cls, filename):
954 SymbolTable.read(filename)
956 Reads symbol table from binary file.
958 This class method creates a new SymbolTable from a symbol table binary file.
961 filename: The string location of the input binary file.
964 A new SymbolTable instance.
966 See also: `SymbolTable.read_fst`, `SymbolTable.read_text`.
968 cdef fst.SymbolTable *tsyms = fst.SymbolTable.Read(tostring(filename))
970 raise FstIOError("Read failed: {!r}".format(filename))
971 return _init_SymbolTable(tsyms)
974 def read_text(cls, filename, bool allow_negative_labels=False):
976 SymbolTable.read_text(filename)
978 Reads symbol table from text file.
980 This class method creates a new SymbolTable from a symbol table text file.
983 filename: The string location of the input text file.
984 allow_negative_labels: Should negative labels be allowed? (Not
985 recommended; may cause conflicts).
988 A new SymbolTable instance.
990 See also: `SymbolTable.read`, `SymbolTable.read_fst`.
992 cdef unique_ptr[fst.SymbolTableTextOptions] opts
993 opts.reset(new fst.SymbolTableTextOptions(allow_negative_labels))
994 cdef fst.SymbolTable *tsyms = fst.SymbolTable.ReadText(tostring(filename),
997 raise FstIOError("Read failed: {!r}".format(filename))
998 return _init_SymbolTable(tsyms)
1001 def read_fst(cls, filename, bool input_table):
1003 SymbolTable.read_fst(filename, input_table)
1005 Reads symbol table from an FST file without loading the corresponding FST.
1007 This class method creates a new SymbolTable by reading either the input or
1008 output symbol table from an FST file, without loading the corresponding FST.
1011 filename: The string location of the input FST file.
1012 input_table: Should the input table be read (True) or the output table
1016 A new SymbolTable instance, or None if none can be read.
1019 FstIOError: Read failed.
1021 See also: `SymbolTable.read`, `SymbolTable.read_text`.
1023 cdef fst.SymbolTable *tsyms = fst.FstReadSymbols(filename, input_table)
1025 raise FstIOError("Read failed: {!r}".format(filename))
1026 return _init_SymbolTable(tsyms)
1029 cdef _EncodeMapperSymbolTable _init_EncodeMapperSymbolTable(
1030 fst.SymbolTable *table, shared_ptr[fst.EncodeMapperClass] encoder):
1031 cdef _EncodeMapperSymbolTable result = (
1032 _EncodeMapperSymbolTable.__new__(_EncodeMapperSymbolTable))
1033 result._table = table
1034 result._encoder = encoder
1038 cdef _FstSymbolTable _init_FstSymbolTable(fst.SymbolTable *table,
1039 shared_ptr[fst.FstClass] ifst):
1040 cdef _FstSymbolTable result = _FstSymbolTable.__new__(_FstSymbolTable)
1041 result._table = table
1046 cdef _MutableFstSymbolTable _init_MutableFstSymbolTable(fst.SymbolTable *table,
1047 shared_ptr[fst.MutableFstClass] ifst):
1048 cdef _MutableFstSymbolTable result = (
1049 _MutableFstSymbolTable.__new__(_MutableFstSymbolTable))
1050 result._table = table
1055 cdef SymbolTable _init_SymbolTable(fst.SymbolTable *table):
1056 cdef SymbolTable result = SymbolTable.__new__(SymbolTable)
1057 result._table = table
1061 # Constructive SymbolTable operations.
1064 cpdef SymbolTable compact_symbol_table(_SymbolTable syms):
1066 compact_symbol_table(syms)
1068 Constructively relabels a SymbolTable to make it a contiguous mapping.
1071 syms: Input SymbolTable.
1074 A new compacted SymbolTable.
1076 return _init_SymbolTable(fst.CompactSymbolTable(deref(syms._table)))
1079 cpdef SymbolTable merge_symbol_table(_SymbolTable lhs, _SymbolTable rhs):
1081 merge_symbol_table(lhs, rhs)
1083 Merges all symbols from the left table into the right.
1085 This function creates a new SymbolTable which is the merger of the two input
1086 symbol Tables. Symbols in the right-hand table that conflict with those in the
1087 left-hand table will be assigned values from the left-hand table. Thus the
1088 returned table will never modify symbol assignments from the left-hand side,
1089 but may do so on the right.
1091 If the left-hand table is associated with an FST, it may be necessary to
1092 relabel it using the output table.
1095 lhs: Left-hand side SymbolTable.
1096 rhs: Left-hand side SymbolTable.
1099 A new merged SymbolTable.
1101 See also: `relabel_symbols`.
1103 return _init_SymbolTable(fst.MergeSymbolTable(deref(lhs._table),
1104 deref(rhs._table), NULL))
1107 ## SymbolTableIterator.
1110 cdef class SymbolTableIterator(object):
1113 SymbolTableIterator(syms)
1115 This class is used for iterating over a symbol table.
1119 return "<SymbolTableIterator at 0x{:x}>".format(id(self))
1121 def __init__(self, _SymbolTable syms):
1122 self._siter.reset(new fst.SymbolTableIterator(deref(syms._table)))
1124 # This just registers this class as a possible iterator.
1128 # Magic method used to get a Pythonic API out of the C++ API.
1132 cdef int64 value = self.value()
1133 cdef string symbol = self.symbol()
1135 return (value, symbol)
1137 cpdef bool done(self):
1141 Indicates whether the iterator is exhausted or not.
1144 True if the iterator is exhausted, False otherwise.
1146 return self._siter.get().Done()
1148 cpdef void next(self):
1152 Advances the iterator.
1154 self._siter.get().Next()
1156 cpdef void reset(self):
1160 Resets the iterator to the initial position.
1162 self._siter.get().Reset()
1164 cpdef string symbol(self):
1168 Returns the current symbol string.
1170 This method returns the current symbol string at this point in the table.
1175 return self._siter.get().Symbol()
1177 cpdef int64 value(self):
1181 Returns the current integer index of the symbol.
1186 return self._siter.get().Value()
1192 cdef class EncodeMapper(object):
1195 EncodeMapper(arc_type="standard", encode_labels=False, encode_weights=False)
1197 Arc encoder class, wrapping EncodeMapperClass.
1199 This class provides an object which can be used to encode or decode FST arcs.
1200 This is most useful to convert an FST to an unweighted acceptor, on which
1201 some FST operations are more efficient, and then decoding the FST afterwards.
1203 To use an instance of this class to encode or decode a mutable FST, pass it
1204 as the first argument to the FST instance methods `encode` and `decode`.
1206 For implementational reasons, it is not currently possible to use an encoder
1207 on disk to construct this class.
1210 arc_type: A string indicating the arc type.
1211 encode_labels: Should labels be encoded?
1212 encode_weights: Should weights be encoded?
1216 return "<EncodeMapper at 0x{:x}>".format(id(self))
1219 arc_type=b"standard",
1220 bool encode_labels=False,
1221 bool encode_weights=False):
1222 cdef uint32 flags = fst.GetEncodeFlags(encode_labels, encode_weights)
1223 self._encoder.reset(new fst.EncodeMapperClass(tostring(arc_type), flags,
1225 if not self._encoder:
1226 raise FstOpError("Unknown arc type: {!r}".format(arc_type))
1228 cpdef string arc_type(self):
1232 Returns a string indicating the arc type.
1234 return self._encoder.get().ArcType()
1236 # Python's equivalent to operator().
1238 def __call__(self, Arc arc):
1240 self(state, ilabel, olabel, weight, nextstate)
1242 Uses the encoder to encode an arc.
1245 ilabel: The integer index of the input label.
1246 olabel: The integer index of the output label.
1247 weight: A Weight or weight string indicating the desired final weight; if
1248 null, it is set to semiring One.
1249 nextstate: The integer index of the destination state.
1252 FstOpError: Incompatible or invalid weight.
1254 return _init_Arc(self._encoder.get().__call__(deref(arc._arc)))
1256 cpdef uint32 flags(self):
1260 Returns the encoder's flags.
1262 return self._encoder.get().Flags()
1264 cpdef _EncodeMapperSymbolTable input_symbols(self):
1268 Returns the encoder's input symbol table, or None if none is present.
1270 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1271 self._encoder.get().InputSymbols())
1274 return _init_EncodeMapperSymbolTable(syms, self._encoder)
1276 cpdef _EncodeMapperSymbolTable output_symbols(self):
1278 output_symbols(self)
1280 Returns the encoder's output symbol table, or None if none is present.
1282 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1283 self._encoder.get().OutputSymbols())
1286 return _init_EncodeMapperSymbolTable(syms, self._encoder)
1288 cpdef uint64 properties(self, uint64 mask):
1290 properties(self, mask)
1292 Provides property bits.
1294 This method provides user access to the properties of the encoder.
1297 mask: The property mask to be compared to the encoder's properties.
1300 A 64-bit bitmask representing the requested properties.
1302 return self._encoder.get().Properties(mask)
1304 cpdef void set_input_symbols(self, _SymbolTable syms) except *:
1306 set_input_symbols(self, syms)
1308 Sets the encoder's input symbol table.
1311 syms: A SymbolTable.
1313 See also: `set_output_symbols`.
1315 self._encoder.get().SetInputSymbols(syms._table)
1317 cpdef void set_output_symbols(self, _SymbolTable syms) except *:
1319 set_output_symbols(self, syms)
1321 Sets the encoder's output symbol table.
1324 syms: A SymbolTable.
1326 See also: `set_input_symbols`.
1328 self._encoder.get().SetOutputSymbols(syms._table)
1330 cpdef string weight_type(self):
1334 Returns a string indicating the weight type.
1336 return self._encoder.get().WeightType()
1339 ## _Fst, _MutableFst, Fst, and helpers.
1343 # _Fst: base class; has-a FstClass*.
1344 # _MutableFst(_Fst): adds mutable methods.
1345 # Fst(filename): pseudo-constructor.
1348 cdef class _Fst(object):
1353 Immutable FST class, wrapping FstClass.
1355 This class is the basic user-facing FST object. It does not itself support any
1356 mutation operations.
1359 # IPython notebook magic to produce an SVG of the FST.
1360 def _repr_svg_(self):
1361 """IPython notebook magic to produce an SVG of the FST using GraphViz.
1363 This method produces an SVG of the internal graph. Users wishing to create
1364 publication-quality graphs should instead use the method `draw`, which
1365 exposes additional parameters.
1368 OSError: Cannot locate the `dot` executable.
1369 subprocess.CalledProcessError: `dot` returned non-zero exit code.
1371 See also: `draw`, `text`.
1373 # Throws OSError if the dot executable is not found.
1374 proc = subprocess.Popen(["dot", "-Tsvg"], stdin=subprocess.PIPE,
1375 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1376 cdef stringstream sstrm
1377 fst.DrawFst(deref(self._fst), self._fst.get().InputSymbols(),
1378 self._fst.get().OutputSymbols(), NULL,
1379 self._fst.get().Properties(fst.kAcceptor, True) ==
1381 b"", 8.5, 11, True, False, 0.4, 0.25, 14, 5, b"g", False,
1382 addr(sstrm), b"_repr_svg")
1383 (sout, serr) = proc.communicate(sstrm.str())
1384 if proc.returncode != 0: # Just to be explicit.
1385 raise subprocess.CalledProcessError(proc.returncode, self._DOT_TSVG)
1386 return sout.decode("utf8")
1389 return "<{} Fst at 0x{:x}>".format(self.fst_type(), id(self))
1392 raise FstDeletedConstructorError(
1393 "Cannot construct {}".format(self.__class__.__name__))
1398 # Registers the class for pickling; must be repeated in any subclass which
1399 # can't be derived by _init_XFst.
1401 def __reduce__(self):
1402 return (_read_from_string, (self.write_to_string(),))
1404 cpdef string arc_type(self):
1408 Returns a string indicating the arc type.
1410 return self._fst.get().ArcType()
1412 cpdef ArcIterator arcs(self, int64 state):
1416 Returns an iterator over arcs leaving the specified state.
1419 state: The source state ID.
1424 See also: `mutable_arcs`, `states`.
1426 return ArcIterator(self, state)
1428 cpdef _Fst copy(self):
1432 Makes a copy of the FST.
1434 return _init_XFst(new fst.FstClass(deref(self._fst)))
1436 cpdef void draw(self,
1438 _SymbolTable isymbols=None,
1439 _SymbolTable osymbols=None,
1440 SymbolTable ssymbols=None,
1441 bool acceptor=False,
1445 bool portrait=False,
1446 bool vertical=False,
1448 double nodesep=0.25,
1452 bool show_weight_one=False):
1454 draw(self, filename, isymbols=None, osymbols=None, ssymbols=None,
1455 acceptor=False, title="", width=8.5, height=11, portrait=False,
1456 vertical=False, ranksep=0.4, nodesep=0.25, fontsize=14,
1457 precision=5, float_format="g", show_weight_one=False):
1459 Writes out the FST in Graphviz text format.
1461 This method writes out the FST in the dot graph description language. The
1462 graph can be rendered using the `dot` executable provided by Graphviz.
1465 filename: The string location of the output dot/Graphviz file.
1466 isymbols: An optional symbol table used to label input symbols.
1467 osymbols: An optional symbol table used to label output symbols.
1468 ssymbols: An optional symbol table used to label states.
1469 acceptor: Should the figure be rendered in acceptor format if possible?
1470 title: An optional string indicating the figure title.
1471 width: The figure width, in inches.
1472 height: The figure height, in inches.
1473 portrait: Should the figure be rendered in portrait rather than
1475 vertical: Should the figure be rendered bottom-to-top rather than
1477 ranksep: The minimum separation separation between ranks, in inches.
1478 nodesep: The minimum separation between nodes, in inches.
1479 fontsize: Font size, in points.
1480 precision: Numeric precision for floats, in number of chars.
1481 float_format: One of: 'e', 'f' or 'g'.
1482 show_weight_one: Should weights equivalent to semiring One be printed?
1486 cdef string filename_string = tostring(filename)
1487 cdef unique_ptr[ofstream] ostrm
1488 ostrm.reset(new ofstream(filename_string))
1489 cdef fst.SymbolTable *ssymbols_ptr = NULL
1490 if ssymbols is not None:
1491 ssymbols_ptr = ssymbols._table
1492 fst.DrawFst(deref(self._fst),
1493 self._fst.get().InputSymbols() if isymbols is None
1494 else isymbols._table,
1495 self._fst.get().OutputSymbols() if osymbols is None
1496 else osymbols._table,
1497 ssymbols_ptr, acceptor, tostring(title), width, height, portrait,
1498 vertical, ranksep, nodesep, fontsize, precision,
1499 tostring(float_format), show_weight_one, ostrm.get(),
1502 cpdef Weight final(self, int64 state):
1506 Returns the final weight of a state.
1509 state: The integer index of a state.
1512 The final Weight of that state.
1515 FstIndexError: State index out of range.
1517 cdef Weight weight = Weight.__new__(Weight)
1518 weight._weight.reset(new fst.WeightClass(self._fst.get().Final(state)))
1521 cpdef string fst_type(self):
1525 Returns a string indicating the FST type.
1527 return self._fst.get().FstType()
1529 cpdef _FstSymbolTable input_symbols(self):
1533 Returns the FST's input symbol table, or None if none is present.
1535 See also: `input_symbols`.
1537 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1538 self._fst.get().InputSymbols())
1541 return _init_FstSymbolTable(syms, self._fst)
1543 cpdef size_t num_arcs(self, int64 state) except *:
1545 num_arcs(self, state)
1547 Returns the number of arcs leaving a state.
1550 state: The integer index of a state.
1553 The number of arcs leaving that state.
1556 FstIndexError: State index out of range.
1558 See also: `num_states`.
1560 cdef size_t result = self._fst.get().NumArcs(state)
1561 if result == SIZE_MAX:
1562 raise FstIndexError("State index out of range")
1565 cpdef size_t num_input_epsilons(self, int64 state) except *:
1567 num_input_epsilons(self, state)
1569 Returns the number of arcs with epsilon input labels leaving a state.
1572 state: The integer index of a state.
1575 The number of epsilon-input-labeled arcs leaving that state.
1578 FstIndexError: State index out of range.
1580 See also: `num_output_epsilons`.
1582 cdef size_t result = self._fst.get().NumInputEpsilons(state)
1583 if result == SIZE_MAX:
1584 raise FstIndexError("State index out of range")
1587 cpdef size_t num_output_epsilons(self, int64 state) except *:
1589 num_output_epsilons(self, state)
1591 Returns the number of arcs with epsilon output labels leaving a state.
1594 state: The integer index of a state.
1597 The number of epsilon-output-labeled arcs leaving that state.
1600 FstIndexError: State index out of range.
1602 See also: `num_input_epsilons`.
1604 cdef size_t result = self._fst.get().NumOutputEpsilons(state)
1605 if result == SIZE_MAX:
1606 raise FstIndexError("State index out of range")
1609 cpdef _FstSymbolTable output_symbols(self):
1611 output_symbols(self)
1613 Returns the FST's output symbol table, or None if none is present.
1615 See also: `input_symbols`.
1617 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1618 self._fst.get().OutputSymbols())
1621 return _init_FstSymbolTable(syms, self._fst)
1623 cpdef uint64 properties(self, uint64 mask, bool test):
1625 properties(self, mask, test)
1627 Provides property bits.
1629 This method provides user access to the properties attributes for the FST.
1630 The resulting value is a long integer, but when it is cast to a boolean,
1631 it represents whether or not the FST has the `mask` property.
1634 mask: The property mask to be compared to the FST's properties.
1635 test: Should any unknown values be computed before comparing against
1639 A 64-bit bitmask representing the requested properties.
1641 return self._fst.get().Properties(mask, test)
1643 cpdef int64 start(self):
1647 Returns the start state.
1649 return self._fst.get().Start()
1651 cpdef StateIterator states(self):
1655 Returns an iterator over all states in the FST.
1658 A StateIterator object for the FST.
1660 See also: `arcs`, `mutable_arcs`.
1662 return StateIterator(self)
1664 cpdef string text(self, _SymbolTable isymbols=None,
1665 _SymbolTable osymbols=None, _SymbolTable ssymbols=None,
1666 bool acceptor=False, bool show_weight_one=False, missing_sym=b""):
1668 text(self, isymbols=None, osymbols=None, ssymbols=None, acceptor=False,
1669 show_weight_one=False, missing_sym="")
1671 Produces a human-readable string representation of the FST.
1673 This method generates a human-readable string representation of the FST.
1674 The caller may optionally specify SymbolTables used to label input labels,
1675 output labels, or state labels, respectively.
1678 isymbols: An optional symbol table used to label input symbols.
1679 osymbols: An optional symbol table used to label output symbols.
1680 ssymbols: An optional symbol table used to label states.
1681 acceptor: Should the FST be rendered in acceptor format if possible?
1682 show_weight_one: Should weights equivalent to semiring One be printed?
1683 missing_symbol: The string to be printed when symbol table lookup fails.
1686 A formatted string representing the machine.
1688 # Prints FST to stringstream, then returns resulting string.
1689 cdef fst.SymbolTable *ssymbols_ptr = NULL
1690 if ssymbols is not None:
1691 ssymbols_ptr = ssymbols._table
1692 cdef stringstream sstrm
1693 fst.PrintFst(deref(self._fst), sstrm, b"<pywrapfst>",
1694 self._fst.get().InputSymbols() if isymbols is None
1695 else isymbols._table,
1696 self._fst.get().OutputSymbols() if osymbols is None
1697 else osymbols._table,
1698 ssymbols_ptr, acceptor, show_weight_one, tostring(missing_sym))
1701 cpdef bool verify(self):
1705 Verifies that an FST's contents are sane.
1708 True if the contents are sane, False otherwise.
1710 return fst.Verify(deref(self._fst))
1712 cpdef string weight_type(self):
1716 Provides the FST's weight type.
1719 A string representing the weight type.
1721 return self._fst.get().WeightType()
1723 cpdef void write(self, filename) except *:
1725 write(self, filename)
1727 Serializes FST to a file.
1729 This method writes the FST to a file in a binary format.
1732 filename: The string location of the output file.
1735 FstIOError: Write failed.
1737 if not self._fst.get().Write(tostring(filename)):
1738 raise FstIOError("Write failed: {!r}".format(filename))
1740 cpdef string write_to_string(self):
1742 write_to_string(self)
1744 Serializes FST to a string.
1750 FstIOError: Write to string failed.
1752 See also: `read_from_string`.
1754 cdef stringstream sstrm
1755 if not self._fst.get().Write(sstrm, "write_to_string"):
1756 raise FstIOError("Write to string failed")
1760 cdef class _MutableFst(_Fst):
1765 Mutable FST class, wrapping MutableFstClass.
1767 This class extends _Fst by adding mutation operations.
1770 cdef void _check_mutating_imethod(self) except *:
1771 """Checks whether an operation mutating the FST has produced an error.
1773 This function is not visible to Python users.
1775 if self._fst.get().Properties(fst.kError, True) == fst.kError:
1776 raise FstOpError("Operation failed")
1778 cdef void _add_arc(self, int64 state, Arc arc) except *:
1779 if not self._fst.get().ValidStateId(state):
1780 raise FstIndexError("State index out of range")
1781 if not self._mfst.get().AddArc(state, deref(arc._arc)):
1782 raise FstOpError("Incompatible or invalid weight type")
1783 self._check_mutating_imethod()
1785 def add_arc(self, int64 state, Arc arc):
1787 add_arc(self, state, arc)
1789 Adds a new arc to the FST and return self.
1792 state: The integer index of the source state.
1793 arc: The arc to add.
1799 FstIndexError: State index out of range.
1800 FstOpdexError: Incompatible or invalid weight type.
1802 See also: `add_state`.
1804 self._add_arc(state, arc)
1807 cpdef int64 add_state(self) except *:
1811 Adds a new state to the FST and returns the state ID.
1814 The integer index of the new state.
1816 See also: `add_arc`, `set_start`, `set_final`.
1818 cdef int64 result = self._mfst.get().AddState()
1819 self._check_mutating_imethod()
1822 cdef void _arcsort(self, sort_type=b"ilabel") except *:
1823 cdef fst.ArcSortType sort_type_enum
1824 if not fst.GetArcSortType(tostring(sort_type), addr(sort_type_enum)):
1825 raise FstArgError("Unknown sort type {!r}".format(sort_type))
1826 fst.ArcSort(self._mfst.get(), sort_type_enum)
1827 self._check_mutating_imethod()
1829 def arcsort(self, sort_type=b"ilabel"):
1831 arcsort(self, sort_type="ilabel")
1833 Sorts arcs leaving each state of the FST.
1835 This operation destructively sorts arcs leaving each state using either
1836 input or output labels.
1839 sort_type: Either "ilabel" (sort arcs according to input labels) or
1840 "olabel" (sort arcs according to output labels).
1846 FstArgError: Unknown sort type.
1848 See also: `topsort`.
1850 self._arcsort(sort_type)
1853 cdef void _closure(self, bool closure_plus=False) except *:
1854 fst.Closure(self._mfst.get(), fst.GetClosureType(closure_plus))
1855 self._check_mutating_imethod()
1857 def closure(self, bool closure_plus=False):
1859 closure(self, closure_plus=False)
1861 Computes concatenative closure.
1863 This operation destructively converts the FST to its concatenative closure.
1864 If A transduces string x to y with weight a, then the closure transduces x
1865 to y with weight a, xx to yy with weight a \otimes a, xxx to yyy with weight
1866 a \otimes a \otimes a, and so on. The empty string is also transduced to
1867 itself with semiring One if `closure_plus` is False.
1870 closure_plus: If False, do not accept the empty string.
1875 self._closure(closure_plus)
1878 cdef void _concat(self, _Fst ifst) except *:
1879 fst.Concat(self._mfst.get(), deref(ifst._fst))
1880 self._check_mutating_imethod()
1882 def concat(self, _Fst ifst):
1886 Computes the concatenation (product) of two FSTs.
1888 This operation destructively concatenates the FST with a second FST. If A
1889 transduces string x to y with weight a and B transduces string w to v with
1890 weight b, then their concatenation transduces string xw to yv with weight a
1894 ifst: The second input FST.
1902 cdef void _connect(self) except *:
1903 fst.Connect(self._mfst.get())
1904 self._check_mutating_imethod()
1910 Removes unsuccessful paths.
1912 This operation destructively trims the FST, removing states and arcs that
1913 are not part of any successful path.
1921 cdef void _decode(self, EncodeMapper encoder) except *:
1922 fst.Decode(self._mfst.get(), deref(encoder._encoder))
1923 self._check_mutating_imethod()
1925 def decode(self, EncodeMapper encoder):
1927 decode(self, encoder)
1929 Decodes encoded labels and/or weights.
1931 This operation reverses the encoding performed by `encode`.
1934 encoder: An EncodeMapper object used to encode the FST.
1941 self._decode(encoder)
1944 cdef void _delete_arcs(self, int64 state, size_t n=0) except *:
1945 if not (self._mfst.get().DeleteArcs(state, n) if n else
1946 self._mfst.get().DeleteArcs(state)):
1947 raise FstIndexError("State index out of range")
1948 self._check_mutating_imethod()
1950 def delete_arcs(self, int64 state, size_t n=0):
1952 delete_arcs(self, state, n=0)
1954 Deletes arcs leaving a particular state.
1957 state: The integer index of a state.
1958 n: An optional argument indicating how many arcs to be deleted. If this
1959 argument is omitted or passed as zero, all arcs from this state are
1966 FstIndexError: State index out of range.
1968 See also: `delete_states`.
1970 self._delete_arcs(state, n)
1973 cdef void _delete_states(self, states=None) except *:
1974 # Only the former signature has a possible indexing failure.
1976 if not self._mfst.get().DeleteStates(<const vector[int64]> states):
1977 raise FstIndexError("State index out of range")
1979 self._mfst.get().DeleteStates()
1980 self._check_mutating_imethod()
1982 def delete_states(self, states=None):
1984 delete_states(self, states=None)
1989 states: An optional iterable of integer indices of the states to be
1990 deleted. If this argument is omitted, all states are deleted.
1996 FstIndexError: State index out of range.
1998 See also: `delete_arcs`.
2000 self._delete_states(states)
2003 cdef void _encode(self, EncodeMapper encoder) except *:
2004 fst.Encode(self._mfst.get(), encoder._encoder.get())
2005 self._check_mutating_imethod()
2007 def encode(self, EncodeMapper encoder):
2009 encode(self, encoder)
2011 Encodes labels and/or weights.
2013 This operation allows for the representation of a weighted transducer as a
2014 weighted acceptor, an unweighted transducer, or an unweighted acceptor by
2015 considering the pair (input label, output label), the pair (input label,
2016 weight), or the triple (input label, output label, weight) as a single
2017 label. Applying this operation mutates the EncodeMapper argument, which
2018 can then be used to decode.
2021 encoder: An EncodeMapper object to be used as the encoder.
2028 self._encode(encoder)
2031 cdef void _invert(self) except *:
2032 fst.Invert(self._mfst.get())
2033 self._check_mutating_imethod()
2039 Inverts the FST's transduction.
2041 This operation destructively inverts the FST's transduction by exchanging
2042 input and output labels.
2050 cdef void _minimize(self, float delta=fst.kShortestDelta,
2051 bool allow_nondet=False) except *:
2052 # This runs in-place when the second argument is null.
2053 fst.Minimize(self._mfst.get(), NULL, delta, allow_nondet)
2054 self._check_mutating_imethod()
2056 def minimize(self, float delta=fst.kShortestDelta, bool allow_nondet=False):
2058 minimize(self, delta=1e-6, allow_nondet=False)
2062 This operation destructively performs the minimization of deterministic
2063 weighted automata and transducers. If the input FST A is an acceptor, this
2064 operation produces the minimal acceptor B equivalent to A, i.e. the
2065 acceptor with a minimal number of states that is equivalent to A. If the
2066 input FST A is a transducer, this operation internally builds an equivalent
2067 transducer with a minimal number of states. However, this minimality is
2068 obtained by allowing transition having strings of symbols as output labels,
2069 this known in the litterature as a real-time transducer. Such transducers
2070 are not directly supported by the library. This function will convert such
2071 transducer by expanding each string-labeled transition into a sequence of
2072 transitions. This will results in the creation of new states, hence losing
2073 the minimality property.
2076 delta: Comparison/quantization delta.
2077 allow_nondet: Attempt minimization of non-deterministic FST?
2082 self._minimize(delta, allow_nondet)
2085 cpdef MutableArcIterator mutable_arcs(self, int64 state):
2087 mutable_arcs(self, state)
2089 Returns a mutable iterator over arcs leaving the specified state.
2092 state: The source state ID.
2095 A MutableArcIterator.
2097 See also: `arcs`, `states`.
2099 return MutableArcIterator(self, state)
2101 def mutable_input_symbols(self):
2103 mutable_input_symbols(self)
2105 Returns the FST's (mutable) input symbol table, or None if none is present.
2107 cdef fst.SymbolTable *tst = self._mfst.get().MutableInputSymbols()
2110 return _init_MutableFstSymbolTable(tst, self._mfst)
2112 def mutable_output_symbols(self):
2114 mutable_output_symbols(self)
2116 Returns the FST's (mutable) output symbol table, or None if none is present.
2118 cdef fst.SymbolTable *tst = self._mfst.get().MutableOutputSymbols()
2121 return _init_MutableFstSymbolTable(tst, self._mfst)
2123 cpdef int64 num_states(self):
2127 Returns the number of states.
2129 return self._mfst.get().NumStates()
2131 cdef void _project(self, bool project_output=False) except *:
2132 fst.Project(self._mfst.get(), fst.GetProjectType(project_output))
2133 self._check_mutating_imethod()
2135 def project(self, bool project_output=False):
2137 project(self, project_output=False)
2139 Converts the FST to an acceptor using input or output labels.
2141 This operation destructively projects an FST onto its domain or range by
2142 either copying each arc's input label to its output label (the default) or
2146 project_output: Should the output labels be projected?
2151 See also: `decode`, `encode`, `relabel_pairs`, `relabel_symbols`.
2153 self._project(project_output)
2156 cdef void _prune(self, float delta=fst.kDelta, int64 nstate=fst.kNoStateId,
2157 weight=None) except *:
2158 # Threshold is set to semiring Zero (no pruning) if no weight is specified.
2159 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(self.weight_type(),
2161 fst.Prune(self._mfst.get(), wc, nstate, delta)
2162 self._check_mutating_imethod()
2165 float delta=fst.kDelta,
2166 int64 nstate=fst.kNoStateId,
2169 prune(self, delta=0.0009765625, nstate=NO_STATE_ID, weight=None)
2171 Removes paths with weights below a certain threshold.
2173 This operation deletes states and arcs in the input FST that do not belong
2174 to a successful path whose weight is no more (w.r.t the natural semiring
2175 order) than the threshold t \otimes-times the weight of the shortest path in
2176 the input FST. Weights must be commutative and have the path property.
2179 delta: Comparison/quantization delta.
2180 nstate: State number threshold.
2181 weight: A Weight or weight string indicating the desired weight threshold
2182 below which paths are pruned; if omitted, no paths are pruned.
2187 See also: The constructive variant.
2189 self._prune(delta, nstate, weight)
2192 cdef void _push(self,
2193 float delta=fst.kDelta,
2194 bool remove_total_weight=False,
2195 bool to_final=False) except *:
2196 fst.Push(self._mfst.get(), fst.GetReweightType(to_final), delta,
2197 remove_total_weight)
2198 self._check_mutating_imethod()
2201 float delta=fst.kDelta,
2202 bool remove_total_weight=False,
2203 bool to_final=False):
2205 push(self, delta=0.0009765625, remove_total_weight=False, to_final=False)
2207 Pushes weights towards the initial or final states.
2209 This operation destructively produces an equivalent transducer by pushing
2210 the weights towards the initial state or toward the final states. When
2211 pushing weights towards the initial state, the sum of the weight of the
2212 outgoing transitions and final weight at any non-initial state is equal to
2213 one in the resulting machine. When pushing weights towards the final states,
2214 the sum of the weight of the incoming transitions at any state is equal to
2215 one. Weights need to be left distributive when pushing towards the initial
2216 state and right distributive when pushing towards the final states.
2219 delta: Comparison/quantization delta.
2220 remove_total_weight: If pushing weights, should the total weight be
2222 to_final: Push towards final states?
2227 See also: The constructive variant, which also supports label pushing.
2229 self._push(delta, remove_total_weight, to_final)
2232 cdef void _relabel_pairs(self, ipairs=None, opairs=None) except *:
2233 cdef unique_ptr[vector[fst.LabelPair]] _ipairs
2234 _ipairs.reset(new vector[fst.LabelPair]())
2235 cdef unique_ptr[vector[fst.LabelPair]] _opairs
2236 _opairs.reset(new vector[fst.LabelPair]())
2240 for (before, after) in ipairs:
2241 _ipairs.get().push_back(fst.LabelPair(before, after))
2243 for (before, after) in opairs:
2244 _opairs.get().push_back(fst.LabelPair(before, after))
2245 if _ipairs.get().empty() and _opairs.get().empty():
2246 raise FstArgError("No relabeling pairs specified.")
2247 fst.Relabel(self._mfst.get(), deref(_ipairs), deref(_opairs))
2248 self._check_mutating_imethod()
2250 def relabel_pairs(self, ipairs=None, opairs=None):
2252 relabel_pairs(self, ipairs=None, opairs=None)
2254 Replaces input and/or output labels using pairs of labels.
2256 This operation destructively relabels the input and/or output labels of the
2257 FST using pairs of the form (old_ID, new_ID); omitted indices are
2261 ipairs: An iterable containing (older index, newer index) integer pairs.
2262 opairs: An iterable containing (older index, newer index) integer pairs.
2268 FstArgError: No relabeling pairs specified.
2270 See also: `decode`, `encode`, `project`, `relabel_tables`.
2272 self._relabel_pairs(ipairs, opairs)
2275 cdef void _relabel_tables(self,
2276 _SymbolTable old_isymbols=None,
2277 _SymbolTable new_isymbols=None,
2278 unknown_isymbol=b"",
2279 bool attach_new_isymbols=True,
2280 _SymbolTable old_osymbols=None,
2281 _SymbolTable new_osymbols=None,
2282 unknown_osymbol=b"",
2283 bool attach_new_osymbols=True) except *:
2284 if new_isymbols is None and new_osymbols is None:
2285 raise FstArgError("No new SymbolTables specified")
2286 cdef fst.SymbolTable *new_isymbols_ptr = NULL
2287 if new_isymbols is not None:
2288 new_isymbols_ptr = new_isymbols._table
2289 cdef fst.SymbolTable *new_osymbols_ptr = NULL
2290 if new_osymbols is not None:
2291 new_osymbols_ptr = new_osymbols._table
2292 fst.Relabel(self._mfst.get(),
2293 self._fst.get().InputSymbols() if old_isymbols is None else
2294 old_isymbols._table, new_isymbols_ptr, tostring(unknown_isymbol),
2295 attach_new_isymbols,
2296 self._fst.get().OutputSymbols() if old_osymbols is None else
2297 old_osymbols._table, new_osymbols_ptr, tostring(unknown_osymbol),
2298 attach_new_osymbols)
2299 self._check_mutating_imethod()
2301 def relabel_tables(self,
2302 _SymbolTable old_isymbols=None,
2303 _SymbolTable new_isymbols=None,
2304 unknown_isymbol=b"",
2305 bool attach_new_isymbols=True,
2306 _SymbolTable old_osymbols=None,
2307 _SymbolTable new_osymbols=None,
2308 unknown_osymbol=b"",
2309 bool attach_new_osymbols=True):
2311 relabel_tables(self, old_isymbols=None, new_isymbols=None,
2312 unknown_isymbol="", attach_new_isymbols=True,
2313 old_osymbols=None, new_osymbols=None,
2314 unknown_osymbol="", attach_new_osymbols=True)
2316 Replaces input and/or output labels using SymbolTables.
2318 This operation destructively relabels the input and/or output labels of the
2319 FST using user-specified symbol tables; omitted symbols are identity-mapped.
2322 old_isymbols: The old SymbolTable for input labels, defaulting to the
2323 FST's input symbol table.
2324 new_isymbols: A SymbolTable used to relabel the input labels
2325 unknown_isymbol: Input symbol to use to relabel OOVs (if empty,
2326 OOVs raise an exception)
2327 attach_new_isymbols: Should new_isymbols be made the FST's input symbol
2329 old_osymbols: The old SymbolTable for output labels, defaulting to the
2330 FST's output symbol table.
2331 new_osymbols: A SymbolTable used to relabel the output labels.
2332 unknown_osymbol: Outnput symbol to use to relabel OOVs (if empty,
2333 OOVs raise an exception)
2334 attach_new_isymbols: Should new_osymbols be made the FST's output symbol
2341 FstArgError: No SymbolTable specified.
2343 See also: `decode`, `encode`, `project`, `relabel_pairs`.
2345 self._relabel_tables(old_isymbols, new_isymbols,
2346 unknown_isymbol, attach_new_isymbols,
2347 old_osymbols, new_osymbols,
2348 unknown_osymbol, attach_new_osymbols)
2351 cdef void _reserve_arcs(self, int64 state, size_t n) except *:
2352 if not self._mfst.get().ReserveArcs(state, n):
2353 raise FstIndexError("State index out of range")
2354 self._check_mutating_imethod()
2356 def reserve_arcs(self, int64 state, size_t n):
2358 reserve_arcs(self, state, n)
2360 Reserve n arcs at a particular state (best effort).
2363 state: The integer index of a state.
2364 n: The number of arcs to reserve.
2370 FstIndexError: State index out of range.
2372 See also: `reserve_states`.
2374 self._reserve_arcs(state, n)
2377 cdef void _reserve_states(self, int64 n) except *:
2378 self._mfst.get().ReserveStates(n)
2379 self._check_mutating_imethod()
2381 def reserve_states(self, int64 n):
2383 reserve_states(self, n)
2385 Reserve n states (best effort).
2388 n: The number of states to reserve.
2393 See also: `reserve_arcs`.
2395 self._reserve_states(n)
2398 cdef void _reweight(self, potentials, bool to_final=False) except *:
2399 cdef unique_ptr[vector[fst.WeightClass]] _potentials
2400 _potentials.reset(new vector[fst.WeightClass]())
2401 cdef string weight_type = self.weight_type()
2402 for weight in potentials:
2403 _potentials.get().push_back(_get_WeightClass_or_One(self.weight_type(),
2405 fst.Reweight(self._mfst.get(), deref(_potentials),
2406 fst.GetReweightType(to_final))
2407 self._check_mutating_imethod()
2409 def reweight(self, potentials, bool to_final=False):
2411 reweight(self, potentials, to_final=False)
2413 Reweights an FST using an iterable of potentials.
2415 This operation destructively reweights an FST according to the potentials
2416 and in the direction specified by the user. An arc of weight w, with an
2417 origin state of potential p and destination state of potential q, is
2418 reweighted by p^{-1} \otimes (w \otimes q) when reweighting towards the
2419 initial state, and by (p \otimes w) \otimes q^{-1} when reweighting towards
2420 the final states. The weights must be left distributive when reweighting
2421 towards the initial state and right distributive when reweighting towards
2422 the final states (e.g., TropicalWeight and LogWeight).
2425 potentials: An iterable of Weight or weight strings.
2426 to_final: Push towards final states?
2431 self._reweight(potentials, to_final)
2434 cdef void _rmepsilon(self,
2438 int64 nstate=fst.kNoStateId,
2439 float delta=fst.kShortestDelta) except *:
2440 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(self.weight_type(),
2442 cdef unique_ptr[fst.RmEpsilonOptions] opts
2443 opts.reset(new fst.RmEpsilonOptions(_get_queue_type(tostring(queue_type)),
2444 connect, wc, nstate, delta))
2445 fst.RmEpsilon(self._mfst.get(), deref(opts))
2446 self._check_mutating_imethod()
2452 int64 nstate=fst.kNoStateId,
2453 float delta=fst.kShortestDelta):
2455 rmepsilon(self, queue_type="auto", connect=True, weight=None,
2456 nstate=NO_STATE_ID, delta=1e-6):
2458 Removes epsilon transitions.
2460 This operation destructively removes epsilon transitions, i.e., those where
2461 both input and output labels are epsilon) from an FST.
2464 queue_type: A string matching a known queue type; one of: "auto", "fifo",
2465 "lifo", "shortest", "state", "top".
2466 connect: Should output be trimmed?
2467 weight: A Weight or weight string indicating the desired weight threshold
2468 below which paths are pruned; if omitted, no paths are pruned.
2469 nstate: State number threshold.
2470 delta: Comparison/quantization delta.
2475 self._rmepsilon(queue_type, connect, weight, nstate, delta)
2478 cdef void _set_final(self, int64 state, weight=None) except *:
2479 if not self._mfst.get().ValidStateId(state):
2480 raise FstIndexError("State index out of range")
2481 cdef fst.WeightClass wc = _get_WeightClass_or_One(self.weight_type(),
2483 if not self._mfst.get().SetFinal(state, wc):
2484 raise FstOpError("Incompatible or invalid weight")
2485 self._check_mutating_imethod()
2487 def set_final(self, int64 state, weight=None):
2489 set_final(self, state, weight)
2491 Sets the final weight for a state.
2494 state: The integer index of a state.
2495 weight: A Weight or weight string indicating the desired final weight; if
2496 omitted, it is set to semiring One.
2502 FstIndexError: State index out of range.
2503 FstOpError: Incompatible or invalid weight.
2505 See also: `set_start`.
2507 self._set_final(state, weight)
2510 cdef void _set_input_symbols(self, _SymbolTable syms) except *:
2512 self._mfst.get().SetInputSymbols(NULL)
2514 self._mfst.get().SetInputSymbols(syms._table)
2515 self._check_mutating_imethod()
2517 def set_input_symbols(self, _SymbolTable syms):
2519 set_input_symbols(self, syms)
2521 Sets the input symbol table.
2523 Passing None as a value will delete the input symbol table.
2526 syms: A SymbolTable.
2531 See also: `set_output_symbols`.
2533 self._set_input_symbols(syms)
2536 cdef void _set_output_symbols(self, _SymbolTable syms) except *:
2538 self._mfst.get().SetOutputSymbols(NULL)
2540 self._mfst.get().SetOutputSymbols(syms._table)
2541 self._check_mutating_imethod()
2543 def set_output_symbols(self, _SymbolTable syms):
2545 set_output_symbols(self, syms)
2547 Sets the output symbol table.
2549 Passing None as a value will delete the output symbol table.
2552 syms: A SymbolTable.
2557 See also: `set_input_symbols`.
2559 self._set_output_symbols(syms)
2562 cdef void _set_properties(self, uint64 props, uint64 mask):
2563 self._mfst.get().SetProperties(props, mask)
2565 def set_properties(self, uint64 props, uint64 mask):
2567 set_properties(self, props, mask)
2569 Sets the properties bits.
2572 props: The properties to be set.
2573 mask: A mask to be applied to the `props` argument before setting the
2579 self._set_properties(props, mask)
2582 cdef void _set_start(self, int64 state) except *:
2583 if not self._mfst.get().SetStart(state):
2584 raise FstIndexError("State index out of range")
2585 self._check_mutating_imethod()
2587 def set_start(self, int64 state):
2589 set_start(self, state)
2591 Sets a state to be the initial state state.
2594 state: The integer index of a state.
2600 FstIndexError: State index out of range.
2602 See also: `set_final`.
2604 self._set_start(state)
2607 cdef void _topsort(self) except *:
2608 # TopSort returns False if the FST is cyclic, and thus can't be TopSorted.
2609 if not fst.TopSort(self._mfst.get()):
2610 logging.warning("Cannot topsort cyclic FST.")
2611 self._check_mutating_imethod()
2617 Sorts transitions by state IDs.
2619 This operation destructively topologically sorts the FST, if it is acyclic;
2620 otherwise it remains unchanged. Once sorted, all transitions are from lower
2621 state IDs to higher state IDs
2626 See also: `arcsort`.
2631 cdef void _union(self, _Fst ifst) except *:
2632 fst.Union(self._mfst.get(), deref(ifst._fst))
2633 self._check_mutating_imethod()
2635 def union(self, _Fst ifst):
2639 Computes the union (sum) of two FSTs.
2641 This operation computes the union (sum) of two FSTs. If A transduces string
2642 x to y with weight a and B transduces string w to v with weight b, then
2643 their union transduces x to y with weight a and w to v with weight b.
2646 ifst: The second input FST.
2655 # Pseudo-constructors for _Fst and _MutableFst.
2657 # _init_Fst and _init_MutableFst use an FstClass pointer to instantiate _Fst
2658 # and _MutableFst objects, respectively. The latter function is only safe to
2659 # call when the FST being wrapped is known to be kMutable. The caller can
2660 # safely use it when they have either checked this bit (e.g., by using
2661 # _init_XFst) or have themselves constructed a mutable container for the
2662 # FstClass pointer they're passing (e.g., most of the constructive operations,
2663 # storing their results in a VectorFstClass, a derivative of MutableFstClass).
2665 # _create_Fst constructs an empty VectorFstClass of a user-specified arc type,
2666 # and passes this pointer to _init_MutableFst.
2668 # _read_Fst reads an FST from disk, performing FST conversion if requested, and
2669 # then passes this pointer to _init_XFst.
2671 # The Python class Fst provides a wrapper for these two operations. The former
2672 # can be accessed by calling Fst(...), which acts like a class method, and the
2673 # latter via Fst.read(...), which acts like a static method. This is a bit
2674 # nasty, but totally hidden from the Python user.
2677 cdef _Fst _init_Fst(FstClass_ptr tfst):
2678 if tfst.Properties(fst.kError, True):
2679 raise FstOpError("Operation failed")
2680 cdef _Fst ofst = _Fst.__new__(_Fst)
2681 ofst._fst.reset(tfst)
2685 cdef _MutableFst _init_MutableFst(MutableFstClass_ptr tfst):
2686 if tfst.Properties(fst.kError, True):
2687 raise FstOpError("Operation failed")
2688 cdef _MutableFst ofst = _MutableFst.__new__(_MutableFst)
2689 ofst._fst.reset(tfst)
2690 # Makes a copy of it as the derived type! Cool.
2691 ofst._mfst = static_pointer_cast[fst.MutableFstClass, fst.FstClass](ofst._fst)
2695 cdef _Fst _init_XFst(FstClass_ptr tfst):
2696 if tfst.Properties(fst.kMutable, True):
2697 return _init_MutableFst(static_cast[MutableFstClass_ptr](tfst))
2699 return _init_Fst(tfst)
2702 cdef _MutableFst _create_Fst(arc_type=b"standard"):
2703 cdef unique_ptr[fst.VectorFstClass] tfst
2704 tfst.reset(new fst.VectorFstClass(tostring(arc_type)))
2705 if tfst.get() == NULL:
2706 raise FstOpError("Unknown arc type: {!r}".format(arc_type))
2707 return _init_MutableFst(tfst.release())
2710 cpdef _Fst _read(filename):
2711 cdef unique_ptr[fst.FstClass] tfst
2712 tfst.reset(fst.FstClass.Read(tostring(filename)))
2713 if tfst.get() == NULL:
2714 raise FstIOError("Read failed: {!r}".format(filename))
2715 return _init_XFst(tfst.release())
2718 cpdef _Fst _read_from_string(state):
2719 cdef stringstream sstrm
2720 sstrm << tostring(state)
2721 cdef unique_ptr[fst.FstClass] tfst
2722 tfst.reset(fst.FstClass.ReadFromStream(sstrm, b"<pywrapfst>"))
2723 if tfst.get() == NULL:
2724 raise FstIOError("Read failed: <string>")
2725 return _init_XFst(tfst.release())
2731 Fst(arc_type="standard")
2733 Constructs an empty FST.
2736 arc_type: A string indicating the arc type.
2739 FstError: Unknown arc type.
2742 FstOpError: operation failed.
2745 def __new__(cls, arc_type=b"standard"):
2746 return _create_Fst(arc_type)
2753 Reads an FST from a file.
2756 filename: The string location of the input file.
2762 FstIOError: Read failed.
2764 return _read(filename)
2767 def read_from_string(state):
2769 read_from_string(string, fst_type=None)
2771 Reads an FST from a serialized string.
2774 state: A string containing the serialized FST.
2780 FstIOError: Read failed.
2781 FstOpError: Read-time conversion failed.
2783 See also: `write_to_string`.
2785 return _read_from_string(state)
2791 NO_LABEL = fst.kNoLabel
2792 NO_STATE_ID = fst.kNoStateId
2793 # TODO(kbg): Figure out how to access static class variables so I don't have
2794 # to do it this way.
2795 NO_SYMBOL = kNoSymbol
2801 EXPANDED = fst.kExpanded
2802 MUTABLE = fst.kMutable
2804 ACCEPTOR = fst.kAcceptor
2805 NOT_ACCEPTOR = fst.kNotAcceptor
2806 I_DETERMINISTIC = fst.kIDeterministic
2807 NON_I_DETERMINISTIC = fst.kNonIDeterministic
2808 O_DETERMINISTIC = fst.kODeterministic
2809 NON_O_DETERMINISTIC = fst.kNonODeterministic
2810 EPSILONS = fst.kEpsilons
2811 NO_EPSILONS = fst.kNoEpsilons
2812 I_EPSILONS = fst.kIEpsilons
2813 NO_I_EPSILONS = fst.kNoIEpsilons
2814 O_EPSILONS = fst.kOEpsilons
2815 NO_O_EPSILONS = fst.kNoOEpsilons
2816 I_LABEL_SORTED = fst.kILabelSorted
2817 NOT_I_LABEL_SORTED = fst.kNotILabelSorted
2818 O_LABEL_SORTED = fst.kOLabelSorted
2819 NOT_O_LABEL_SORTED = fst.kNotOLabelSorted
2820 WEIGHTED = fst.kWeighted
2821 UNWEIGHTED = fst.kUnweighted
2822 CYCLIC = fst.kCyclic
2823 ACYCLIC = fst.kAcyclic
2824 INITIAL_CYCLIC = fst.kInitialCyclic
2825 INITIAL_ACYCLIC = fst.kInitialAcyclic
2826 TOP_SORTED = fst.kTopSorted
2827 NOT_TOP_SORTED = fst.kNotTopSorted
2828 ACCESSIBLE = fst.kAccessible
2829 NOT_ACCESSIBLE = fst.kNotAccessible
2830 COACCESSIBLE = fst.kCoAccessible
2831 NOT_COACCESSIBLE = fst.kNotCoAccessible
2832 STRING = fst.kString
2833 NOT_STRING = fst.kNotString
2834 WEIGHTED_CYCLES = fst.kWeightedCycles
2835 UNWEIGHTED_CYCLES = fst.kUnweightedCycles
2836 NULL_PROPERTIES = fst.kNullProperties
2837 COPY_PROPERTIES = fst.kCopyProperties
2838 INTRINSIC_PROPERTIES = fst.kIntrinsicProperties
2839 EXTRINSIC_PROPERTIES = fst.kExtrinsicProperties
2840 SET_START_PROPERTIES = fst.kSetStartProperties
2841 SET_FINAL_PROPERTIES = fst.kSetFinalProperties
2842 ADD_STATE_PROPERTIES = fst.kAddStateProperties
2843 ADD_ARC_PROPERTIES = fst.kAddArcProperties
2844 SET_ARC_PROPERTIES = fst.kSetArcProperties
2845 DELETE_STATE_PROPERTIES = fst.kDeleteStatesProperties
2846 DELETE_ARC_PROPERTIES = fst.kDeleteArcsProperties
2847 STATE_SORT_PROPERTIES = fst.kStateSortProperties
2848 ARC_SORT_PROPERTIES = fst.kArcSortProperties
2849 I_LABEL_INVARIANT_PROPERTIES = fst.kILabelInvariantProperties
2850 O_LABEL_INVARIANT_PROPERTIES = fst.kOLabelInvariantProperties
2851 WEIGHT_INVARIANT_PROPERTIES = fst.kWeightInvariantProperties
2852 ADD_SUPERFINAL_PROPERTIES = fst.kAddSuperFinalProperties
2853 RM_SUPERFINAL_PROPERTIES = fst.kRmSuperFinalProperties
2854 BINARY_PROPERTIES = fst.kBinaryProperties
2855 TRINARY_PROPERTIES = fst.kTrinaryProperties
2856 POS_TRINARY_PROPERTIES = fst.kPosTrinaryProperties
2857 NEG_TRINARY_PROPERTIES = fst.kNegTrinaryProperties
2858 FST_PROPERTIES = fst.kFstProperties
2861 ## Arc iterator properties.
2864 ARC_I_LABEL_VALUE = fst.kArcILabelValue
2865 ARC_O_LABEL_VALUE = fst.kArcOLabelValue
2866 ARC_WEIGHT_VALUE = fst.kArcWeightValue
2867 ARC_NEXT_STATE_VALUE = fst.kArcNextStateValue
2868 ARC_NO_CACHE = fst.kArcNoCache
2869 ARC_VALUE_FLAGS = fst.kArcValueFlags
2870 ARC_FLAGS = fst.kArcFlags
2873 ## EncodeMapper properties.
2876 ENCODE_LABELS = fst.kEncodeLabels
2877 ENCODE_WEIGHTS = fst.kEncodeWeights
2878 ENCODE_FLAGS = fst.kEncodeFlags
2881 ## Arc, ArcIterator, and MutableArcIterator.
2884 cdef class Arc(object):
2887 Arc(ilabel, olabel, weight, nextstate)
2889 This class represents an arc while remaining agnostic about the underlying arc
2890 type. Attributes of the arc can be accessed or mutated, and the arc can be
2894 ilabel: The input label.
2895 olabel: The output label.
2896 weight: The arc weight.
2897 nextstate: The destination state for the arc.
2901 return "<Arc at 0x{:x}>".format(id(self))
2903 def __init__(self, int64 ilabel, int64 olabel, weight, int64 nextstate):
2904 cdef fst.WeightClass wc = _get_WeightClass_or_One(b"tropical", weight)
2905 self._arc.reset(new fst.ArcClass(ilabel, olabel, wc, nextstate))
2907 cpdef Arc copy(self):
2908 return Arc(self.ilabel, self.olabel, self.weight, self.nextstate)
2913 return deref(self._arc).ilabel
2915 def __set__(self, int64 value):
2916 deref(self._arc).ilabel = value
2921 return deref(self._arc).olabel
2923 def __set__(self, int64 value):
2924 deref(self._arc).olabel = value
2929 cdef Weight weight = Weight.__new__(Weight)
2930 weight._weight.reset(new fst.WeightClass(deref(self._arc).weight))
2933 def __set__(self, weight):
2934 deref(self._arc).weight = _get_WeightClass_or_One(b"tropical", weight)
2939 return deref(self._arc).nextstate
2941 def __set__(self, int64 value):
2942 deref(self._arc).nextstate = value
2945 cdef Arc _init_Arc(const fst.ArcClass &arc):
2946 cdef Weight weight = Weight.__new__(Weight)
2947 weight._weight.reset(new fst.WeightClass(arc.weight))
2948 return Arc(arc.ilabel, arc.olabel, weight, arc.nextstate)
2951 cdef class ArcIterator(object):
2954 ArcIterator(ifst, state)
2956 This class is used for iterating over the arcs leaving some state of an FST.
2960 return "<ArcIterator at 0x{:x}>".format(id(self))
2962 def __init__(self, _Fst ifst, int64 state):
2963 if not ifst._fst.get().ValidStateId(state):
2964 raise FstIndexError("State index out of range")
2965 # Makes copy of the shared_ptr, potentially extending the FST's lifetime.
2966 self._fst = ifst._fst
2967 self._aiter.reset(new fst.ArcIteratorClass(deref(self._fst), state))
2969 # This just registers this class as a possible iterator.
2973 # Magic method used to get a Pythonic API out of the C++ API.
2977 result = self.value()
2981 cpdef bool done(self):
2985 Indicates whether the iterator is exhausted or not.
2988 True if the iterator is exhausted, False otherwise.
2990 return self._aiter.get().Done()
2992 cpdef uint32 flags(self):
2996 Returns the current iterator behavioral flags.
2999 The current iterator behavioral flags as an integer.
3001 return self._aiter.get().Flags()
3003 cpdef void next(self):
3007 Advances the iterator.
3009 self._aiter.get().Next()
3011 cpdef size_t position(self):
3015 Returns the position of the iterator.
3018 The iterator's position, expressed as an integer.
3020 return self._aiter.get().Position()
3022 cpdef void reset(self):
3026 Resets the iterator to the initial position.
3028 self._aiter.get().Reset()
3030 cpdef void seek(self, size_t a):
3034 Advance the iterator to a new position.
3037 a: The position to seek to.
3039 self._aiter.get().Seek(a)
3041 cpdef void set_flags(self, uint32 flags, uint32 mask):
3043 set_flags(self, flags, mask)
3045 Sets the current iterator behavioral flags.
3048 flags: The properties to be set.
3049 mask: A mask to be applied to the `flags` argument before setting them.
3051 self._aiter.get().SetFlags(flags, mask)
3053 cpdef object value(self):
3057 Returns the current arc.
3059 return _init_Arc(self._aiter.get().Value())
3062 cdef class MutableArcIterator(object):
3065 MutableArcIterator(ifst, state)
3067 This class is used for iterating over the arcs leaving some state of an FST,
3068 also permitting mutation of the current arc.
3072 return "<MutableArcIterator at 0x{:x}>".format(id(self))
3074 def __init__(self, _MutableFst ifst, int64 state):
3075 if not ifst._fst.get().ValidStateId(state):
3076 raise FstIndexError("State index out of range")
3077 # Makes copy of the shared_ptr, potentially extending the FST's lifetime.
3078 self._mfst = ifst._mfst
3079 self._aiter.reset(new fst.MutableArcIteratorClass(ifst._mfst.get(), state))
3081 cpdef bool done(self):
3085 Indicates whether the iterator is exhausted or not.
3088 True if the iterator is exhausted, False otherwise.
3090 return self._aiter.get().Done()
3092 cpdef uint32 flags(self):
3096 Returns the current iterator behavioral flags.
3099 The current iterator behavioral flags as an integer.
3101 return self._aiter.get().Flags()
3103 cpdef void next(self):
3107 Advances the iterator.
3109 self._aiter.get().Next()
3111 cpdef size_t position(self):
3115 Returns the position of the iterator.
3118 The iterator's position, expressed as an integer.
3120 return self._aiter.get().Position()
3122 cpdef void reset(self):
3126 Resets the iterator to the initial position.
3128 self._aiter.get().Reset()
3130 cpdef void seek(self, size_t a):
3134 Advance the iterator to a new position.
3137 a: The position to seek to.
3139 self._aiter.get().Seek(a)
3141 cpdef void set_flags(self, uint32 flags, uint32 mask):
3143 set_flags(self, flags, mask)
3145 Sets the current iterator behavioral flags.
3148 flags: The properties to be set.
3149 mask: A mask to be applied to the `flags` argument before setting them.
3151 self._aiter.get().SetFlags(flags, mask)
3153 cpdef void set_value(self, Arc arc):
3155 set_value(self, arc)
3157 Replace the current arc with a new arc.
3160 arc: The arc to replace the current arc with.
3162 self._aiter.get().SetValue(deref(arc._arc))
3164 cpdef object value(self):
3168 Returns the current arc.
3170 return _init_Arc(self._aiter.get().Value())
3176 cdef class StateIterator(object):
3181 This class is used for iterating over the states in an FST.
3185 return "<StateIterator at 0x{:x}>".format(id(self))
3187 def __init__(self, _Fst ifst):
3188 # Makes copy of the shared_ptr, potentially extending the FST's lifetime.
3189 self._fst = ifst._fst
3190 self._siter.reset(new fst.StateIteratorClass(deref(self._fst)))
3192 # This just registers this class as a possible iterator.
3196 # Magic method used to get a Pythonic API out of the C++ API.
3200 cdef int64 result = self.value()
3204 cpdef bool done(self):
3208 Indicates whether the iterator is exhausted or not.
3211 True if the iterator is exhausted, False otherwise.
3213 return self._siter.get().Done()
3215 cpdef void next(self):
3219 Advances the iterator.
3221 self._siter.get().Next()
3223 cpdef void reset(self):
3227 Resets the iterator to the initial position.
3229 self._siter.get().Reset()
3231 cpdef int64 value(self):
3235 Returns the current state index.
3237 return self._siter.get().Value()
3243 cdef _Fst _map(_Fst ifst,
3244 float delta=fst.kDelta,
3245 map_type=b"identity",
3247 cdef fst.MapType map_type_enum
3248 if not fst.GetMapType(tostring(map_type), addr(map_type_enum)):
3249 raise FstArgError("Unknown map type: {!r}".format(map_type))
3250 cdef fst.WeightClass wc = (_get_WeightClass_or_One(ifst.weight_type(),
3251 weight) if map_type_enum == fst.TIMES_MAPPER else
3252 _get_WeightClass_or_Zero(ifst.weight_type(), weight))
3253 return _init_XFst(fst.Map(deref(ifst._fst), map_type_enum, delta, wc))
3256 cpdef _Fst arcmap(_Fst ifst,
3257 float delta=fst.kDelta,
3258 map_type=b"identity",
3261 arcmap(ifst, delta=0.0009765625, map_type="identity", weight=None)
3263 Constructively applies a transform to all arcs and final states.
3265 This operation transforms each arc and final state in the input FST using
3266 one of the following:
3268 * identity: maps to self.
3269 * input_epsilon: replaces all input labels with epsilon.
3270 * invert: reciprocates all non-Zero weights.
3271 * output_epsilon: replaces all output labels with epsilon.
3272 * plus: adds a constant to all weights.
3273 * quantize: quantizes weights.
3274 * rmweight: replaces all non-Zero weights with 1.
3275 * superfinal: redirects final states to a new superfinal state.
3276 * times: right-multiplies a constant to all weights.
3277 * to_log: converts weights to the log semiring.
3278 * to_log64: converts weights to the log64 semiring.
3279 * to_standard: converts weights to the tropical ("standard") semiring.
3282 ifst: The input FST.
3283 delta: Comparison/quantization delta (ignored unless `map_type` is
3285 map_type: A string matching a known mapping operation (see above).
3286 weight: A Weight or weight string passed to the arc-mapper; ignored unless
3287 `map_type` is `plus` (in which case it defaults to semiring Zero) or
3288 `times` (in which case it defaults to semiring One).
3291 An FST with arcs and final states remapped.
3294 FstArgError: Unknown map type.
3296 See also: `statemap`.
3298 return _map(ifst, delta, map_type, weight)
3301 cpdef _MutableFst compose(_Fst ifst1,
3303 compose_filter=b"auto",
3306 compose(ifst1, ifst2, compose_filter="auto", connect=True)
3308 Constructively composes two FSTs.
3310 This operation computes the composition of two FSTs. If A transduces string
3311 x to y with weight a and B transduces y to z with weight b, then their
3312 composition transduces string x to z with weight a \otimes b. The output
3313 labels of the first transducer or the input labels of the second transducer
3314 must be sorted (or otherwise support appropriate matchers).
3317 ifst1: The first input FST.
3318 ifst2: The second input FST.
3319 compose_filter: A string matching a known composition filter; one of:
3320 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
3321 connect: Should output be trimmed?
3326 See also: `arcsort`.
3328 cdef unique_ptr[fst.VectorFstClass] tfst
3329 tfst.reset(new fst.VectorFstClass(ifst1.arc_type()))
3330 cdef unique_ptr[fst.ComposeOptions] opts
3331 opts.reset(new fst.ComposeOptions(connect,
3332 _get_compose_filter(tostring(compose_filter))))
3333 fst.Compose(deref(ifst1._fst), deref(ifst2._fst), tfst.get(), deref(opts))
3334 return _init_MutableFst(tfst.release())
3337 cpdef _Fst convert(_Fst ifst, fst_type=None):
3339 convert(ifst, fst_type=None)
3341 Constructively converts an FST to a new internal representation.
3344 ifst: The input FST.
3345 fst_type: A string indicating the FST type to convert to, or None if
3346 no conversion is desired.
3349 An equivalent Fst converted to the desired FST type.
3352 FstOpError: Conversion failed.
3354 cdef string fst_type_string = b"" if fst_type is None else tostring(fst_type)
3355 cdef unique_ptr[fst.FstClass] tfst
3356 tfst.reset(fst.Convert(deref(ifst._fst), fst_type_string))
3357 # Script-land Convert returns a null pointer to signal failure.
3358 if tfst.get() == NULL:
3359 raise FstOpError("Conversion to {!r} failed".format(fst_type))
3360 return _init_XFst(tfst.release())
3363 cpdef _MutableFst determinize(_Fst ifst,
3364 float delta=fst.kShortestDelta,
3365 det_type=b"functional",
3366 int64 nstate=fst.kNoStateId,
3367 int64 subsequential_label=0,
3369 bool increment_subsequential_label=False):
3371 determinize(ifst, delta=1e-6, det_type="functional",
3372 nstate=NO_STATE_ID, subsequential_label=0, weight=None,
3373 incremental_subsequential_label=False)
3375 Constructively determinizes a weighted FST.
3377 This operations creates an equivalent FST that has the property that no
3378 state has two transitions with the same input label. For this algorithm,
3379 epsilon transitions are treated as regular symbols (cf. `rmepsilon`).
3382 ifst: The input FST.
3383 delta: Comparison/quantization delta.
3384 det_type: Type of determinization; one of: "functional" (input transducer is
3385 functional), "nonfunctional" (input transducer is not functional) and
3386 disambiguate" (input transducer is not functional but only keep the min
3387 of ambiguous outputs).
3388 nstate: State number threshold.
3389 subsequential_label: Input label of arc corresponding to residual final
3390 output when producing a subsequential transducer.
3391 weight: A Weight or weight string indicating the desired weight threshold
3392 below which paths are pruned; if omitted, no paths are pruned.
3393 increment_subsequential_label: Increment subsequential when creating
3394 several arcs for the residual final output at a given state.
3397 An equivalent deterministic FST.
3400 FstArgError: Unknown determinization type.
3402 See also: `disambiguate`, `rmepsilon`.
3404 cdef unique_ptr[fst.VectorFstClass] tfst
3405 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3406 # Threshold is set to semiring Zero (no pruning) if weight unspecified.
3407 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(),
3409 cdef fst.DeterminizeType determinize_type_enum
3410 if not fst.GetDeterminizeType(tostring(det_type),
3411 addr(determinize_type_enum)):
3412 raise FstArgError("Unknown determinization type: {!r}".format(det_type))
3413 cdef unique_ptr[fst.DeterminizeOptions] opts
3414 opts.reset(new fst.DeterminizeOptions(delta, wc, nstate, subsequential_label,
3415 determinize_type_enum,
3416 increment_subsequential_label))
3417 fst.Determinize(deref(ifst._fst), tfst.get(), deref(opts))
3418 return _init_MutableFst(tfst.release())
3421 cpdef _MutableFst difference(_Fst ifst1,
3423 compose_filter=b"auto",
3426 difference(ifst1, ifst2, compose_filter="auto", connect=True)
3428 Constructively computes the difference of two FSTs.
3430 This operation computes the difference between two FSAs. Only strings that are
3431 in the first automaton but not in second are retained in the result. The first
3432 argument must be an acceptor; the second argument must be an unweighted,
3433 epsilon-free, deterministic acceptor. The output labels of the first
3434 transducer or the input labels of the second transducer must be sorted (or
3435 otherwise support appropriate matchers).
3438 ifst1: The first input FST.
3439 ifst2: The second input FST.
3440 compose_filter: A string matching a known composition filter; one of:
3441 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
3442 connect: Should the output FST be trimmed?
3445 An FST representing the difference of the FSTs.
3447 cdef unique_ptr[fst.VectorFstClass] tfst
3448 tfst.reset(new fst.VectorFstClass(ifst1.arc_type()))
3449 cdef unique_ptr[fst.ComposeOptions] opts
3450 opts.reset(new fst.ComposeOptions(connect, _get_compose_filter(
3451 tostring(compose_filter))))
3452 fst.Difference(deref(ifst1._fst), deref(ifst2._fst), tfst.get(), deref(opts))
3453 return _init_MutableFst(tfst.release())
3456 cpdef _MutableFst disambiguate(_Fst ifst,
3457 float delta=fst.kDelta,
3458 int64 nstate=fst.kNoStateId,
3459 int64 subsequential_label=0,
3462 disambiguate(ifst, delta=0.0009765625, nstate=NO_STATE_ID,
3463 subsequential_label=0, weight=None):
3465 Constructively disambiguates a weighted transducer.
3467 This operation disambiguates a weighted transducer. The result will be an
3468 equivalent FST that has the property that no two successful paths have the
3469 same input labeling. For this algorithm, epsilon transitions are treated as
3470 regular symbols (cf. `rmepsilon`).
3473 ifst: The input FST.
3474 delta: Comparison/quantization delta.
3475 nstate: State number threshold.
3476 subsequential_label: Input label of arc corresponding to residual final
3477 output when producing a subsequential transducer.
3478 weight: A Weight or weight string indicating the desired weight threshold
3479 below which paths are pruned; if omitted, no paths are pruned.
3482 An equivalent disambiguated FST.
3484 See also: `determinize`, `rmepsilon`.
3486 cdef unique_ptr[fst.VectorFstClass] tfst
3487 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3488 # Threshold is set to semiring Zero (no pruning) if no weight is specified.
3489 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(),
3491 cdef unique_ptr[fst.DisambiguateOptions] opts
3492 opts.reset(new fst.DisambiguateOptions(delta, wc, nstate,
3493 subsequential_label))
3494 fst.Disambiguate(deref(ifst._fst), tfst.get(), deref(opts))
3495 return _init_MutableFst(tfst.release())
3498 cpdef _MutableFst epsnormalize(_Fst ifst, bool eps_norm_output=False):
3500 epsnormalize(ifst, eps_norm_output=False)
3502 Constructively epsilon-normalizes an FST.
3504 This operation creates an equivalent FST that is epsilon-normalized. An
3505 acceptor is epsilon-normalized if it it is epsilon-removed (cf. `rmepsilon`).
3506 A transducer is input epsilon-normalized if, in addition, along any path, all
3507 arcs with epsilon input labels follow all arcs with non-epsilon input labels.
3508 Output epsilon-normalized is defined similarly. The input FST must be
3512 ifst: The input FST.
3513 eps_norm_output: Should the FST be output epsilon-normalized?
3516 An equivalent epsilon-normalized FST.
3518 See also: `rmepsilon`.
3520 cdef unique_ptr[fst.VectorFstClass] tfst
3521 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3522 fst.EpsNormalize(deref(ifst._fst), tfst.get(), fst.EPS_NORM_OUTPUT if
3523 eps_norm_output else
3525 return _init_MutableFst(tfst.release())
3528 cpdef bool equal(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta):
3530 equal(ifst1, ifst2, delta=0.0009765625)
3534 This function tests whether two FSTs have the same states with the same
3535 numbering and the same transitions with the same labels and weights in the
3539 ifst1: The first input FST.
3540 ifst2: The second input FST.
3541 delta: Comparison/quantization delta.
3544 True if the FSTs satisfy the above condition, else False.
3546 See also: `equivalent`, `isomorphic`, `randequivalent`.
3548 return fst.Equal(deref(ifst1._fst), deref(ifst2._fst), delta)
3551 cpdef bool equivalent(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta) except *:
3553 equivalent(ifst1, ifst2, delta=0.0009765625)
3555 Are the two acceptors equivalent?
3557 This operation tests whether two epsilon-free deterministic weighted
3558 acceptors are equivalent, that is if they accept the same strings with the
3562 ifst1: The first input FST.
3563 ifst2: The second input FST.
3564 delta: Comparison/quantization delta.
3567 True if the FSTs satisfy the above condition, else False.
3569 See also: `equal`, `isomorphic`, `randequivalent`.
3571 return fst.Equivalent(deref(ifst1._fst), deref(ifst2._fst), delta)
3574 cpdef _MutableFst intersect(_Fst ifst1,
3576 compose_filter=b"auto",
3579 intersect(ifst1, ifst2, compose_filter="auto", connect=True)
3581 Constructively intersects two FSTs.
3583 This operation computes the intersection (Hadamard product) of two FSTs.
3584 Only strings that are in both automata are retained in the result. The two
3585 arguments must be acceptors. One of the arguments must be label-sorted (or
3586 otherwise support appropriate matchers).
3589 ifst1: The first input FST.
3590 ifst2: The second input FST.
3591 compose_filter: A string matching a known composition filter; one of:
3592 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
3593 connect: Should output be trimmed?
3598 cdef unique_ptr[fst.VectorFstClass] tfst
3599 tfst.reset(new fst.VectorFstClass(ifst1.arc_type()))
3600 cdef unique_ptr[fst.ComposeOptions] opts
3601 opts.reset(new fst.ComposeOptions(connect,
3602 _get_compose_filter(tostring(compose_filter))))
3603 fst.Intersect(deref(ifst1._fst), deref(ifst2._fst), tfst.get(), deref(opts))
3604 return _init_MutableFst(tfst.release())
3607 cpdef bool isomorphic(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta):
3609 isomorphic(ifst1, ifst2, delta=0.0009765625)
3611 Are the two acceptors isomorphic?
3613 This operation determines if two transducers with a certain required
3614 determinism have the same states, irrespective of numbering, and the same
3615 transitions with the same labels and weights, irrespective of ordering. In
3616 other words, FSTs A, B are isomorphic if and only if the states of A can be
3617 renumbered and the transitions leaving each state reordered so the two are
3618 equal (according to the definition given in `equal`).
3621 ifst1: The first input FST.
3622 ifst2: The second input FST.
3623 delta: Comparison/quantization delta.
3626 True if the two transducers satisfy the above condition, else False.
3628 See also: `equal`, `equivalent`, `randequivalent`.
3630 return fst.Isomorphic(deref(ifst1._fst), deref(ifst2._fst), delta)
3633 cpdef _MutableFst prune(_Fst ifst,
3634 float delta=fst.kDelta,
3635 int64 nstate=fst.kNoStateId,
3638 prune(ifst, delta=0.0009765625, nstate=NO_STATE_ID, weight=None)
3640 Constructively removes paths with weights below a certain threshold.
3642 This operation deletes states and arcs in the input FST that do not belong
3643 to a successful path whose weight is no more (w.r.t the natural semiring
3644 order) than the threshold t \otimes-times the weight of the shortest path in
3645 the input FST. Weights must be commutative and have the path property.
3648 ifst: The input FST.
3649 delta: Comparison/quantization delta.
3650 nstate: State number threshold.
3651 weight: A Weight or weight string indicating the desired weight threshold
3652 below which paths are pruned; if omitted, no paths are pruned.
3657 See also: The destructive variant.
3659 cdef unique_ptr[fst.VectorFstClass] tfst
3660 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3661 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
3662 fst.Prune(deref(ifst._fst), tfst.get(), wc, nstate, delta)
3663 return _init_MutableFst(tfst.release())
3666 cpdef _MutableFst push(_Fst ifst,
3667 float delta=fst.kDelta,
3668 bool push_weights=False,
3669 bool push_labels=False,
3670 bool remove_common_affix=False,
3671 bool remove_total_weight=False,
3672 bool to_final=False):
3674 push(ifst, delta=0.0009765625, push_weights=False, push_labels=False,
3675 remove_common_affix=False, remove_total_weight=False, to_final=False)
3677 Constructively pushes weights/labels towards initial or final states.
3679 This operation produces an equivalent transducer by pushing the weights
3680 and/or the labels towards the initial state or toward the final states.
3682 When pushing weights towards the initial state, the sum of the weight of the
3683 outgoing transitions and final weight at any non-initial state is equal to 1
3684 in the resulting machine. When pushing weights towards the final states, the
3685 sum of the weight of the incoming transitions at any state is equal to 1.
3686 Weights need to be left distributive when pushing towards the initial state
3687 and right distributive when pushing towards the final states.
3689 Pushing labels towards the initial state consists in minimizing at every
3690 state the length of the longest common prefix of the output labels of the
3691 outgoing paths. Pushing labels towards the final states consists in
3692 minimizing at every state the length of the longest common suffix of the
3693 output labels of the incoming paths.
3696 ifst: The input FST.
3697 delta: Comparison/quantization delta.
3698 push_weights: Should weights be pushed?
3699 push_labels: Should labels be pushed?
3700 remove_common_affix: If pushing labels, should common prefix/suffix be
3702 remove_total_weight: If pushing weights, should total weight be removed?
3703 to_final: Push towards final states?
3706 An equivalent pushed FST.
3708 See also: The destructive variant.
3710 # This is copied, almost verbatim, from ./fstpush.cc.
3711 cdef unique_ptr[fst.VectorFstClass] tfst
3712 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3713 cdef uint32 flags = fst.GetPushFlags(push_weights, push_labels,
3714 remove_common_affix, remove_total_weight)
3715 fst.Push(deref(ifst._fst), tfst.get(), flags, fst.GetReweightType(to_final),
3717 return _init_MutableFst(tfst.release())
3720 cpdef bool randequivalent(_Fst ifst1,
3723 float delta=fst.kDelta,
3726 int32 max_length=INT32_MAX) except *:
3728 randequivalent(ifst1, ifst2, npath=1, delta=0.0009765625, seed=0,
3729 select="uniform", max_length=2147483647)
3731 Are two acceptors stochastically equivalent?
3733 This operation tests whether two FSTs are equivalent by randomly generating
3734 paths alternatively in each of the two FSTs. For each randomly generated path,
3735 the algorithm computes for each of the two FSTs the sum of the weights of all
3736 the successful paths sharing the same input and output labels as the randomly
3737 generated path and checks that these two values are within `delta`.
3740 ifst1: The first input FST.
3741 ifst2: The second input FST.
3742 npath: The number of random paths to generate.
3743 delta: Comparison/quantization delta.
3744 seed: An optional seed value for random path generation; if zero, the
3745 current time and process ID is used.
3746 select: A string matching a known random arc selection type; one of:
3747 "uniform", "log_prob", "fast_log_prob".
3748 max_length: The maximum length of each random path.
3751 True if the two transducers satisfy the above condition, else False.
3753 See also: `equal`, `equivalent`, `isomorphic`, `randgen`.
3755 cdef fst.RandArcSelection ras = _get_rand_arc_selection(tostring(select))
3756 cdef unique_ptr[fst.RandGenOptions[fst.RandArcSelection]] opts
3757 # The three trailing options will be ignored by RandEquivalent.
3758 opts.reset(new fst.RandGenOptions[fst.RandArcSelection](ras, max_length,
3761 seed = time(NULL) + getpid()
3762 return fst.RandEquivalent(deref(ifst1._fst), deref(ifst2._fst), npath, delta,
3766 cpdef _MutableFst randgen(_Fst ifst,
3770 int32 max_length=INT32_MAX,
3771 bool weighted=False,
3772 bool remove_total_weight=False):
3774 randgen(ifst, npath=1, seed=0, select="uniform", max_length=2147483647,
3775 weight=False, remove_total_weight=False)
3777 Randomly generate successful paths in an FST.
3779 This operation randomly generates a set of successful paths in the input FST.
3780 This relies on a mechanism for selecting arcs, specified using the `select`
3781 argument. The default selector, "uniform", randomly selects a transition
3782 using a uniform distribution. The "log_prob" selector randomly selects a
3783 transition w.r.t. the weights treated as negative log probabilities after
3784 normalizing for the total weight leaving the state. In all cases, finality is
3785 treated as a transition to a super-final state.
3788 ifst: The input FST.
3789 npath: The number of random paths to generate.
3790 seed: An optional seed value for random path generation; if zero, the
3791 current time and process ID is used.
3792 select: A string matching a known random arc selection type; one of:
3793 "uniform", "log_prob", "fast_log_prob".
3794 max_length: The maximum length of each random path.
3795 weighted: Should the output be weighted by path count?
3796 remove_total_weight: Should the total weight be removed (ignored when
3797 `weighted` is False)?
3800 An FST containing one or more random paths.
3802 See also: `randequivalent`.
3804 cdef fst.RandArcSelection ras = _get_rand_arc_selection(tostring(select))
3805 cdef unique_ptr[fst.RandGenOptions[fst.RandArcSelection]] opts
3806 opts.reset(new fst.RandGenOptions[fst.RandArcSelection](ras, max_length,
3808 remove_total_weight))
3809 cdef unique_ptr[fst.VectorFstClass] tfst
3810 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3812 seed = time(NULL) + getpid()
3813 fst.RandGen(deref(ifst._fst), tfst.get(), seed, deref(opts))
3814 return _init_MutableFst(tfst.release())
3817 cpdef _MutableFst replace(pairs,
3818 call_arc_labeling=b"input",
3819 return_arc_labeling=b"neither",
3820 bool epsilon_on_replace=False,
3821 int64 return_label=0):
3823 replace(pairs, call_arc_labeling="input", return_arc_labeling="neither",
3824 epsilon_on_replace=False, return_label=0)
3826 Recursively replaces arcs in the FST with other FST(s).
3828 This operation performs the dynamic replacement of arcs in one FST with
3829 another FST, allowing the definition of FSTs analogous to RTNs. It takes as
3830 input a set of pairs of a set of pairs formed by a non-terminal label and
3831 its corresponding FST, and a label identifying the root FST in that set.
3832 The resulting FST is obtained by taking the root FST and recursively replacing
3833 each arc having a nonterminal as output label by its corresponding FST. More
3834 precisely, an arc from state s to state d with (nonterminal) output label n in
3835 this FST is replaced by redirecting this "call" arc to the initial state of a
3836 copy F of the FST for n, and adding "return" arcs from each final state of F
3837 to d. Optional arguments control how the call and return arcs are labeled; by
3838 default, the only non-epsilon label is placed on the call arc.
3842 pairs: An iterable of (nonterminal label, FST) pairs, where the former is an
3843 unsigned integer and the latter is an Fst instance.
3844 call_arc_labeling: A string indicating which call arc labels should be
3845 non-epsilon. One of: "input" (default), "output", "both", "neither".
3846 This value is set to "neither" if epsilon_on_replace is True.
3847 return_arc_labeling: A string indicating which return arc labels should be
3848 non-epsilon. One of: "input", "output", "both", "neither" (default).
3849 This value is set to "neither" if epsilon_on_replace is True.
3850 epsilon_on_replace: Should call and return arcs be epsilon arcs? If True,
3851 this effectively overrides call_arc_labeling and return_arc_labeling,
3852 setting both to "neither".
3853 return_label: The integer label for return arcs.
3856 An FST resulting from expanding the input RTN.
3858 cdef vector[fst.LabelFstClassPair] _pairs
3859 cdef int64 root_label
3863 (root_label, ifst) = next(it)
3864 _pairs.push_back(fst.LabelFstClassPair(root_label, ifst._fst.get()))
3865 cdef unique_ptr[fst.VectorFstClass] tfst
3866 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3867 for (label, ifst) in it:
3868 _pairs.push_back(fst.LabelFstClassPair(label, ifst._fst.get()))
3869 cdef fst.ReplaceLabelType cal = _get_replace_label_type(
3870 tostring(call_arc_labeling), epsilon_on_replace)
3871 cdef fst.ReplaceLabelType ral = _get_replace_label_type(
3872 tostring(return_arc_labeling), epsilon_on_replace)
3873 cdef unique_ptr[fst.ReplaceOptions] opts
3874 opts.reset(new fst.ReplaceOptions(root_label, cal, ral, return_label))
3875 fst.Replace(_pairs, tfst.get(), deref(opts))
3876 return _init_MutableFst(tfst.release())
3879 cpdef _MutableFst reverse(_Fst ifst, bool require_superinitial=True):
3881 reverse(ifst, require_superinitial=True)
3883 Constructively reverses an FST's transduction.
3885 This operation reverses an FST. If A transduces string x to y with weight a,
3886 then the reverse of A transduces the reverse of x to the reverse of y with
3887 weight a.Reverse(). (Typically, a = a.Reverse() and Arc = RevArc, e.g.,
3888 TropicalWeight and LogWeight.) In general, e.g., when the weights only form a
3889 left or right semiring, the output arc type must match the input arc type.
3892 ifst: The input FST.
3893 require_superinitial: Should a superinitial state be created?
3898 cdef unique_ptr[fst.VectorFstClass] tfst
3899 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3900 fst.Reverse(deref(ifst._fst), tfst.get(), require_superinitial)
3901 return _init_MutableFst(tfst.release())
3904 # Pure C++ helper for shortestdistance.
3907 cdef vector[fst.WeightClass] *_shortestdistance(_Fst ifst,
3908 float delta=fst.kShortestDelta,
3909 int64 nstate=fst.kNoStateId,
3911 bool reverse=False) except *:
3912 cdef unique_ptr[vector[fst.WeightClass]] distance
3913 distance.reset(new vector[fst.WeightClass]())
3914 # For scoping reasons, these have to be declared here even though they may
3915 # not be used in all cases.
3916 cdef unique_ptr[fst.ShortestDistanceOptions] opts
3918 # Only the simpler signature supports shortest distance to final states;
3919 # `nstate` and `queue_type` arguments are ignored.
3920 fst.ShortestDistance(deref(ifst._fst), distance.get(), True, delta)
3922 opts.reset(new fst.ShortestDistanceOptions(
3923 _get_queue_type(tostring(queue_type)), fst.ANY_ARC_FILTER, nstate,
3925 fst.ShortestDistance(deref(ifst._fst), distance.get(), deref(opts))
3926 return distance.release()
3929 def shortestdistance(_Fst ifst,
3930 float delta=fst.kShortestDelta,
3931 int64 nstate=fst.kNoStateId,
3933 bool reverse=False):
3935 shortestdistance(ifst, delta=1e-6, nstate=NO_STATE_ID,
3936 queue_type="auto", reverse=False)
3938 Compute the shortest distance from the initial or final state.
3940 This operation computes the shortest distance from the initial state (when
3941 `reverse` is False) or from every state to the final state (when `reverse` is
3942 True). The shortest distance from p to q is the \otimes-sum of the weights of
3943 all the paths between p and q. The weights must be right (if `reverse` is
3944 False) or left (if `reverse` is True) distributive, and k-closed (i.e., 1
3945 \otimes x \otimes x^2 \otimes ... \otimes x^{k + 1} = 1 \otimes x \otimes x^2
3946 \otimes ... \otimes x^k; e.g., TropicalWeight).
3949 ifst: The input FST.
3950 delta: Comparison/quantization delta.
3951 nstate: State number threshold (ignored if `reverse` is True).
3952 queue_type: A string matching a known queue type; one of: "auto", "fifo",
3953 "lifo", "shortest", "state", "top" (ignored if `reverse` is True).
3954 reverse: Should the reverse distance (from each state to the final state)
3958 A list of Weight objects representing the shortest distance for each state.
3960 cdef unique_ptr[vector[fst.WeightClass]] distance
3961 distance.reset(_shortestdistance(ifst, delta, nstate, queue_type, reverse))
3962 cdef string weight_type = ifst.weight_type()
3963 return [Weight(weight_type, weight.ToString()) for weight in deref(distance)]
3966 cpdef _MutableFst shortestpath(_Fst ifst,
3967 float delta=fst.kShortestDelta,
3969 int64 nstate=fst.kNoStateId,
3974 shortestpath(ifst, delta=1e-6, nshortest=1, nstate=NO_STATE_ID,
3975 queue_type="auto", unique=False, weight=None)
3977 Construct an FST containing the shortest path(s) in the input FST.
3979 This operation produces an FST containing the n-shortest paths in the input
3980 FST. The n-shortest paths are the n-lowest weight paths w.r.t. the natural
3981 semiring order. The single path that can be read from the ith of at most n
3982 transitions leaving the initial state of the resulting FST is the ith
3983 shortest path. The weights need to be right distributive and have the path
3984 property. They also need to be left distributive as well for n-shortest with
3985 n > 1 (e.g., TropicalWeight).
3988 ifst: The input FST.
3989 delta: Comparison/quantization delta.
3990 nshortest: The number of paths to return.
3991 nstate: State number threshold.
3992 queue_type: A string matching a known queue type; one of: "auto", "fifo",
3993 "lifo", "shortest", "state", "top".
3994 unique: Should the resulting FST only contain distinct paths? (Requires
3995 the input FST to be an acceptor; epsilons are treated as if they are
3997 weight: A Weight or weight string indicating the desired weight threshold
3998 below which paths are pruned; if omitted, no paths are pruned.
4001 An FST containing the n-shortest paths.
4003 cdef unique_ptr[fst.VectorFstClass] tfst
4004 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
4005 # Threshold is set to semiring Zero (no pruning) if no weight is specified.
4006 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
4007 cdef unique_ptr[fst.ShortestPathOptions] opts
4008 opts.reset(new fst.ShortestPathOptions(_get_queue_type(tostring(queue_type)),
4009 nshortest, unique, delta, wc, nstate))
4010 fst.ShortestPath(deref(ifst._fst), tfst.get(), deref(opts))
4011 return _init_MutableFst(tfst.release())
4014 cpdef _Fst statemap(_Fst ifst, map_type):
4016 state_map(ifst, map_type)
4018 Constructively applies a transform to all states.
4020 This operation transforms each state according to the requested map type.
4021 Note that currently, only one state-mapping operation is supported.
4024 ifst: The input FST.
4025 map_type: A string matching a known mapping operation; one of: "arc_sum"
4026 (sum weights of identically-labeled multi-arcs), "arc_unique" (deletes
4027 non-unique identically-labeled multi-arcs).
4030 An FST with states remapped.
4033 FstArgError: Unknown map type.
4037 return _map(ifst, fst.kDelta, map_type, None)
4040 cpdef _MutableFst synchronize(_Fst ifst):
4044 Constructively synchronizes an FST.
4046 This operation synchronizes a transducer. The result will be an equivalent
4047 FST that has the property that during the traversal of a path, the delay is
4048 either zero or strictly increasing, where the delay is the difference between
4049 the number of non-epsilon output labels and input labels along the path. For
4050 the algorithm to terminate, the input transducer must have bounded delay,
4051 i.e., the delay of every cycle must be zero.
4054 ifst: The input FST.
4057 An equivalent synchronized FST.
4059 cdef unique_ptr[fst.VectorFstClass] tfst
4060 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
4061 fst.Synchronize(deref(ifst._fst), tfst.get())
4062 return _init_MutableFst(tfst.release())
4068 cdef class Compiler(object):
4071 Compiler(fst_type="vector", arc_type="standard", isymbols=None,
4072 osymbols=None, ssymbols=None, acceptor=False, keep_isymbols=False,
4073 keep_osymbols=False, keep_state_numbering=False,
4074 allow_negative_labels=False)
4076 Class used to compile FSTs from strings.
4078 This class is used to compile FSTs specified using the AT&T FSM library
4079 format described here:
4081 http://web.eecs.umich.edu/~radev/NLP-fall2015/resources/fsm_archive/fsm.5.html
4083 This is the same format used by the `fstcompile` executable.
4085 Compiler options (symbol tables, etc.) are set at construction time.
4087 compiler = fst.Compiler(isymbols=ascii_syms, osymbols=ascii_syms)
4089 Once constructed, Compiler instances behave like a file handle opened for
4093 print >> compiler, "0 1 50 50"
4094 print >> compiler, "1 2 49 49"
4095 print >> compiler, "2 2 49 49"
4096 print >> compiler, "2"
4098 The `compile` method returns an actual FST instance:
4100 sheep_machine = compiler.compile()
4102 Compilation flushes the internal buffer, so the compiler instance can be
4103 reused to compile new machines with the same symbol tables (etc.)
4106 fst_type: A string indicating the container type for the compiled FST.
4107 arc_type: A string indicating the arc type for the compiled FST.
4108 isymbols: An optional SymbolTable used to label input symbols.
4109 osymbols: An optional SymbolTable used to label output symbols.
4110 ssymbols: An optional SymbolTable used to label states.
4111 acceptor: Should the FST be rendered in acceptor format if possible?
4112 keep_isymbols: Should the input symbol table be stored in the FST?
4113 keep_osymbols: Should the output symbol table be stored in the FST?
4114 keep_state_numbering: Should the state numbering be preserved?
4115 allow_negative_labels: Should negative labels be allowed? (Not
4116 recommended; may cause conflicts).
4120 string fst_type=b"vector",
4121 string arc_type=b"standard",
4122 SymbolTable isymbols=None,
4123 SymbolTable osymbols=None,
4124 SymbolTable ssymbols=None,
4125 bool acceptor=False,
4126 bool keep_isymbols=False,
4127 bool keep_osymbols=False,
4128 bool keep_state_numbering=False,
4129 bool allow_negative_labels=False):
4130 self._sstrm.reset(new stringstream())
4131 self._fst_type = tostring(fst_type)
4132 self._arc_type = tostring(arc_type)
4133 self._isymbols = NULL
4134 if isymbols is not None:
4135 self._isymbols = isymbols._table
4136 self._osymbols = NULL
4137 if osymbols is not None:
4138 self._osymbols = osymbols._table
4139 self._ssymbols = NULL
4140 if ssymbols is not None:
4141 self._ssymbols = ssymbols._table
4142 self._acceptor = acceptor
4143 self._keep_isymbols = keep_isymbols
4144 self._keep_osymbols = keep_osymbols
4145 self._keep_state_numbering = keep_state_numbering
4146 self._allow_negative_labels = allow_negative_labels
4148 cpdef _Fst compile(self):
4152 Compiles the FST in the compiler string buffer.
4154 This method compiles the FST and returns the resulting machine.
4157 The FST described by the compiler string buffer.
4160 FstOpError: Compilation failed.
4162 cdef unique_ptr[fst.FstClass] tfst
4163 tfst.reset(fst.CompileFstInternal(deref(self._sstrm),
4164 b"<pywrapfst>", self._fst_type, self._arc_type, self._isymbols,
4165 self._osymbols, self._ssymbols, self._acceptor, self._keep_isymbols,
4166 self._keep_osymbols, self._keep_state_numbering,
4167 self._allow_negative_labels))
4168 self._sstrm.reset(new stringstream())
4169 if tfst.get() == NULL:
4170 raise FstOpError("Compilation failed")
4171 return _init_XFst(tfst.release())
4173 cpdef void write(self, expression):
4177 Writes a string into the compiler string buffer.
4179 This method adds a line to the compiler string buffer. It is normally
4180 invoked using the right shift operator, like so:
4182 compiler = fst.Compiler()
4183 print >> compiler, "0 0 49 49"
4184 print >> compiler, "0"
4187 expression: A string expression to add to compiler string buffer.
4189 deref(self._sstrm) << tostring(expression)
4192 ## FarReader and FarWriter.
4195 cdef class FarReader(object):
4200 FAR ("Fst ARchive") reader object.
4202 This class is used to read a FAR from disk. FARs contain one or more FSTs (of
4203 the same arc type) indexed by a unique string key. To construct a FarReader
4204 object, use the `open` class method.
4207 arc_type: A string indicating the arc type.
4208 far_type: A string indicating the FAR type.
4212 raise FstDeletedConstructorError(
4213 "Cannot construct {}".format(self.__class__.__name__))
4216 return "<{} FarReader at 0x{:x}>".format(self.far_type(), id(self))
4219 def open(cls, *filenames):
4221 FarReader.open(*filenames)
4223 Creates a FarReader object.
4225 This class method creates a FarReader given the string location of one or
4226 more FAR files on disk.
4229 *filenames: The string location of one or more input FAR files.
4232 A new FarReader instance.
4235 FstIOError: Read failed.
4237 filenames = [tostring(filename) for filename in filenames]
4238 cdef unique_ptr[fst.FarReaderClass] tfar
4239 tfar.reset(fst.FarReaderClass.Open(filenames))
4240 if tfar.get() == NULL:
4241 raise FstIOError("Read failed: {!r}".format(filenames))
4242 cdef FarReader result = FarReader.__new__(FarReader)
4243 result._reader.reset(tfar.release())
4246 cpdef string arc_type(self):
4250 Returns a string indicating the arc type.
4252 return self._reader.get().ArcType()
4254 cpdef bool done(self):
4258 Indicates whether the iterator is exhausted or not.
4261 True if the iterator is exhausted, False otherwise.
4263 return self._reader.get().Done()
4265 cpdef bool error(self):
4269 Indicates whether the FarReader has encountered an error.
4272 True if the FarReader is in an errorful state, False otherwise.
4274 return self._reader.get().Error()
4276 cpdef string far_type(self):
4277 return fst.GetFarTypeString(self._reader.get().Type())
4279 cpdef bool find(self, key) except *:
4283 Sets the current position to the first entry greater than or equal to the
4284 key (a string) and indicates whether or not a match was found.
4290 True if the key was found, False otherwise.
4292 return self._reader.get().Find(tostring(key))
4294 cpdef _Fst get_fst(self):
4298 Returns the FST at the current position.
4301 A copy of the FST at the current position.
4303 return _init_XFst(new fst.FstClass(
4304 deref(self._reader.get().GetFstClass())))
4306 cpdef string get_key(self):
4310 Returns the string key at the current position.
4313 The string key at the current position.
4315 return self._reader.get().GetKey()
4317 cpdef void next(self):
4321 Advances the iterator.
4323 self._reader.get().Next()
4325 cpdef void reset(self):
4329 Resets the iterator to the initial position.
4331 self._reader.get().Reset()
4333 def __getitem__(self, key):
4334 cdef string ckey = tostring(key)
4335 if self.get_key() == ckey or self._reader.get().Find(ckey):
4336 return self.get_fst()
4340 cdef class FarWriter(object):
4345 FAR ("Fst ARchive") writer object.
4347 This class is used to write FSTs (of the same arc type) to a FAR on disk. To
4348 construct a FarWriter, use the `create` class method.
4350 Note that the data is not guaranteed to flush to disk until the FarWriter
4351 is garbage-collected. If a FarWriter has been assigned to only one variable,
4352 then calling `del` on that variable should decrement the object's reference
4353 count from 1 to 0, triggering a flush to disk on the next GC cycle.
4356 arc_type: A string indicating the arc type.
4357 far_type: A string indicating the FAR type.
4361 raise FstDeletedConstructorError(
4362 "Cannot construct {}".format(self.__class__.__name__))
4365 return "<{} FarWriter at 0x{:x}>".format(self.far_type(), id(self))
4368 def create(cls, filename, arc_type=b"standard", far_type=b"default"):
4372 Creates a FarWriter object.
4374 This class method creates a FarWriter given the desired output location,
4375 arc type, and FAR type.
4378 filename: The string location for the output FAR files.
4379 arc_type: A string indicating the arc type.
4380 far_type: A string indicating the FAR type; one of: "fst", "stlist",
4381 "sttable", "sstable", "default".
4384 A new FarWriter instance.
4387 FstIOError: Read failed.
4389 cdef fst.FarType ft = fst.GetFarType(tostring(far_type))
4390 cdef fst.FarWriterClass *tfar = fst.FarWriterClass.Create(
4391 tostring(filename), tostring(arc_type), ft)
4393 raise FstIOError("Open failed: {!r}".format(filename))
4394 cdef FarWriter result = FarWriter.__new__(FarWriter)
4395 result._writer.reset(tfar)
4398 # NB: Invoking this method may be dangerous: calling any other method on the
4399 # instance after this is invoked may result in a null dereference.
4400 cdef void close(self):
4401 self._writer.reset()
4403 cpdef void add(self, key, _Fst ifst) except *:
4405 add(self, key, ifst)
4407 Adds an FST to the FAR.
4409 This method adds an FST to the FAR which can be retrieved with the
4410 specified string key.
4413 key: The string used to key the input FST.
4414 ifst: The FST to write to the FAR.
4417 FstArgError: Key out of order.
4418 FstOpError: Incompatible or invalid arc type.
4420 # Failure here results from passing an FST with a different arc type than
4421 # used by the FAR was initialized to use.
4422 if not self._writer.get().Add(tostring(key), deref(ifst._fst)):
4423 raise FstOpError("Incompatible or invalid arc type")
4424 # An error here usually indicates a key out of order.
4425 if self._writer.get().Error():
4426 raise FstArgError("Key out of order")
4428 cpdef string arc_type(self):
4432 Returns a string indicating the arc type.
4434 return self._writer.get().ArcType()
4436 cpdef bool error(self):
4440 Indicates whether the FarWriter has encountered an error.
4443 True if the FarWriter is in an errorful state, False otherwise.
4445 return self._writer.get().Error()
4447 cpdef string far_type(self):
4451 Returns a string indicating the FAR type.
4453 return fst.GetFarTypeString(self._writer.get().Type())
4455 # Dictionary-like assignment.
4456 def __setitem__(self, key, _Fst fst):
4460 ## Cleanup operations for module entrance and exit.
4463 # Masks fst_error_fatal flags while this module is running, returning to the
4464 # previous state upon module exit.
4467 _fst_error_fatal_old = fst.FLAGS_fst_error_fatal
4468 fst.FLAGS_fst_error_fatal = False
4472 def _reset_fst_error_fatal():
4473 fst.FLAGS_fst_error_fatal = _fst_error_fatal_old