1 #cython: nonecheck=True
2 # See www.openfst.org for extensive documentation on this weighted
3 # finite-state transducer library.
6 """Python interface to the FST scripting API.
8 Operations which construct new FSTs are implemented as traditional functions, as
9 are two-argument boolean functions like `equal` and `equivalent`. Destructive
10 operations---those that mutate an FST, in place---are instance methods, as is
11 `write`. Operator overloading is not used. The following example, based on
12 Mohri et al. 2002, shows the construction of an ASR system given a pronunciation
13 lexicon L, grammar G, a transducer from context-dependent phones to
14 context-independent phones C, and an HMM set H:
16 L = fst.Fst.read("L.fst")
17 G = fst.Fst.read("G.fst")
18 C = fst.Fst.read("C.fst")
19 H = fst.Fst.read("H.fst")
20 LG = fst.determinize(fst.compose(L, G))
21 CLG = fst.determinize(fst.compose(C, LG))
22 HCLG = fst.determinize(fst.compose(H, CLG))
23 HCLG.minimize() # NB: works in-place.
25 Python variables here use snake_case and constants are in all caps, minus the
29 # Overview of the file:
34 # * Weight and helpers
35 # * _SymbolTable, _EncodeMapperSymbolTable, _FstSymbolTable,
36 # _MutableFstSymbolTable, SymbolTable, and helpers
37 # * SymbolTableIterator
39 # * _Fst, _MutableFst, Fst, and helpers
41 # * Arc, ArcIterator, and MutableArcIterator
45 # * FarReader and FarWriter
46 # * Cleanup operations for module entrance and exit.
48 # TODO(kbg): Try breaking this apart into smaller pieces.
50 # A few of the more idiosyncratic choices made here are due to "impedance
51 # mismatches" between C++ and Python, as follows.
53 # Another issue is that due to differences in C++ and Python scope rules, most
54 # C++ class instances have to be heap-allocated. Since all are packed into
55 # Python class instances, Python destructors are used to semi-automatically
56 # free C++ instances. The one exception are the various `...Options` structs.
57 # All that is included here are the constructors; there is no need to include
58 # the names of the struct members. Cython does not draw any meaningful
59 # distinction between structs and C++ classes, so these look just like class
62 # Cython's type annotations (e.g., `string`) are used when the variables will
63 # be sent as arguments to C++ functions, but are not used for variables used
66 # Internal functions which may raise a Python error do not have a C++ return
67 # type simply because this leads the C++ compiler to think that the resulting
68 # value could be used before it is populated.
74 from libc.stdint cimport INT32_MAX
75 from libc.stdint cimport SIZE_MAX
76 from posix.unistd cimport getpid
79 from libcpp cimport bool
80 from libcpp.cast cimport const_cast
81 from libcpp.cast cimport static_cast
84 from ios cimport ofstream
85 from memory cimport static_pointer_cast
87 # Cython operator workarounds.
88 from cython.operator cimport address as addr # &foo
89 from cython.operator cimport dereference as deref # *foo
90 from cython.operator cimport preincrement as inc # ++foo
102 class FstError(Exception):
107 class FstArgError(FstError, ValueError):
112 class FstBadWeightError(FstError, ValueError):
117 class FstDeletedConstructorError(FstError, RuntimeError):
122 class FstIndexError(FstError, IndexError):
127 class FstIOError(FstError, IOError):
132 class FstOpError(FstError, RuntimeError):
140 cdef string tostring(data, encoding="utf8") except *:
141 """Converts strings to bytestrings.
143 This function converts Python bytestrings and Unicode strings to bytestrings
144 encoded in UTF-8. It is used to process most Python string arguments before
145 passing them to the lower-level library.
148 data: A Unicode string or bytestring.
149 encoding: The desired encoding, defaulting to UTF-8.
155 FstArgError: Cannot encode string.
158 This function is not visible to Python users.
160 # A Python bytestring can be implicitly cast to a C++ string.
161 if isinstance(data, bytes):
163 elif isinstance(data, unicode):
164 return data.encode(encoding)
165 raise FstArgError("Cannot encode as string: {!r}".format(data))
168 cdef string weighttostring(data, encoding="utf8") except *:
169 """Converts strings or numerics to bytestrings.
171 This function converts Python bytestrings, Unicode strings, and numerics
172 which can be cast to floats to bytestrings encoded in UTF-8. It is used to
173 process Python string arguments so they can be used to construct Weight
174 objects. In most cases, weights are underlyingly floating-point, but since
175 not all weights are, they can only be constructed using a string.
178 data: A Unicode string, bytestring, or type which can be converted to a
185 FstArgError: Cannot encode string.
186 ValueError: Invalid literal for float.
189 This function is not visible to Python users.
191 # A Python bytestring can be implicitly cast to a C++ string.
192 if isinstance(data, bytes):
194 elif isinstance(data, unicode):
195 return data.encode(encoding)
196 elif isinstance(data, numbers.Number):
197 return str(data).encode(encoding)
198 raise FstArgError("Cannot encode as string: {!r}".format(data))
201 cdef fst.ComposeFilter _get_compose_filter(
202 const string &compose_filter) except *:
203 """Matches string with the appropriate ComposeFilter enum value.
205 This function takes a string argument and returns the matching ComposeFilter
206 enum value used to initialize ComposeOptions instances. ComposeOptions is used
207 by difference and intersection in addition to composition.
210 compose_filter: A string matching a known composition filter; one of:
211 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
214 A ComposeFilter enum value.
217 FstArgError: Unknown compose filter type.
219 This function is not visible to Python users.
221 cdef fst.ComposeFilter compose_filter_enum
222 if not fst.GetComposeFilter(compose_filter, addr(compose_filter_enum)):
223 raise FstArgError("Unknown compose filter type: {!r}".format(
225 return compose_filter_enum
228 cdef fst.DeterminizeType _get_determinize_type(const string &det_type) except *:
229 """Matches string with the appropriate DeterminizeType enum value.
232 det_type: A string matching a known determinization type; one of:
233 "functional", "nonfunctional", "disambiguate".
236 A DeterminizeType enum value.
239 FstArgError: Unknown determinization type.
241 This function is not visible to Python users.
243 cdef fst.DeterminizeType det_type_enum
244 if not fst.GetDeterminizeType(det_type, addr(det_type_enum)):
245 raise FstArgError("Unknown determinization type: {!r}".format(det_type))
249 cdef fst.QueueType _get_queue_type(const string &queue_type) except *:
250 """Matches string with the appropriate QueueType enum value.
252 This function takes a string argument and returns the matching QueueType enum
253 value passed to the RmEpsilonOptions constructor.
256 queue_type: A string matching a known queue type; one of: "auto", "fifo",
257 "lifo", "shortest", "state", "top".
260 A QueueType enum value.
263 FstArgError: Unknown queue type.
265 This function is not visible to Python users.
267 cdef fst.QueueType queue_type_enum
268 if not fst.GetQueueType(queue_type, addr(queue_type_enum)):
269 raise FstArgError("Unknown queue type: {!r}".format(queue_type))
270 return queue_type_enum
273 cdef fst.RandArcSelection _get_rand_arc_selection(
274 const string &select) except *:
275 """Matches string with the appropriate RandArcSelection enum value.
277 This function takes a string argument and returns the matching
278 RandArcSelection enum value passed to the RandGenOptions constructor.
281 select: A string matching a known random arc selection type; one of:
282 "uniform", "log_prob", "fast_log_prob".
285 A RandArcSelection enum value.
288 FstArgError: Unknown random arc selection type.
290 This function is not visible to Python users.
292 cdef fst.RandArcSelection select_enum
293 if not fst.GetRandArcSelection(select, addr(select_enum)):
294 raise FstArgError("Unknown random arc selection type: {!r}".format(select))
298 cdef fst.ReplaceLabelType _get_replace_label_type(
299 const string &replace_label_type, bool epsilon_on_replace) except *:
300 """Matches string with the appropriate ReplaceLabelType enum value.
302 This function takes a string argument and returns the matching
303 ReplaceLabelType enum value passed to the ReplaceOptions constructor.
306 replace_label_type: A string matching a known replace label type; one of:
307 "neither", "input", "output", "both".
308 epsilon_on_replace: Should call/return arcs be epsilon arcs?
311 A ReplaceLabelType enum value.
314 FstArgError: Unknown replace label type.
316 This function is not visible to Python users.
318 cdef fst.ReplaceLabelType replace_label_type_enum
319 if not fst.GetReplaceLabelType(replace_label_type, epsilon_on_replace,
320 addr(replace_label_type_enum)):
321 raise FstArgError("Unknown replace label type: {!r}".format(
323 return replace_label_type_enum
326 ## Weight and helpers.
329 cdef class Weight(object):
332 Weight(weight_type, weight_string)
336 This class represents an FST weight. When passed as an argument to an FST
337 operation, it should have the weight type of the input FST(s) to said
341 weight_type: A string indicating the weight type.
342 weight_string: A string indicating the underlying weight.
345 FstArgError: Weight type not found.
346 FstBadWeightError: Invalid weight.
350 return "<{} Weight {} at 0x{:x}>".format(self.type(), self.to_string(),
354 return self.to_string()
356 # This attempts to convert the string form into a float, raising
357 # ValueError when that is not appropriate.
360 return float(self.to_string())
362 def __init__(self, weight_type, weight):
363 self._weight.reset(new fst.WeightClass(tostring(weight_type),
364 weighttostring(weight)))
367 cdef void _check_weight(self) except *:
368 if self.type() == b"none":
369 raise FstArgError("Weight type not found")
370 if self.to_string() == b"BadNumber":
371 raise FstBadWeightError("Invalid weight")
373 cpdef Weight copy(self):
377 Returns a copy of the Weight.
379 cdef Weight result = Weight.__new__(Weight)
380 result._weight.reset(new
381 fst.WeightClass(<fst.WeightClass> deref(self._weight)))
384 # To get around the inability to declare cdef class methods, we define
385 # the C part out-of-class and then call it from within.
388 def Zero(cls, weight_type):
390 Weight.Zero(weight_type)
392 Constructs semiring zero.
394 return _Weight_Zero(weight_type)
397 def One(cls, weight_type):
399 Weight.One(weight_type)
401 Constructs semiring One.
403 return _Weight_One(weight_type)
406 def NoWeight(cls, weight_type):
408 Weight.NoWeight(weight_type)
410 Constructs a non-member weight in the semiring.
412 return _Weight_NoWeight(weight_type)
414 def __richcmp__(Weight x, Weight y, int op):
415 # This is useful for unit tests.
417 return (x.type() == y.type() and x.to_string() == y.to_string())
421 raise NotImplementedError("Invalid operator {!r}".format(op))
423 cpdef string to_string(self):
424 return self._weight.get().ToString()
426 cpdef string type(self):
429 Returns a string indicating the weight type.
431 return self._weight.get().Type()
434 cdef Weight _plus(Weight lhs, Weight rhs):
435 cdef Weight result = Weight.__new__(Weight)
436 result._weight.reset(new fst.WeightClass(fst.Plus(deref(lhs._weight),
437 deref(rhs._weight))))
441 def plus(Weight lhs, Weight rhs):
445 Computes the sum of two Weights in the same semiring.
447 This function computes lhs \oplus rhs, raising an exception if lhs and rhs
448 are not in the same semiring.
451 lhs: Left-hand side Weight.
452 rhs: Right-hand side Weight.
458 FstArgError: Weight type not found (or not in same semiring).
459 FstBadWeightError: invalid weight.
461 cdef Weight result = _plus(lhs, rhs)
462 result._check_weight()
466 cdef Weight _times(Weight lhs, Weight rhs):
467 cdef Weight result = Weight.__new__(Weight)
468 result._weight.reset(new fst.WeightClass(fst.Times(deref(lhs._weight),
469 deref(rhs._weight))))
473 def times(Weight lhs, Weight rhs):
477 Computes the product of two Weights in the same semiring.
479 This function computes lhs \otimes rhs, raising an exception if lhs and rhs
480 are not in the same semiring.
483 lhs: Left-hand side Weight.
484 rhs: Right-hand side Weight.
490 FstArgError: Weight type not found (or not in same semiring).
491 FstBadWeightError: Invalid weight.
493 cdef Weight result = _times(lhs, rhs)
494 result._check_weight()
498 cdef Weight _divide(Weight lhs, Weight rhs):
499 cdef Weight result = Weight.__new__(Weight)
500 result._weight.reset(new fst.WeightClass(fst.Divide(deref(lhs._weight),
501 deref(rhs._weight))))
505 def divide(Weight lhs, Weight rhs):
509 Computes the quotient of two Weights in the same semiring.
511 This function computes lhs \oslash rhs, raising an exception if lhs and rhs
512 are not in the same semiring. As there is no way to specify whether to use
513 left vs. right division, this assumes a commutative semiring in which these
514 are equivalent operations.
517 lhs: Left-hand side Weight.
518 rhs: Right-hand side Weight.
524 FstArgError: Weight type not found (or not in same semiring).
525 FstBadWeightError: Invalid weight.
527 cdef Weight result = _divide(lhs, rhs)
528 result._check_weight()
532 cdef Weight _power(Weight w, size_t n):
533 cdef Weight result = Weight.__new__(Weight)
534 result._weight.reset(new fst.WeightClass(fst.Power(deref(w._weight), n)))
538 def power(Weight w, size_t n):
542 Computes the iterated product of a weight.
552 FstArgError: Weight type not found (or not in same semiring).
553 FstBadWeightError: Invalid weight.
555 cdef Weight result = _power(w, n)
556 result._check_weight()
560 cdef fst.WeightClass _get_WeightClass_or_Zero(const string &weight_type,
562 """Converts weight string to a WeightClass.
564 This function constructs a WeightClass instance of the desired weight type.
565 If the first argument is null, the weight is set to semiring Zero.
568 weight_type: A string denoting the desired weight type.
569 weight: A object indicating the desired weight; if omitted, the weight is
570 set to semiring Zero.
573 A WeightClass object.
575 This function is not visible to Python users.
577 cdef fst.WeightClass result
579 result = fst.WeightClass.Zero(weight_type)
580 elif isinstance(weight, Weight):
581 result = deref(<fst.WeightClass *> (<Weight> weight)._weight.get())
583 result = fst.WeightClass(weight_type, weighttostring(weight))
584 if result.ToString() == b"BadNumber":
585 raise FstBadWeightError(weighttostring(weight))
589 cdef fst.WeightClass _get_WeightClass_or_One(const string &weight_type,
591 """Converts weight string to a WeightClass.
593 This function constructs a WeightClass instance of the desired weight type.
594 If the first argument is null, the weight is set to semiring One.
597 weight_type: A string denoting the desired weight type.
598 weight: A object indicating the desired weight; if omitted, the weight is
602 A WeightClass object.
604 This function is not visible to Python users.
606 cdef fst.WeightClass result
608 result = fst.WeightClass.One(weight_type)
609 elif isinstance(weight, Weight):
610 result = deref(<fst.WeightClass *> (<Weight> weight)._weight.get())
612 result = fst.WeightClass(weight_type, weighttostring(weight))
613 if result.ToString() == b"BadNumber":
614 raise FstBadWeightError(weighttostring(weight))
618 cdef Weight _Weight_Zero(weight_type):
619 cdef Weight result = Weight.__new__(Weight)
620 result._weight.reset(new fst.WeightClass(fst.WeightClass.Zero(
621 tostring(weight_type))))
622 if result._weight.get().Type() == b"none":
623 raise FstArgError("Weight type not found")
627 cdef Weight _Weight_One(weight_type):
628 cdef Weight result = Weight.__new__(Weight)
629 result._weight.reset(new fst.WeightClass(
630 fst.WeightClass.One(tostring(weight_type))))
631 if result._weight.get().Type() == b"none":
632 raise FstArgError("Weight type not found")
636 cdef Weight _Weight_NoWeight(weight_type):
637 cdef Weight result = Weight.__new__(Weight)
638 result._weight.reset(new fst.WeightClass(
639 fst.WeightClass.NoWeight(tostring(weight_type))))
643 ## _SymbolTable, _MutableSymbolTable, _EncodeMapperSymbolTable, _FstSymbolTable,
644 ## _MutableFstSymbolTable, SymbolTable, and helpers.
646 # SymbolTable hierarchy:
648 # _SymbolTable: abstract base class; has-a SymbolTable*
649 # _EncodeMapperSymbolTable(_SymbolTable): constant symbol table returned by
650 # EncodeMapper.input_symbols/output_symbols
651 # _FstSymbolTable(_SymbolTable): constant symbol table returned by
652 # _Fst.input_symbols/output_symbols
654 # _MutableSymbolTable(_SymbolTable): abstract base class adding mutation methods
655 # _MutableFstSymbolTable(_MutableSymbolTable): mutable symbol table returned by
656 # _MutableFst.mutable_input_symbols/mutable_output_symbols
657 # SymbolTable(_MutableSymbolTable): adds constructor
660 cdef class _SymbolTable(object):
665 Base class for the symbol table hierarchy.
667 This class is the base class for SymbolTable. It has a "deleted" constructor
668 and implementations for the const methods of the wrapped SymbolTable.
671 # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
672 # Doing so will allow undefined behavior.
675 raise FstDeletedConstructorError(
676 "Cannot construct {}".format(self.__class__.__name__))
679 return SymbolTableIterator(self)
681 cpdef int64 available_key(self):
685 Returns an integer indicating the next available key index in the table.
687 return self._table.AvailableKey()
689 cpdef string checksum(self):
693 Returns a string indicating the label-agnostic MD5 checksum for the table.
695 return self._table.CheckSum()
697 cpdef SymbolTable copy(self):
701 Returns a mutable copy of the SymbolTable.
703 return _init_SymbolTable(self._table.Copy())
709 Given a symbol or index, finds the other one.
711 This method returns the index associated with a symbol key, or the symbol
712 associated with a index key.
715 key: Either a string or an index.
718 If key is a string, the associated index; if key is an integer, the
722 KeyError: Key not found.
725 result = self._table.FindIndex(tostring(key))
729 result = self._table.FindSymbol(key)
734 cpdef int64 get_nth_key(self, ssize_t pos) except *:
736 get_nth_key(self, pos)
738 Retrieves the integer index of the n-th key in the table.
741 pos: The n-th key to retrieve.
744 The integer index of the n-th key.
747 KeyError: index not found.
749 cdef int64 result = self._table.GetNthKey(pos)
754 cpdef string labeled_checksum(self):
756 labeled_checksum(self)
758 Returns a string indicating the label-dependent MD5 checksum for the table.
760 return self._table.LabeledCheckSum()
762 cpdef bool member(self, key):
766 Given a symbol or index, returns whether it is found in the table.
768 This method returns a boolean indicating whether the given symbol or index
769 is present in the table. If one intends to perform subsequent lookup, it is
770 better to simply call the find method, catching the KeyError.
773 key: Either a string or an index.
776 Whether or not the key is present (as a string or a index) in the table.
779 return self._table.MemberSymbol(tostring(key))
781 return self._table.MemberIndex(key)
783 def __contains__(self, key):
784 return self.member(key)
786 cpdef string name(self):
790 Returns the symbol table's name.
792 return self._table.Name()
794 cpdef size_t num_symbols(self):
798 Returns the number of symbols in the symbol table.
800 return self._table.NumSymbols()
802 cpdef void write(self, filename) except *:
804 write(self, filename)
806 Serializes symbol table to a file.
808 This methods writes the SymbolTable to a file in binary format.
811 filename: The string location of the output file.
814 FstIOError: Write failed.
816 if not self._table.Write(tostring(filename)):
817 raise FstIOError("Write failed: {!r}".format(filename))
819 cpdef void write_text(self, filename) except *:
821 write_text(self, filename)
823 Writes symbol table to text file.
825 This method writes the SymbolTable to a file in human-readable format.
828 filename: The string location of the output file.
831 FstIOError: Write failed.
833 if not self._table.WriteText(tostring(filename)):
834 raise FstIOError("Write failed: {!r}".format(filename))
837 cdef class _EncodeMapperSymbolTable(_SymbolTable):
842 Immutable SymbolTable class for tables stored in an EncodeMapper.
844 This class wraps a library const SymbolTable and exposes const methods of the
845 wrapped object. It is only to be returned by method, never constructed
849 # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
850 # Doing so will allow undefined behavior.
853 return "<const EncodeMapper SymbolTable {!r} at 0x{:x}>".format(self.name(),
857 cdef class _FstSymbolTable(_SymbolTable):
862 Mutable SymbolTable class for tables stored in a mutable FST.
864 This class wraps a library SymbolTable and exposes methods of the wrapped
865 object. It is only to be returned by method, never constructed directly.
868 # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
869 # Doing so will allow undefined behavior.
872 return "<const Fst SymbolTable {!r} at 0x{:x}>".format(self.name(),
876 cdef class _MutableSymbolTable(_SymbolTable):
881 Base class for mutable symbol tables.
883 This class is the base class for a mutable SymbolTable. It has a "deleted"
884 constructor and implementations of all methods of the wrapped SymbolTable.
887 cpdef int64 add_symbol(self, symbol, int64 key=-1):
889 add_symbol(self, symbol, key=-1)
891 Adds a symbol to the table and returns the index.
893 This method adds a symbol to the table. The caller can optionally
894 specify a non-negative integer index for the key.
897 symbol: A symbol string.
898 key: An index for the symbol (-1 is reserved for "no symbol requested").
901 The integer key of the new symbol.
903 cdef symbol_string = tostring(symbol)
905 return self._table.AddSymbol(symbol_string, key)
907 return self._table.AddSymbol(symbol_string)
909 cpdef void add_table(self, _SymbolTable syms):
911 add_table(self, syms)
913 Adds another SymbolTable to this table.
915 This method merges another symbol table into the current table. All key
916 values will be offset by the current available key.
919 syms: A SymbolTable to be merged with the current table.
921 self._table.AddTable(deref(syms._table))
923 cpdef void set_name(self, new_name) except *:
924 self._table.SetName(tostring(new_name))
927 cdef class _MutableFstSymbolTable(_MutableSymbolTable):
931 Mutable SymbolTable assigned to an FST.
935 return "<Fst SymbolTable {!r} at 0x{:x}>".format(self.name(), id(self))
938 cdef class SymbolTable(_MutableSymbolTable):
941 SymbolTable(name="<unspecified>")
943 Mutable SymbolTable class.
945 This class wraps the library SymbolTable and exposes both const (i.e.,
946 access) and non-const (i.e., mutation) methods of wrapped object.
948 Unlike other classes in the hierarchy, it has a working constructor and can be
949 used to programmatically construct a SymbolTable in memory.
952 name: An optional string indicating the table's name.
956 return "<SymbolTable {!r} at 0x{:x}>".format(self.name(), id(self))
958 def __init__(self, name=b"<unspecified>"):
959 self._table = new fst.SymbolTable(tostring(name))
960 self._smart_table.reset(self._table)
963 def read(cls, filename):
965 SymbolTable.read(filename)
967 Reads symbol table from binary file.
969 This class method creates a new SymbolTable from a symbol table binary file.
972 filename: The string location of the input binary file.
975 A new SymbolTable instance.
977 See also: `SymbolTable.read_fst`, `SymbolTable.read_text`.
979 cdef fst.SymbolTable *tsyms = fst.SymbolTable.Read(tostring(filename))
981 raise FstIOError("Read failed: {!r}".format(filename))
982 return _init_SymbolTable(tsyms)
985 def read_text(cls, filename, bool allow_negative_labels=False):
987 SymbolTable.read_text(filename)
989 Reads symbol table from text file.
991 This class method creates a new SymbolTable from a symbol table text file.
994 filename: The string location of the input text file.
995 allow_negative_labels: Should negative labels be allowed? (Not
996 recommended; may cause conflicts).
999 A new SymbolTable instance.
1001 See also: `SymbolTable.read`, `SymbolTable.read_fst`.
1003 cdef unique_ptr[fst.SymbolTableTextOptions] opts
1004 opts.reset(new fst.SymbolTableTextOptions(allow_negative_labels))
1005 cdef fst.SymbolTable *tsyms = fst.SymbolTable.ReadText(tostring(filename),
1008 raise FstIOError("Read failed: {!r}".format(filename))
1009 return _init_SymbolTable(tsyms)
1012 def read_fst(cls, filename, bool input_table):
1014 SymbolTable.read_fst(filename, input_table)
1016 Reads symbol table from an FST file without loading the corresponding FST.
1018 This class method creates a new SymbolTable by reading either the input or
1019 output symbol table from an FST file, without loading the corresponding FST.
1022 filename: The string location of the input FST file.
1023 input_table: Should the input table be read (True) or the output table
1027 A new SymbolTable instance, or None if none can be read.
1030 FstIOError: Read failed.
1032 See also: `SymbolTable.read`, `SymbolTable.read_text`.
1034 cdef fst.SymbolTable *tsyms = fst.FstReadSymbols(filename, input_table)
1036 raise FstIOError("Read failed: {!r}".format(filename))
1037 return _init_SymbolTable(tsyms)
1040 cdef _EncodeMapperSymbolTable _init_EncodeMapperSymbolTable(
1041 fst.SymbolTable *table, shared_ptr[fst.EncodeMapperClass] encoder):
1042 cdef _EncodeMapperSymbolTable result = (
1043 _EncodeMapperSymbolTable.__new__(_EncodeMapperSymbolTable))
1044 result._table = table
1045 result._encoder = encoder
1049 cdef _FstSymbolTable _init_FstSymbolTable(fst.SymbolTable *table,
1050 shared_ptr[fst.FstClass] ifst):
1051 cdef _FstSymbolTable result = _FstSymbolTable.__new__(_FstSymbolTable)
1052 result._table = table
1057 cdef _MutableFstSymbolTable _init_MutableFstSymbolTable(fst.SymbolTable *table,
1058 shared_ptr[fst.MutableFstClass] ifst):
1059 cdef _MutableFstSymbolTable result = (
1060 _MutableFstSymbolTable.__new__(_MutableFstSymbolTable))
1061 result._table = table
1066 cdef SymbolTable _init_SymbolTable(fst.SymbolTable *table):
1067 cdef SymbolTable result = SymbolTable.__new__(SymbolTable)
1068 result._table = table
1072 # Constructive SymbolTable operations.
1075 cpdef SymbolTable compact_symbol_table(_SymbolTable syms):
1077 compact_symbol_table(syms)
1079 Constructively relabels a SymbolTable to make it a contiguous mapping.
1082 syms: Input SymbolTable.
1085 A new compacted SymbolTable.
1087 return _init_SymbolTable(fst.CompactSymbolTable(deref(syms._table)))
1090 cpdef SymbolTable merge_symbol_table(_SymbolTable lhs, _SymbolTable rhs):
1092 merge_symbol_table(lhs, rhs)
1094 Merges all symbols from the left table into the right.
1096 This function creates a new SymbolTable which is the merger of the two input
1097 symbol Tables. Symbols in the right-hand table that conflict with those in the
1098 left-hand table will be assigned values from the left-hand table. Thus the
1099 returned table will never modify symbol assignments from the left-hand side,
1100 but may do so on the right.
1102 If the left-hand table is associated with an FST, it may be necessary to
1103 relabel it using the output table.
1106 lhs: Left-hand side SymbolTable.
1107 rhs: Left-hand side SymbolTable.
1110 A new merged SymbolTable.
1112 See also: `relabel_symbols`.
1114 return _init_SymbolTable(fst.MergeSymbolTable(deref(lhs._table),
1115 deref(rhs._table), NULL))
1118 ## SymbolTableIterator.
1121 cdef class SymbolTableIterator(object):
1124 SymbolTableIterator(syms)
1126 This class is used for iterating over a symbol table.
1130 return "<SymbolTableIterator at 0x{:x}>".format(id(self))
1132 def __init__(self, _SymbolTable syms):
1133 self._siter.reset(new fst.SymbolTableIterator(deref(syms._table)))
1135 # This just registers this class as a possible iterator.
1139 # Magic method used to get a Pythonic API out of the C++ API.
1143 cdef int64 value = self.value()
1144 cdef string symbol = self.symbol()
1146 return (value, symbol)
1148 cpdef bool done(self):
1152 Indicates whether the iterator is exhausted or not.
1155 True if the iterator is exhausted, False otherwise.
1157 return self._siter.get().Done()
1159 cpdef void next(self):
1163 Advances the iterator.
1165 self._siter.get().Next()
1167 cpdef void reset(self):
1171 Resets the iterator to the initial position.
1173 self._siter.get().Reset()
1175 cpdef string symbol(self):
1179 Returns the current symbol string.
1181 This method returns the current symbol string at this point in the table.
1186 return self._siter.get().Symbol()
1188 cpdef int64 value(self):
1192 Returns the current integer index of the symbol.
1197 return self._siter.get().Value()
1203 cdef class EncodeMapper(object):
1206 EncodeMapper(arc_type="standard", encode_labels=False, encode_weights=False)
1208 Arc encoder class, wrapping EncodeMapperClass.
1210 This class provides an object which can be used to encode or decode FST arcs.
1211 This is most useful to convert an FST to an unweighted acceptor, on which
1212 some FST operations are more efficient, and then decoding the FST afterwards.
1214 To use an instance of this class to encode or decode a mutable FST, pass it
1215 as the first argument to the FST instance methods `encode` and `decode`.
1217 For implementational reasons, it is not currently possible to use an encoder
1218 on disk to construct this class.
1221 arc_type: A string indicating the arc type.
1222 encode_labels: Should labels be encoded?
1223 encode_weights: Should weights be encoded?
1227 return "<EncodeMapper at 0x{:x}>".format(id(self))
1230 arc_type=b"standard",
1231 bool encode_labels=False,
1232 bool encode_weights=False):
1233 cdef uint32 flags = fst.GetEncodeFlags(encode_labels, encode_weights)
1234 self._encoder.reset(new fst.EncodeMapperClass(tostring(arc_type), flags,
1236 if not self._encoder:
1237 raise FstOpError("Unknown arc type: {!r}".format(arc_type))
1239 cpdef string arc_type(self):
1243 Returns a string indicating the arc type.
1245 return self._encoder.get().ArcType()
1247 # Python's equivalent to operator().
1249 def __call__(self, Arc arc):
1251 self(state, ilabel, olabel, weight, nextstate)
1253 Uses the encoder to encode an arc.
1256 ilabel: The integer index of the input label.
1257 olabel: The integer index of the output label.
1258 weight: A Weight or weight string indicating the desired final weight; if
1259 null, it is set to semiring One.
1260 nextstate: The integer index of the destination state.
1263 FstOpError: Incompatible or invalid weight.
1265 return _init_Arc(self._encoder.get().__call__(deref(arc._arc)))
1267 cpdef uint32 flags(self):
1271 Returns the encoder's flags.
1273 return self._encoder.get().Flags()
1275 cpdef _EncodeMapperSymbolTable input_symbols(self):
1279 Returns the encoder's input symbol table, or None if none is present.
1281 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1282 self._encoder.get().InputSymbols())
1285 return _init_EncodeMapperSymbolTable(syms, self._encoder)
1287 cpdef _EncodeMapperSymbolTable output_symbols(self):
1289 output_symbols(self)
1291 Returns the encoder's output symbol table, or None if none is present.
1293 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1294 self._encoder.get().OutputSymbols())
1297 return _init_EncodeMapperSymbolTable(syms, self._encoder)
1299 cpdef uint64 properties(self, uint64 mask):
1301 properties(self, mask)
1303 Provides property bits.
1305 This method provides user access to the properties of the encoder.
1308 mask: The property mask to be compared to the encoder's properties.
1311 A 64-bit bitmask representing the requested properties.
1313 return self._encoder.get().Properties(mask)
1315 cpdef void set_input_symbols(self, _SymbolTable syms) except *:
1317 set_input_symbols(self, syms)
1319 Sets the encoder's input symbol table.
1322 syms: A SymbolTable.
1324 See also: `set_output_symbols`.
1326 self._encoder.get().SetInputSymbols(syms._table)
1328 cpdef void set_output_symbols(self, _SymbolTable syms) except *:
1330 set_output_symbols(self, syms)
1332 Sets the encoder's output symbol table.
1335 syms: A SymbolTable.
1337 See also: `set_input_symbols`.
1339 self._encoder.get().SetOutputSymbols(syms._table)
1341 cpdef string weight_type(self):
1345 Returns a string indicating the weight type.
1347 return self._encoder.get().WeightType()
1350 ## _Fst, _MutableFst, Fst, and helpers.
1354 # _Fst: base class; has-a FstClass*.
1355 # _MutableFst(_Fst): adds mutable methods.
1356 # Fst(filename): pseudo-constructor.
1359 cdef class _Fst(object):
1364 Immutable FST class, wrapping FstClass.
1366 This class is the basic user-facing FST object. It does not itself support any
1367 mutation operations.
1370 # IPython notebook magic to produce an SVG of the FST.
1371 def _repr_svg_(self):
1372 """IPython notebook magic to produce an SVG of the FST using GraphViz.
1374 This method produces an SVG of the internal graph. Users wishing to create
1375 publication-quality graphs should instead use the method `draw`, which
1376 exposes additional parameters.
1379 OSError: Cannot locate the `dot` executable.
1380 subprocess.CalledProcessError: `dot` returned non-zero exit code.
1382 See also: `draw`, `text`.
1384 # Throws OSError if the dot executable is not found.
1385 proc = subprocess.Popen(["dot", "-Tsvg"], stdin=subprocess.PIPE,
1386 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1387 cdef stringstream sstrm
1388 fst.DrawFst(deref(self._fst), self._fst.get().InputSymbols(),
1389 self._fst.get().OutputSymbols(), NULL,
1390 self._fst.get().Properties(fst.kAcceptor, True) ==
1392 b"", 8.5, 11, True, False, 0.4, 0.25, 14, 5, b"g", False,
1393 addr(sstrm), b"_repr_svg")
1394 (sout, serr) = proc.communicate(sstrm.str())
1395 if proc.returncode != 0: # Just to be explicit.
1396 raise subprocess.CalledProcessError(proc.returncode, self._DOT_TSVG)
1397 return sout.decode("utf8")
1400 return "<{} Fst at 0x{:x}>".format(self.fst_type(), id(self))
1403 raise FstDeletedConstructorError(
1404 "Cannot construct {}".format(self.__class__.__name__))
1406 # Other magic methods.
1409 return self.text(acceptor=self._fst.get().Properties(fst.kAcceptor, True) ==
1411 show_weight_one=self._fst.get().Properties(fst.kWeighted, True) ==
1414 cpdef string arc_type(self):
1418 Returns a string indicating the arc type.
1420 return self._fst.get().ArcType()
1422 cpdef ArcIterator arcs(self, int64 state):
1426 Returns an iterator over arcs leaving the specified state.
1429 state: The source state ID.
1434 See also: `mutable_arcs`, `states`.
1436 return ArcIterator(self, state)
1438 cpdef _Fst copy(self):
1442 Makes a copy of the FST.
1444 return _init_XFst(new fst.FstClass(deref(self._fst)))
1446 cpdef void draw(self,
1448 _SymbolTable isymbols=None,
1449 _SymbolTable osymbols=None,
1450 SymbolTable ssymbols=None,
1451 bool acceptor=False,
1455 bool portrait=False,
1456 bool vertical=False,
1458 double nodesep=0.25,
1462 bool show_weight_one=False):
1464 draw(self, filename, isymbols=None, osymbols=None, ssymbols=None,
1465 acceptor=False, title="", width=8.5, height=11, portrait=False,
1466 vertical=False, ranksep=0.4, nodesep=0.25, fontsize=14,
1467 precision=5, float_format="g", show_weight_one=False):
1469 Writes out the FST in Graphviz text format.
1471 This method writes out the FST in the dot graph description language. The
1472 graph can be rendered using the `dot` executable provided by Graphviz.
1475 filename: The string location of the output dot/Graphviz file.
1476 isymbols: An optional symbol table used to label input symbols.
1477 osymbols: An optional symbol table used to label output symbols.
1478 ssymbols: An optional symbol table used to label states.
1479 acceptor: Should the figure be rendered in acceptor format if possible?
1480 title: An optional string indicating the figure title.
1481 width: The figure width, in inches.
1482 height: The figure height, in inches.
1483 portrait: Should the figure be rendered in portrait rather than
1485 vertical: Should the figure be rendered bottom-to-top rather than
1487 ranksep: The minimum separation separation between ranks, in inches.
1488 nodesep: The minimum separation between nodes, in inches.
1489 fontsize: Font size, in points.
1490 precision: Numeric precision for floats, in number of chars.
1491 float_format: One of: 'e', 'f' or 'g'.
1492 show_weight_one: Should weights equivalent to semiring One be printed?
1496 cdef string filename_string = tostring(filename)
1497 cdef unique_ptr[ofstream] ostrm
1498 ostrm.reset(new ofstream(filename_string))
1499 cdef fst.SymbolTable *ssymbols_ptr = NULL
1500 if ssymbols is not None:
1501 ssymbols_ptr = ssymbols._table
1502 fst.DrawFst(deref(self._fst),
1503 self._fst.get().InputSymbols() if isymbols is None
1504 else isymbols._table,
1505 self._fst.get().OutputSymbols() if osymbols is None
1506 else osymbols._table,
1507 ssymbols_ptr, acceptor, tostring(title), width, height, portrait,
1508 vertical, ranksep, nodesep, fontsize, precision,
1509 tostring(float_format), show_weight_one, ostrm.get(),
1512 cpdef Weight final(self, int64 state):
1516 Returns the final weight of a state.
1519 state: The integer index of a state.
1522 The final Weight of that state.
1525 FstIndexError: State index out of range.
1527 cdef Weight weight = Weight.__new__(Weight)
1528 weight._weight.reset(new fst.WeightClass(self._fst.get().Final(state)))
1531 cpdef string fst_type(self):
1535 Returns a string indicating the FST type.
1537 return self._fst.get().FstType()
1539 cpdef _FstSymbolTable input_symbols(self):
1543 Returns the FST's input symbol table, or None if none is present.
1545 See also: `input_symbols`.
1547 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1548 self._fst.get().InputSymbols())
1551 return _init_FstSymbolTable(syms, self._fst)
1553 cpdef size_t num_arcs(self, int64 state) except *:
1555 num_arcs(self, state)
1557 Returns the number of arcs leaving a state.
1560 state: The integer index of a state.
1563 The number of arcs leaving that state.
1566 FstIndexError: State index out of range.
1568 See also: `num_states`.
1570 cdef size_t result = self._fst.get().NumArcs(state)
1571 if result == SIZE_MAX:
1572 raise FstIndexError("State index out of range")
1575 cpdef size_t num_input_epsilons(self, int64 state) except *:
1577 num_input_epsilsons(self, state)
1579 Returns the number of arcs with epsilon input labels leaving a state.
1582 state: The integer index of a state.
1585 The number of epsilon-input-labeled arcs leaving that state.
1588 FstIndexError: State index out of range.
1590 See also: `num_output_epsilons`.
1592 cdef size_t result = self._fst.get().NumInputEpsilons(state)
1593 if result == SIZE_MAX:
1594 raise FstIndexError("State index out of range")
1597 cpdef size_t num_output_epsilons(self, int64 state) except *:
1599 num_output_epsilsons(self, state)
1601 Returns the number of arcs with epsilon output labels leaving a state.
1604 state: The integer index of a state.
1607 The number of epsilon-output-labeled arcs leaving that state.
1610 FstIndexError: State index out of range.
1612 See also: `num_input_epsilons`.
1614 cdef size_t result = self._fst.get().NumOutputEpsilons(state)
1615 if result == SIZE_MAX:
1616 raise FstIndexError("State index out of range")
1619 cpdef _FstSymbolTable output_symbols(self):
1621 output_symbols(self)
1623 Returns the FST's output symbol table, or None if none is present.
1625 See also: `input_symbols`.
1627 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1628 self._fst.get().OutputSymbols())
1631 return _init_FstSymbolTable(syms, self._fst)
1633 cpdef uint64 properties(self, uint64 mask, bool test):
1635 properties(self, mask, test)
1637 Provides property bits.
1639 This method provides user access to the properties attributes for the FST.
1640 The resulting value is a long integer, but when it is cast to a boolean,
1641 it represents whether or not the FST has the `mask` property.
1644 mask: The property mask to be compared to the FST's properties.
1645 test: Should any unknown values be computed before comparing against
1649 A 64-bit bitmask representing the requested properties.
1651 return self._fst.get().Properties(mask, test)
1653 cpdef int64 start(self):
1657 Returns the start state.
1659 return self._fst.get().Start()
1661 cpdef StateIterator states(self):
1665 Returns an iterator over all states in the FST.
1668 A StateIterator object for the FST.
1670 See also: `arcs`, `mutable_arcs`.
1672 return StateIterator(self)
1674 cpdef string text(self, _SymbolTable isymbols=None,
1675 _SymbolTable osymbols=None, _SymbolTable ssymbols=None,
1676 bool acceptor=False, bool show_weight_one=False, missing_sym=b""):
1678 text(self, isymbols=None, osymbols=None, ssymbols=None, acceptor=False,
1679 show_weight_one=False, missing_sym="")
1681 Produces a human-readable string representation of the FST.
1683 This method generates a human-readable string representation of the FST.
1684 The caller may optionally specify SymbolTables used to label input labels,
1685 output labels, or state labels, respectively.
1688 isymbols: An optional symbol table used to label input symbols.
1689 osymbols: An optional symbol table used to label output symbols.
1690 ssymbols: An optional symbol table used to label states.
1691 acceptor: Should the FST be rendered in acceptor format if possible?
1692 show_weight_one: Should weights equivalent to semiring One be printed?
1693 missing_symbol: The string to be printed when symbol table lookup fails.
1696 A formatted string representing the machine.
1698 # Prints FST to stringstream, then returns resulting string.
1699 cdef fst.SymbolTable *ssymbols_ptr = NULL
1700 if ssymbols is not None:
1701 ssymbols_ptr = ssymbols._table
1702 cdef stringstream sstrm
1703 fst.PrintFst(deref(self._fst), sstrm, "<pywrapfst>",
1704 self._fst.get().InputSymbols() if isymbols is None
1705 else isymbols._table,
1706 self._fst.get().OutputSymbols() if osymbols is None
1707 else osymbols._table,
1708 ssymbols_ptr, acceptor, show_weight_one, tostring(missing_sym))
1711 cpdef bool verify(self):
1715 Verifies that an FST's contents are sane.
1718 True if the contents are sane, False otherwise.
1720 return fst.Verify(deref(self._fst))
1722 cpdef string weight_type(self):
1726 Provides the FST's weight type.
1729 A string representing the weight type.
1731 return self._fst.get().WeightType()
1733 cpdef void write(self, filename) except *:
1735 write(self, filename)
1737 Serializes FST to a file.
1739 This method writes the FST to a file in a binary format.
1742 filename: The string location of the output file.
1745 FstIOError: Write failed.
1747 if not self._fst.get().Write(tostring(filename)):
1748 raise FstIOError("Write failed: {!r}".format(filename))
1750 cpdef string write_to_string(self):
1752 write_to_string(self)
1754 Serializes FST to a string.
1759 See also: `read_from_string`.
1761 return self._fst.get().WriteToString()
1764 cdef class _MutableFst(_Fst):
1769 Mutable FST class, wrapping MutableFstClass.
1771 This class extends _Fst by adding mutation operations.
1774 cdef void _check_mutating_imethod(self) except *:
1775 """Checks whether an operation mutating the FST has produced an error.
1777 This function is not visible to Python users.
1779 if self._fst.get().Properties(fst.kError, True) == fst.kError:
1780 raise FstOpError("Operation failed")
1782 cdef void _add_arc(self, int64 state, Arc arc) except *:
1783 if not self._fst.get().ValidStateId(state):
1784 raise FstIndexError("State index out of range")
1785 if not self._mfst.get().AddArc(state, deref(arc._arc)):
1786 raise FstOpError("Incompatible or invalid weight type")
1787 self._check_mutating_imethod()
1789 def add_arc(self, int64 state, Arc arc):
1791 add_arc(self, state, arc)
1793 Adds a new arc to the FST and return self.
1796 state: The integer index of the source state.
1797 arc: The arc to add.
1803 FstIndexError: State index out of range.
1804 FstOpdexError: Incompatible or invalid weight type.
1806 See also: `add_state`.
1808 self._add_arc(state, arc)
1811 cpdef int64 add_state(self) except *:
1815 Adds a new state to the FST and returns the state ID.
1818 The integer index of the new state.
1820 See also: `add_arc`, `set_start`, `set_final`.
1822 cdef int64 result = self._mfst.get().AddState()
1823 self._check_mutating_imethod()
1826 cdef void _arcsort(self, sort_type=b"ilabel") except *:
1827 cdef fst.ArcSortType sort_type_enum
1828 if not fst.GetArcSortType(tostring(sort_type), addr(sort_type_enum)):
1829 raise FstArgError("Unknown sort type {!r}".format(sort_type))
1830 fst.ArcSort(self._mfst.get(), sort_type_enum)
1831 self._check_mutating_imethod()
1833 def arcsort(self, sort_type=b"ilabel"):
1835 arcsort(self, sort_type="ilabel")
1837 Sorts arcs leaving each state of the FST.
1839 This operation destructively sorts arcs leaving each state using either
1840 input or output labels.
1843 sort_type: Either "ilabel" (sort arcs according to input labels) or
1844 "olabel" (sort arcs according to output labels).
1850 FstArgError: Unknown sort type.
1852 See also: `topsort`.
1854 self._arcsort(sort_type)
1857 cdef void _closure(self, bool closure_plus=False) except *:
1858 fst.Closure(self._mfst.get(), fst.GetClosureType(closure_plus))
1859 self._check_mutating_imethod()
1861 def closure(self, bool closure_plus=False):
1863 closure(self, closure_plus=False)
1865 Computes concatenative closure.
1867 This operation destructively converts the FST to its concatenative closure.
1868 If A transduces string x to y with weight a, then the closure transduces x
1869 to y with weight a, xx to yy with weight a \otimes a, xxx to yyy with weight
1870 a \otimes a \otimes a, and so on. The empty string is also transduced to
1871 itself with semiring One if `closure_plus` is False.
1874 closure_plus: If False, do not accept the empty string.
1879 self._closure(closure_plus)
1882 cdef void _concat(self, _Fst ifst) except *:
1883 fst.Concat(self._mfst.get(), deref(ifst._fst))
1884 self._check_mutating_imethod()
1886 def concat(self, _Fst ifst):
1890 Computes the concatenation (product) of two FSTs.
1892 This operation destructively concatenates the FST with a second FST. If A
1893 transduces string x to y with weight a and B transduces string w to v with
1894 weight b, then their concatenation transduces string xw to yv with weight a
1898 ifst: The second input FST.
1906 cdef void _connect(self) except *:
1907 fst.Connect(self._mfst.get())
1908 self._check_mutating_imethod()
1914 Removes unsuccessful paths.
1916 This operation destructively trims the FST, removing states and arcs that
1917 are not part of any successful path.
1925 cdef void _decode(self, EncodeMapper encoder) except *:
1926 fst.Decode(self._mfst.get(), deref(encoder._encoder))
1927 self._check_mutating_imethod()
1929 def decode(self, EncodeMapper encoder):
1931 decode(self, encoder)
1933 Decodes encoded labels and/or weights.
1935 This operation reverses the encoding performed by `encode`.
1938 encoder: An EncodeMapper object used to encode the FST.
1945 self._decode(encoder)
1948 cdef void _delete_arcs(self, int64 state, size_t n=0) except *:
1949 if not (self._mfst.get().DeleteArcs(state, n) if n else
1950 self._mfst.get().DeleteArcs(state)):
1951 raise FstIndexError("State index out of range")
1952 self._check_mutating_imethod()
1954 def delete_arcs(self, int64 state, size_t n=0):
1956 delete_arcs(self, state, n=0)
1958 Deletes arcs leaving a particular state.
1961 state: The integer index of a state.
1962 n: An optional argument indicating how many arcs to be deleted. If this
1963 argument is omitted or passed as zero, all arcs from this state are
1970 FstIndexError: State index out of range.
1972 See also: `delete_states`.
1974 self._delete_arcs(state, n)
1977 cdef void _delete_states(self, states=None) except *:
1978 # Only the former signature has a possible indexing failure.
1980 if not self._mfst.get().DeleteStates(<const vector[int64]> states):
1981 raise FstIndexError("State index out of range")
1983 self._mfst.get().DeleteStates()
1984 self._check_mutating_imethod()
1986 def delete_states(self, states=None):
1988 delete_states(self, states=None)
1993 states: An optional iterable of integer indices of the states to be
1994 deleted. If this argument is omitted, all states are deleted.
2000 FstIndexError: State index out of range.
2002 See also: `delete_arcs`.
2004 self._delete_states(states)
2007 cdef void _encode(self, EncodeMapper encoder) except *:
2008 fst.Encode(self._mfst.get(), encoder._encoder.get())
2009 self._check_mutating_imethod()
2011 def encode(self, EncodeMapper encoder):
2013 encode(self, encoder)
2015 Encodes labels and/or weights.
2017 This operation allows for the representation of a weighted transducer as a
2018 weighted acceptor, an unweighted transducer, or an unweighted acceptor by
2019 considering the pair (input label, output label), the pair (input label,
2020 weight), or the triple (input label, output label, weight) as a single
2021 label. Applying this operation mutates the EncodeMapper argument, which
2022 can then be used to decode.
2025 encoder: An EncodeMapper object to be used as the encoder.
2032 self._encode(encoder)
2035 cdef void _invert(self) except *:
2036 fst.Invert(self._mfst.get())
2037 self._check_mutating_imethod()
2043 Inverts the FST's transduction.
2045 This operation destructively inverts the FST's transduction by exchanging
2046 input and output labels.
2054 cdef void _minimize(self, float delta=fst.kDelta,
2055 bool allow_nondet=False) except *:
2056 # This runs in-place when the second argument is null.
2057 fst.Minimize(self._mfst.get(), NULL, delta, allow_nondet)
2058 self._check_mutating_imethod()
2060 def minimize(self, float delta=fst.kDelta, bool allow_nondet=False):
2062 minimize(self, delta=0.0009765625, allow_nondet=False)
2066 This operation destructively performs the minimization of deterministic
2067 weighted automata and transducers. If the input FST A is an acceptor, this
2068 operation produces the minimal acceptor B equivalent to A, i.e. the
2069 acceptor with a minimal number of states that is equivalent to A. If the
2070 input FST A is a transducer, this operation internally builds an equivalent
2071 transducer with a minimal number of states. However, this minimality is
2072 obtained by allowing transition having strings of symbols as output labels,
2073 this known in the litterature as a real-time transducer. Such transducers
2074 are not directly supported by the library. This function will convert such
2075 transducer by expanding each string-labeled transition into a sequence of
2076 transitions. This will results in the creation of new states, hence losing
2077 the minimality property.
2080 delta: Comparison/quantization delta.
2081 allow_nondet: Attempt minimization of non-deterministic FST?
2086 self._minimize(delta)
2089 cpdef MutableArcIterator mutable_arcs(self, int64 state):
2091 mutable_arcs(self, state)
2093 Returns a mutable iterator over arcs leaving the specified state.
2096 state: The source state ID.
2099 A MutableArcIterator.
2101 See also: `arcs`, `states`.
2103 return MutableArcIterator(self, state)
2105 def mutable_input_symbols(self):
2107 mutable_input_symbols(self)
2109 Returns the FST's (mutable) input symbol table, or None if none is present.
2111 cdef fst.SymbolTable *tst = self._mfst.get().MutableInputSymbols()
2114 return _init_MutableFstSymbolTable(tst, self._mfst)
2116 def mutable_output_symbols(self):
2118 mutable_output_symbols(self)
2120 Returns the FST's (mutable) output symbol table, or None if none is present.
2122 cdef fst.SymbolTable *tst = self._mfst.get().MutableOutputSymbols()
2125 return _init_MutableFstSymbolTable(tst, self._mfst)
2127 cpdef int64 num_states(self):
2131 Returns the number of states.
2133 return self._mfst.get().NumStates()
2135 cdef void _project(self, bool project_output=False) except *:
2136 fst.Project(self._mfst.get(), fst.GetProjectType(project_output))
2137 self._check_mutating_imethod()
2139 def project(self, bool project_output=False):
2141 project(self, project_output=False)
2143 Converts the FST to an acceptor using input or output labels.
2145 This operation destructively projects an FST onto its domain or range by
2146 either copying each arc's input label to its output label (the default) or
2150 project_output: Should the output labels be projected?
2155 See also: `decode`, `encode`, `relabel_pairs`, `relabel_symbols`.
2157 self._project(project_output)
2160 cdef void _prune(self, float delta=fst.kDelta, int64 nstate=fst.kNoStateId,
2161 weight=None) except *:
2162 # Threshold is set to semiring Zero (no pruning) if no weight is specified.
2163 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(self.weight_type(),
2165 fst.Prune(self._mfst.get(), wc, nstate, delta)
2166 self._check_mutating_imethod()
2169 float delta=fst.kDelta,
2170 int64 nstate=fst.kNoStateId,
2173 prune(self, delta=0.0009765625, nstate=-1, weight=None)
2175 Removes paths with weights below a certain threshold.
2177 This operation deletes states and arcs in the input FST that do not belong
2178 to a successful path whose weight is no more (w.r.t the natural semiring
2179 order) than the threshold t \otimes-times the weight of the shortest path in
2180 the input FST. Weights must be commutative and have the path property.
2183 delta: Comparison/quantization delta.
2184 nstate: State number threshold.
2185 weight: A Weight or weight string indicating the desired weight threshold
2186 below which paths are pruned; if omitted, no paths are pruned.
2191 See also: The constructive variant.
2193 self._prune(delta, nstate, weight)
2196 cdef void _push(self,
2197 float delta=fst.kDelta,
2198 bool remove_total_weight=False,
2199 bool to_final=False) except *:
2200 fst.Push(self._mfst.get(), fst.GetReweightType(to_final), delta,
2201 remove_total_weight)
2202 self._check_mutating_imethod()
2205 float delta=fst.kDelta,
2206 bool remove_total_weight=False,
2207 bool to_final=False):
2209 push(self, delta=0.0009765625, remove_total_weight=False, to_final=False)
2211 Pushes weights towards the initial or final states.
2213 This operation destructively produces an equivalent transducer by pushing
2214 the weights towards the initial state or toward the final states. When
2215 pushing weights towards the initial state, the sum of the weight of the
2216 outgoing transitions and final weight at any non-initial state is equal to
2217 one in the resulting machine. When pushing weights towards the final states,
2218 the sum of the weight of the incoming transitions at any state is equal to
2219 one. Weights need to be left distributive when pushing towards the initial
2220 state and right distributive when pushing towards the final states.
2223 delta: Comparison/quantization delta.
2224 remove_total_weight: If pushing weights, should the total weight be
2226 to_final: Push towards final states?
2231 See also: The constructive variant, which also supports label pushing.
2233 self._push(delta, remove_total_weight, to_final)
2236 cdef void _relabel_pairs(self, ipairs=None, opairs=None) except *:
2237 cdef unique_ptr[vector[fst.LabelPair]] _ipairs
2238 _ipairs.reset(new vector[fst.LabelPair]())
2239 cdef unique_ptr[vector[fst.LabelPair]] _opairs
2240 _opairs.reset(new vector[fst.LabelPair]())
2244 for (before, after) in ipairs:
2245 _ipairs.get().push_back(fst.LabelPair(before, after))
2247 for (before, after) in opairs:
2248 _opairs.get().push_back(fst.LabelPair(before, after))
2249 if _ipairs.get().empty() and _opairs.get().empty():
2250 raise FstArgError("No relabeling pairs specified.")
2251 fst.Relabel(self._mfst.get(), deref(_ipairs), deref(_opairs))
2252 self._check_mutating_imethod()
2254 def relabel_pairs(self, ipairs=None, opairs=None):
2256 relabel_pairs(self, ipairs=None, opairs=None)
2258 Replaces input and/or output labels using pairs of labels.
2260 This operation destructively relabels the input and/or output labels of the
2261 FST using pairs of the form (old_ID, new_ID); omitted indices are
2265 ipairs: An iterable containing (older index, newer index) integer pairs.
2266 opairs: An iterable containing (older index, newer index) integer pairs.
2272 FstArgError: No relabeling pairs specified.
2274 See also: `decode`, `encode`, `project`, `relabel_tables`.
2276 self._relabel_pairs(ipairs, opairs)
2279 cdef void _relabel_tables(self,
2280 _SymbolTable old_isymbols=None,
2281 _SymbolTable new_isymbols=None,
2282 unknown_isymbol=b"",
2283 bool attach_new_isymbols=True,
2284 _SymbolTable old_osymbols=None,
2285 _SymbolTable new_osymbols=None,
2286 unknown_osymbol=b"",
2287 bool attach_new_osymbols=True) except *:
2288 if new_isymbols is None and new_osymbols is None:
2289 raise FstArgError("No new SymbolTables specified")
2290 cdef fst.SymbolTable *new_isymbols_ptr = NULL
2291 if new_isymbols is not None:
2292 new_isymbols_ptr = new_isymbols._table
2293 cdef fst.SymbolTable *new_osymbols_ptr = NULL
2294 if new_osymbols is not None:
2295 new_osymbols_ptr = new_osymbols._table
2296 fst.Relabel(self._mfst.get(),
2297 self._fst.get().InputSymbols() if old_isymbols is None else
2298 old_isymbols._table, new_isymbols_ptr, tostring(unknown_isymbol),
2299 attach_new_isymbols,
2300 self._fst.get().OutputSymbols() if old_osymbols is None else
2301 old_osymbols._table, new_osymbols_ptr, tostring(unknown_osymbol),
2302 attach_new_osymbols)
2303 self._check_mutating_imethod()
2305 def relabel_tables(self,
2306 _SymbolTable old_isymbols=None,
2307 _SymbolTable new_isymbols=None,
2308 unknown_isymbol=b"",
2309 bool attach_new_isymbols=True,
2310 _SymbolTable old_osymbols=None,
2311 _SymbolTable new_osymbols=None,
2312 unknown_osymbol=b"",
2313 bool attach_new_osymbols=True):
2315 relabel_tables(self, old_isymbols=None, new_isymbols=None,
2316 unknown_isymbol="", attach_new_isymbols=True,
2317 old_osymbols=None, new_osymbols=None,
2318 unknown_osymbol="", attach_new_osymbols=True)
2320 Replaces input and/or output labels using SymbolTables.
2322 This operation destructively relabels the input and/or output labels of the
2323 FST using user-specified symbol tables; omitted symbols are identity-mapped.
2326 old_isymbols: The old SymbolTable for input labels, defaulting to the
2327 FST's input symbol table.
2328 new_isymbols: A SymbolTable used to relabel the input labels
2329 unknown_isymbol: Input symbol to use to relabel OOVs (if empty,
2330 OOVs raise an exception)
2331 attach_new_isymbols: Should new_isymbols be made the FST's input symbol
2333 old_osymbols: The old SymbolTable for output labels, defaulting to the
2334 FST's output symbol table.
2335 new_osymbols: A SymbolTable used to relabel the output labels.
2336 unknown_osymbol: Outnput symbol to use to relabel OOVs (if empty,
2337 OOVs raise an exception)
2338 attach_new_isymbols: Should new_osymbols be made the FST's output symbol
2345 FstArgError: No SymbolTable specified.
2347 See also: `decode`, `encode`, `project`, `relabel_pairs`.
2349 self._relabel_tables(old_isymbols, new_isymbols,
2350 unknown_isymbol, attach_new_isymbols,
2351 old_osymbols, new_osymbols,
2352 unknown_osymbol, attach_new_osymbols)
2355 cdef void _reserve_arcs(self, int64 state, size_t n) except *:
2356 if not self._mfst.get().ReserveArcs(state, n):
2357 raise FstIndexError("State index out of range")
2358 self._check_mutating_imethod()
2360 def reserve_arcs(self, int64 state, size_t n):
2362 reserve_arcs(self, state, n)
2364 Reserve n arcs at a particular state (best effort).
2367 state: The integer index of a state.
2368 n: The number of arcs to reserve.
2374 FstIndexError: State index out of range.
2376 See also: `reserve_states`.
2378 self._reserve_arcs(state, n)
2381 cdef void _reserve_states(self, int64 n) except *:
2382 self._mfst.get().ReserveStates(n)
2383 self._check_mutating_imethod()
2385 def reserve_states(self, int64 n):
2387 reserve_states(self, n)
2389 Reserve n states (best effort).
2392 n: The number of states to reserve.
2397 See also: `reserve_arcs`.
2399 self._reserve_states(n)
2402 cdef void _reweight(self, potentials, bool to_final=False) except *:
2403 cdef unique_ptr[vector[fst.WeightClass]] _potentials
2404 _potentials.reset(new vector[fst.WeightClass]())
2405 cdef string weight_type = self.weight_type()
2406 for weight in potentials:
2407 _potentials.get().push_back(_get_WeightClass_or_One(self.weight_type(),
2409 fst.Reweight(self._mfst.get(), deref(_potentials),
2410 fst.GetReweightType(to_final))
2411 self._check_mutating_imethod()
2413 def reweight(self, potentials, bool to_final=False):
2415 reweight(self, potentials, to_final=False)
2417 Reweights an FST using an iterable of potentials.
2419 This operation destructively reweights an FST according to the potentials
2420 and in the direction specified by the user. An arc of weight w, with an
2421 origin state of potential p and destination state of potential q, is
2422 reweighted by p^{-1} \otimes (w \otimes q) when reweighting towards the
2423 initial state, and by (p \otimes w) \otimes q^{-1} when reweighting towards
2424 the final states. The weights must be left distributive when reweighting
2425 towards the initial state and right distributive when reweighting towards
2426 the final states (e.g., TropicalWeight and LogWeight).
2429 potentials: An iterable of Weight or weight strings.
2430 to_final: Push towards final states?
2435 self._reweight(potentials, to_final)
2438 cdef void _rmepsilon(self,
2440 float delta=fst.kDelta,
2441 int64 nstate=fst.kNoStateId,
2442 weight=None) except *:
2443 # Threshold is set to semiring Zero (no pruning) if weight unspecified.
2444 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(self.weight_type(),
2446 fst.RmEpsilon(self._mfst.get(), connect, wc, nstate, delta)
2447 self._check_mutating_imethod()
2451 float delta=fst.kDelta,
2452 int64 nstate=fst.kNoStateId,
2455 rmepsilon(self, connect=True, delta=0.0009765625, nstate=-1, weight=None)
2457 Removes epsilon transitions.
2459 This operation destructively removes epsilon transitions, i.e., those where
2460 both input and output labels are epsilon) from an FST.
2463 connect: Should output be trimmed?
2464 delta: Comparison/quantization delta.
2465 nstate: State number threshold.
2466 weight: A Weight or weight string indicating the desired weight threshold
2467 below which paths are pruned; if omitted, no paths are pruned.
2472 See also: The constructive variant, which also supports epsilon removal in
2473 reverse (and which may be more efficient).
2475 self._rmepsilon(connect, delta, nstate, weight)
2478 cdef void _set_final(self, int64 state, weight=None) except *:
2479 if not self._mfst.get().ValidStateId(state):
2480 raise FstIndexError("State index out of range")
2481 cdef fst.WeightClass wc = _get_WeightClass_or_One(self.weight_type(),
2483 if not self._mfst.get().SetFinal(state, wc):
2484 raise FstOpError("Incompatible or invalid weight")
2485 self._check_mutating_imethod()
2487 def set_final(self, int64 state, weight=None):
2489 set_final(self, state, weight)
2491 Sets the final weight for a state.
2494 state: The integer index of a state.
2495 weight: A Weight or weight string indicating the desired final weight; if
2496 omitted, it is set to semiring One.
2499 FstIndexError: State index out of range.
2500 FstOpError: Incompatible or invalid weight.
2502 See also: `set_start`.
2504 self._set_final(state, weight)
2507 cdef void _set_input_symbols(self, _SymbolTable syms) except *:
2509 self._mfst.get().SetInputSymbols(NULL)
2511 self._mfst.get().SetInputSymbols(syms._table)
2512 self._check_mutating_imethod()
2514 def set_input_symbols(self, _SymbolTable syms):
2516 set_input_symbols(self, syms)
2518 Sets the input symbol table.
2520 Passing None as a value will delete the input symbol table.
2523 syms: A SymbolTable.
2528 See also: `set_output_symbols`.
2530 self._set_input_symbols(syms)
2533 cdef void _set_output_symbols(self, _SymbolTable syms) except *:
2535 self._mfst.get().SetOutputSymbols(NULL)
2537 self._mfst.get().SetOutputSymbols(syms._table)
2538 self._check_mutating_imethod()
2540 def set_output_symbols(self, _SymbolTable syms):
2542 set_output_symbols(self, syms)
2544 Sets the output symbol table.
2546 Passing None as a value will delete the output symbol table.
2549 syms: A SymbolTable.
2554 See also: `set_input_symbols`.
2556 self._set_output_symbols(syms)
2559 cdef void _set_properties(self, uint64 props, uint64 mask):
2560 self._mfst.get().SetProperties(props, mask)
2562 def set_properties(self, uint64 props, uint64 mask):
2564 set_properties(self, props, mask)
2566 Sets the properties bits.
2569 props: The properties to be set.
2570 mask: A mask to be applied to the `props` argument before setting the
2576 self._set_properties(props, mask)
2579 cdef void _set_start(self, int64 state) except *:
2580 if not self._mfst.get().SetStart(state):
2581 raise FstIndexError("State index out of range")
2582 self._check_mutating_imethod()
2584 def set_start(self, int64 state):
2586 set_start(self, state)
2588 Sets a state to be the initial state state.
2591 state: The integer index of a state.
2597 FstIndexError: State index out of range.
2599 See also: `set_final`.
2601 self._set_start(state)
2604 cdef void _topsort(self) except *:
2605 # TopSort returns False if the FST is cyclic, and thus can't be TopSorted.
2606 if not fst.TopSort(self._mfst.get()):
2607 logging.warning("Cannot topsort cyclic FST.")
2608 self._check_mutating_imethod()
2614 Sorts transitions by state IDs.
2616 This operation destructively topologically sorts the FST, if it is acyclic;
2617 otherwise it remains unchanged. Once sorted, all transitions are from lower
2618 state IDs to higher state IDs
2623 See also: `arcsort`.
2628 cdef void _union(self, _Fst ifst) except *:
2629 fst.Union(self._mfst.get(), deref(ifst._fst))
2630 self._check_mutating_imethod()
2632 def union(self, _Fst ifst):
2636 Computes the union (sum) of two FSTs.
2638 This operation computes the union (sum) of two FSTs. If A transduces string
2639 x to y with weight a and B transduces string w to v with weight b, then
2640 their union transduces x to y with weight a and w to v with weight b.
2643 ifst: The second input FST.
2652 # Pseudo-constructors for _Fst and _MutableFst.
2654 # _init_Fst and _init_MutableFst use an FstClass pointer to instantiate _Fst
2655 # and _MutableFst objects, respectively. The latter function is only safe to
2656 # call when the FST being wrapped is known to be kMutable. The caller can
2657 # safely use it when they have either checked this bit (e.g., by using
2658 # _init_XFst) or have themselves constructed a mutable container for the
2659 # FstClass pointer they're passing (e.g., most of the constructive operations,
2660 # storing their results in a VectorFstClass, a derivative of MutableFstClass).
2662 # _create_Fst constructs an empty VectorFstClass of a user-specified arc type,
2663 # and passes this pointer to _init_MutableFst.
2665 # _read_Fst reads an FST from disk, performing FST conversion if requested, and
2666 # then passes this pointer to _init_XFst.
2668 # The Python class Fst provides a wrapper for these two operations. The former
2669 # can be accessed by calling Fst(...), which acts like a class method, and the
2670 # latter via Fst.read(...), which acts like a static method. This is a bit
2671 # nasty, but totally hidden from the Python user.
2674 cdef _Fst _init_Fst(FstClass_ptr tfst):
2675 if tfst.Properties(fst.kError, True):
2676 raise FstOpError("Operation failed")
2677 cdef _Fst ofst = _Fst.__new__(_Fst)
2678 ofst._fst.reset(<FstClass_ptr> tfst)
2682 cdef _MutableFst _init_MutableFst(MutableFstClass_ptr tfst):
2683 if tfst.Properties(fst.kError, True):
2684 raise FstOpError("Operation failed")
2685 cdef _MutableFst ofst = _MutableFst.__new__(_MutableFst)
2686 ofst._fst.reset(<MutableFstClass_ptr> tfst)
2687 # Makes a copy of it as the derived type! Cool.
2688 ofst._mfst = static_pointer_cast[fst.MutableFstClass, fst.FstClass](ofst._fst)
2692 cdef _Fst _init_XFst(FstClass_ptr tfst):
2693 if tfst.Properties(fst.kMutable, True):
2694 return _init_MutableFst(static_cast[MutableFstClass_ptr](tfst))
2696 return _init_Fst(tfst)
2699 cdef _MutableFst _create_Fst(arc_type=b"standard"):
2700 cdef fst.VectorFstClass *tfst = new fst.VectorFstClass(
2701 <string> tostring(arc_type))
2703 raise FstOpError("Unknown arc type: {!r}".format(arc_type))
2704 return _init_MutableFst(tfst)
2707 cdef _Fst _read_Fst(filename, fst_type=None):
2708 cdef fst.FstClass *tfst = fst.FstClass.Read(tostring(filename))
2710 raise FstIOError("Read failed: {!r}".format(filename))
2711 # Converts if requested.
2712 cdef string fst_type_string
2714 fst_type_string = tostring(fst_type)
2715 if fst_type_string != tfst.FstType():
2716 tfst = fst.Convert(deref(tfst), fst_type_string)
2718 raise FstOpError("Conversion to {!r} failed.".format(fst_type))
2719 return _init_XFst(tfst)
2722 cdef _Fst _deserialize_Fst(fst_string, fst_type=None):
2723 ofst = fst.FstClass.ReadFromString(fst_string)
2724 if fst_type is not None:
2725 fst_type_string = tostring(fst_type)
2726 if fst_type_string != ofst.FstType():
2727 ofst = fst.Convert(deref(ofst), fst_type_string)
2729 raise FstOpError("Conversion to {!r} failed.".format(fst_type))
2730 return _init_XFst(ofst)
2736 Fst(arc_type="standard")
2738 Constructs an empty FST.
2741 arc_type: A string indicating the arc type.
2744 FstError: Unknown arc type.
2747 FstOpError: operation failed.
2750 def __new__(cls, arc_type=b"standard"):
2751 return _create_Fst(arc_type)
2754 def read(filename, fst_type=None):
2756 read(filename, fst_type=None)
2758 Reads an FST from a file.
2761 filename: The string location of the input file.
2762 fst_type: A string indicating the FST type to convert to; no conversion
2763 is performed if omitted or if the FST is already of the desired type.
2769 FstIOError: Read failed.
2770 FstOpError: Read-time conversion failed.
2772 return _read_Fst(filename, fst_type)
2775 def read_from_string(fst_string, fst_type=None):
2777 read_from_string(fst_string, fst_type=None)
2779 Reads an FST from a serialized string.
2782 fst_string: The string containing the serialized FST.
2783 fst_type: A string indicating the FST type to convert to; no conversion
2784 is performed if omitted or if the FST is already of the desired type.
2790 FstIOError: Read failed.
2791 FstOpError: Read-time conversion failed.
2793 See also: `write_to_string`.
2795 return _deserialize_Fst(fst_string, fst_type)
2801 EXPANDED = fst.kExpanded
2802 MUTABLE = fst.kMutable
2804 ACCEPTOR = fst.kAcceptor
2805 NOT_ACCEPTOR = fst.kNotAcceptor
2806 I_DETERMINISTIC = fst.kIDeterministic
2807 NON_I_DETERMINISTIC = fst.kNonIDeterministic
2808 O_DETERMINISTIC = fst.kODeterministic
2809 NON_O_DETERMINISTIC = fst.kNonODeterministic
2810 EPSILONS = fst.kEpsilons
2811 NO_EPSILONS = fst.kNoEpsilons
2812 I_EPSILONS = fst.kIEpsilons
2813 NO_I_EPSILONS = fst.kNoIEpsilons
2814 O_EPSILONS = fst.kOEpsilons
2815 NO_O_EPSILONS = fst.kNoOEpsilons
2816 I_LABEL_SORTED = fst.kILabelSorted
2817 NOT_I_LABEL_SORTED = fst.kNotILabelSorted
2818 O_LABEL_SORTED = fst.kOLabelSorted
2819 NOT_O_LABEL_SORTED = fst.kNotOLabelSorted
2820 WEIGHTED = fst.kWeighted
2821 UNWEIGHTED = fst.kUnweighted
2822 CYCLIC = fst.kCyclic
2823 ACYCLIC = fst.kAcyclic
2824 INITIAL_CYCLIC = fst.kInitialCyclic
2825 INITIAL_ACYCLIC = fst.kInitialAcyclic
2826 TOP_SORTED = fst.kTopSorted
2827 NOT_TOP_SORTED = fst.kNotTopSorted
2828 ACCESSIBLE = fst.kAccessible
2829 NOT_ACCESSIBLE = fst.kNotAccessible
2830 COACCESSIBLE = fst.kCoAccessible
2831 NOT_COACCESSIBLE = fst.kNotCoAccessible
2832 STRING = fst.kString
2833 NOT_STRING = fst.kNotString
2834 WEIGHTED_CYCLES = fst.kWeightedCycles
2835 UNWEIGHTED_CYCLES = fst.kUnweightedCycles
2836 NULL_PROPERTIES = fst.kNullProperties
2837 COPY_PROPERTIES = fst.kCopyProperties
2838 INTRINSIC_PROPERTIES = fst.kIntrinsicProperties
2839 EXTRINSIC_PROPERTIES = fst.kExtrinsicProperties
2840 SET_START_PROPERTIES = fst.kSetStartProperties
2841 SET_FINAL_PROPERTIES = fst.kSetFinalProperties
2842 ADD_STATE_PROPERTIES = fst.kAddStateProperties
2843 ADD_ARC_PROPERTIES = fst.kAddArcProperties
2844 SET_ARC_PROPERTIES = fst.kSetArcProperties
2845 DELETE_STATE_PROPERTIES = fst.kDeleteStatesProperties
2846 DELETE_ARC_PROPERTIES = fst.kDeleteArcsProperties
2847 STATE_SORT_PROPERTIES = fst.kStateSortProperties
2848 ARC_SORT_PROPERTIES = fst.kArcSortProperties
2849 I_LABEL_INVARIANT_PROPERTIES = fst.kILabelInvariantProperties
2850 O_LABEL_INVARIANT_PROPERTIES = fst.kOLabelInvariantProperties
2851 WEIGHT_INVARIANT_PROPERTIES = fst.kWeightInvariantProperties
2852 ADD_SUPERFINAL_PROPERTIES = fst.kAddSuperFinalProperties
2853 RM_SUPERFINAL_PROPERTIES = fst.kRmSuperFinalProperties
2854 BINARY_PROPERTIES = fst.kBinaryProperties
2855 TRINARY_PROPERTIES = fst.kTrinaryProperties
2856 POS_TRINARY_PROPERTIES = fst.kPosTrinaryProperties
2857 NEG_TRINARY_PROPERTIES = fst.kNegTrinaryProperties
2858 FST_PROPERTIES = fst.kFstProperties
2861 ## Arc iterator properties.
2864 ARC_I_LABEL_VALUE = fst.kArcILabelValue
2865 ARC_O_LABEL_VALUE = fst.kArcOLabelValue
2866 ARC_WEIGHT_VALUE = fst.kArcWeightValue
2867 ARC_NEXT_STATE_VALUE = fst.kArcNextStateValue
2868 ARC_NO_CACHE = fst.kArcNoCache
2869 ARC_VALUE_FLAGS = fst.kArcValueFlags
2870 ARC_FLAGS = fst.kArcFlags
2873 ## EncodeMapper properties.
2876 ENCODE_LABELS = fst.kEncodeLabels
2877 ENCODE_WEIGHTS = fst.kEncodeWeights
2878 ENCODE_FLAGS = fst.kEncodeFlags
2881 ## Arc, ArcIterator, and MutableArcIterator.
2884 cdef class Arc(object):
2887 Arc(ilabel, olabel, weight, nextstate)
2889 This class represents an arc while remaining agnostic about the underlying arc
2890 type. Attributes of the arc can be accessed or mutated, and the arc can be
2894 ilabel: The input label.
2895 olabel: The output label.
2896 weight: The arc weight.
2897 nextstate: The destination state for the arc.
2901 return "<Arc at 0x{:x}>".format(id(self))
2903 def __init__(self, int64 ilabel, int64 olabel, weight, int64 nextstate):
2904 cdef fst.WeightClass wc = _get_WeightClass_or_One(b"tropical", weight)
2905 self._arc.reset(new fst.ArcClass(ilabel, olabel, wc, nextstate))
2907 cpdef Arc copy(self):
2908 return Arc(self.ilabel, self.olabel, self.weight, self.nextstate)
2913 return deref(self._arc).ilabel
2915 def __set__(self, int64 value):
2916 deref(self._arc).ilabel = value
2921 return deref(self._arc).olabel
2923 def __set__(self, int64 value):
2924 deref(self._arc).olabel = value
2929 cdef Weight weight = Weight.__new__(Weight)
2930 weight._weight.reset(new fst.WeightClass(deref(self._arc).weight))
2933 def __set__(self, weight):
2934 deref(self._arc).weight = _get_WeightClass_or_One(b"tropical", weight)
2939 return deref(self._arc).nextstate
2941 def __set__(self, int64 value):
2942 deref(self._arc).nextstate = value
2945 cdef Arc _init_Arc(const fst.ArcClass &arc):
2946 cdef Weight weight = Weight.__new__(Weight)
2947 weight._weight.reset(new fst.WeightClass(arc.weight))
2948 return Arc(<int64> arc.ilabel, <int64> arc.olabel, weight,
2949 <int64> arc.nextstate)
2952 cdef class ArcIterator(object):
2955 ArcIterator(ifst, state)
2957 This class is used for iterating over the arcs leaving some state of an FST.
2961 return "<ArcIterator at 0x{:x}>".format(id(self))
2963 def __init__(self, _Fst ifst, int64 state):
2964 if not ifst._fst.get().ValidStateId(state):
2965 raise FstIndexError("State index out of range")
2966 # Makes copy of the shared_ptr, potentially extending the FST's lifetime.
2967 self._fst = ifst._fst
2968 self._aiter.reset(new fst.ArcIteratorClass(deref(self._fst), state))
2970 # This just registers this class as a possible iterator.
2974 # Magic method used to get a Pythonic API out of the C++ API.
2978 result = self.value()
2982 cpdef bool done(self):
2986 Indicates whether the iterator is exhausted or not.
2989 True if the iterator is exhausted, False otherwise.
2991 return self._aiter.get().Done()
2993 cpdef uint32 flags(self):
2997 Returns the current iterator behavioral flags.
3000 The current iterator behavioral flags as an integer.
3002 return self._aiter.get().Flags()
3004 cpdef void next(self):
3008 Advances the iterator.
3010 self._aiter.get().Next()
3012 cpdef size_t position(self):
3016 Returns the position of the iterator.
3019 The iterator's position, expressed as an integer.
3021 return self._aiter.get().Position()
3023 cpdef void reset(self):
3027 Resets the iterator to the initial position.
3029 self._aiter.get().Reset()
3031 cpdef void seek(self, size_t a):
3035 Advance the iterator to a new position.
3038 a: The position to seek to.
3040 self._aiter.get().Seek(a)
3042 cpdef void set_flags(self, uint32 flags, uint32 mask):
3044 set_flags(self, flags, mask)
3046 Sets the current iterator behavioral flags.
3049 flags: The properties to be set.
3050 mask: A mask to be applied to the `flags` argument before setting them.
3052 self._aiter.get().SetFlags(flags, mask)
3054 cpdef object value(self):
3058 Returns the current arc.
3060 return _init_Arc(self._aiter.get().Value())
3063 cdef class MutableArcIterator(object):
3066 MutableArcIterator(ifst, state)
3068 This class is used for iterating over the arcs leaving some state of an FST,
3069 also permitting mutation of the current arc.
3073 return "<MutableArcIterator at 0x{:x}>".format(id(self))
3075 def __init__(self, _MutableFst ifst, int64 state):
3076 if not ifst._fst.get().ValidStateId(state):
3077 raise FstIndexError("State index out of range")
3078 # Makes copy of the shared_ptr, potentially extending the FST's lifetime.
3079 self._mfst = ifst._mfst
3080 self._aiter.reset(new fst.MutableArcIteratorClass(ifst._mfst.get(), state))
3082 cpdef bool done(self):
3086 Indicates whether the iterator is exhausted or not.
3089 True if the iterator is exhausted, False otherwise.
3091 return self._aiter.get().Done()
3093 cpdef uint32 flags(self):
3097 Returns the current iterator behavioral flags.
3100 The current iterator behavioral flags as an integer.
3102 return self._aiter.get().Flags()
3104 cpdef void next(self):
3108 Advances the iterator.
3110 self._aiter.get().Next()
3112 cpdef size_t position(self):
3116 Returns the position of the iterator.
3119 The iterator's position, expressed as an integer.
3121 return self._aiter.get().Position()
3123 cpdef void reset(self):
3127 Resets the iterator to the initial position.
3129 self._aiter.get().Reset()
3131 cpdef void seek(self, size_t a):
3135 Advance the iterator to a new position.
3138 a: The position to seek to.
3140 self._aiter.get().Seek(a)
3142 cpdef void set_flags(self, uint32 flags, uint32 mask):
3144 set_flags(self, flags, mask)
3146 Sets the current iterator behavioral flags.
3149 flags: The properties to be set.
3150 mask: A mask to be applied to the `flags` argument before setting them.
3152 self._aiter.get().SetFlags(flags, mask)
3154 cpdef void set_value(self, Arc arc):
3156 set_value(self, arc)
3158 Replace the current arc with a new arc.
3161 arc: The arc to replace the current arc with.
3163 self._aiter.get().SetValue(deref(arc._arc))
3165 cpdef object value(self):
3169 Returns the current arc.
3171 return _init_Arc(self._aiter.get().Value())
3177 cdef class StateIterator(object):
3182 This class is used for iterating over the states in an FST.
3186 return "<StateIterator at 0x{:x}>".format(id(self))
3188 def __init__(self, _Fst ifst):
3189 # Makes copy of the shared_ptr, potentially extending the FST's lifetime.
3190 self._fst = ifst._fst
3191 self._siter.reset(new fst.StateIteratorClass(deref(self._fst)))
3193 # This just registers this class as a possible iterator.
3197 # Magic method used to get a Pythonic API out of the C++ API.
3201 cdef int64 result = self.value()
3205 cpdef bool done(self):
3209 Indicates whether the iterator is exhausted or not.
3212 True if the iterator is exhausted, False otherwise.
3214 return self._siter.get().Done()
3216 cpdef void next(self):
3220 Advances the iterator.
3222 self._siter.get().Next()
3224 cpdef void reset(self):
3228 Resets the iterator to the initial position.
3230 self._siter.get().Reset()
3232 cpdef int64 value(self):
3236 Returns the current state index.
3238 return self._siter.get().Value()
3244 cdef _Fst _map(_Fst ifst,
3245 float delta=fst.kDelta,
3246 map_type=b"identity",
3248 cdef fst.MapType map_type_enum
3249 if not fst.GetMapType(tostring(map_type), addr(map_type_enum)):
3250 raise FstArgError("Unknown map type: {!r}".format(map_type))
3251 cdef fst.WeightClass wc = (_get_WeightClass_or_One(ifst.weight_type(),
3252 weight) if map_type_enum == fst.TIMES_MAPPER else
3253 _get_WeightClass_or_Zero(ifst.weight_type(), weight))
3254 return _init_XFst(fst.Map(deref(ifst._fst), map_type_enum, delta, wc))
3257 cpdef _Fst arcmap(_Fst ifst,
3258 float delta=fst.kDelta,
3259 map_type=b"identity",
3262 arcmap(ifst, delta=0.0009765625, map_type="identity", weight=None)
3264 Constructively applies a transform to all arcs and final states.
3266 This operation transforms each arc and final state in the input FST using
3267 one of the following:
3269 * identity: maps to self.
3270 * input_epsilon: replaces all input labels with epsilon.
3271 * invert: reciprocates all non-Zero weights.
3272 * output_epsilon: replaces all output labels with epsilon.
3273 * plus: adds a constant to all weights.
3274 * quantize: quantizes weights.
3275 * rmweight: replaces all non-Zero weights with 1.
3276 * superfinal: redirects final states to a new superfinal state.
3277 * times: right-multiplies a constant to all weights.
3278 * to_log: converts weights to the log semiring.
3279 * to_log64: converts weights to the log64 semiring.
3280 * to_standard: converts weights to the tropical ("standard") semiring.
3283 ifst: The input FST.
3284 delta: Comparison/quantization delta (ignored unless `map_type` is
3286 map_type: A string matching a known mapping operation (see above).
3287 weight: A Weight or weight string passed to the arc-mapper; this is ignored
3288 unless `map_type` is `plus` (in which case it defaults to semiring Zero)
3289 or `times` (in which case it defaults to semiring One).
3292 An FST with arcs and final states remapped.
3295 FstArgError: Unknown map type.
3297 See also: `statemap`.
3299 return _map(ifst, delta, map_type, weight)
3302 cpdef _MutableFst compose(_Fst ifst1,
3304 compose_filter=b"auto",
3307 compose(ifst1, ifst2, compose_filter="auto", connect=True)
3309 Constructively composes two FSTs.
3311 This operation computes the composition of two FSTs. If A transduces string
3312 x to y with weight a and B transduces y to z with weight b, then their
3313 composition transduces string x to z with weight a \otimes b. The output
3314 labels of the first transducer or the input labels of the second transducer
3315 must be sorted (or otherwise support appropriate matchers).
3318 ifst1: The first input FST.
3319 ifst2: The second input FST.
3320 compose_filter: A string matching a known composition filter; one of:
3321 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
3322 connect: Should output be trimmed?
3327 See also: `arcsort`.
3329 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst1.arc_type())
3330 cdef unique_ptr[fst.ComposeOptions] opts
3331 opts.reset(new fst.ComposeOptions(connect,
3332 _get_compose_filter(tostring(compose_filter))))
3333 fst.Compose(deref(ifst1._fst), deref(ifst2._fst), tfst, deref(opts))
3334 return _init_MutableFst(tfst)
3337 cpdef _Fst convert(_Fst ifst, fst_type=None):
3339 convert(ifst, fst_type=None)
3341 Constructively converts an FST to a new internal representation.
3344 ifst: The input FST.
3345 fst_type: A string indicating the FST type to convert to, or None if
3346 no conversion is desired.
3349 An equivalent Fst converted to the desired FST type.
3352 FstOpError: Conversion failed.
3354 cdef string fst_type_string = "" if fst_type is None else tostring(fst_type)
3355 cdef FstClass_ptr tfst = fst.Convert(deref(ifst._fst), fst_type_string)
3356 # Script-land Convert returns the null pointer to signal failure.
3358 raise FstOpError("Conversion to {!r} failed".format(fst_type))
3359 return _init_XFst(tfst)
3362 cpdef _MutableFst determinize(_Fst ifst,
3363 float delta=fst.kDelta,
3364 det_type=b"functional",
3365 int64 nstate=fst.kNoStateId,
3366 int64 subsequential_label=0,
3368 bool increment_subsequential_label=False):
3370 determinize(ifst, delta=0.0009765625, det_type="functional", nstate=-1,
3371 subsequential_label=0, weight=None,
3372 incremental_subsequential_label=False)
3374 Constructively determinizes a weighted FST.
3376 This operations creates an equivalent FST that has the property that no
3377 state has two transitions with the same input label. For this algorithm,
3378 epsilon transitions are treated as regular symbols (cf. `rmepsilon`).
3381 ifst: The input FST.
3382 delta: Comparison/quantization delta.
3383 det_type: Type of determinization; one of: "functional" (input transducer is
3384 functional), "nonfunctional" (input transducer is not functional) and
3385 disambiguate" (input transducer is not functional but only keep the min
3386 of ambiguous outputs).
3387 nstate: State number threshold.
3388 subsequential_label: Input label of arc corresponding to residual final
3389 output when producing a subsequential transducer.
3390 weight: A Weight or weight string indicating the desired weight threshold
3391 below which paths are pruned; if omitted, no paths are pruned.
3392 increment_subsequential_label: Increment subsequential when creating
3393 several arcs for the residual final output at a given state.
3396 An equivalent deterministic FST.
3399 FstArgError: Unknown determinization type.
3401 See also: `disambiguate`, `rmepsilon`.
3403 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
3404 # Threshold is set to semiring Zero (no pruning) if weight unspecified.
3405 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(),
3407 cdef fst.DeterminizeType determinize_type_enum
3408 if not fst.GetDeterminizeType(tostring(det_type),
3409 addr(determinize_type_enum)):
3410 raise FstArgError("Unknown determinization type: {!r}".format(det_type))
3411 cdef unique_ptr[fst.DeterminizeOptions] opts
3412 opts.reset(new fst.DeterminizeOptions(delta, wc, nstate, subsequential_label,
3413 determinize_type_enum,
3414 increment_subsequential_label))
3415 fst.Determinize(deref(ifst._fst), tfst, deref(opts))
3416 return _init_MutableFst(tfst)
3419 cpdef _MutableFst difference(_Fst ifst1,
3421 compose_filter=b"auto",
3424 difference(ifst1, ifst2, compose_filter="auto", connect=True)
3426 Constructively computes the difference of two FSTs.
3428 This operation computes the difference between two FSAs. Only strings that are
3429 in the first automaton but not in second are retained in the result. The first
3430 argument must be an acceptor; the second argument must be an unweighted,
3431 epsilon-free, deterministic acceptor. The output labels of the first
3432 transducer or the input labels of the second transducer must be sorted (or
3433 otherwise support appropriate matchers).
3436 ifst1: The first input FST.
3437 ifst2: The second input FST.
3438 compose_filter: A string matching a known composition filter; one of:
3439 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
3440 connect: Should the output FST be trimmed?
3443 An FST representing the difference of the FSTs.
3445 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst1.arc_type())
3446 cdef unique_ptr[fst.ComposeOptions] opts
3447 opts.reset(new fst.ComposeOptions(connect, _get_compose_filter(
3448 tostring(compose_filter))))
3449 fst.Difference(deref(ifst1._fst), deref(ifst2._fst), tfst, deref(opts))
3450 return _init_MutableFst(tfst)
3453 cpdef _MutableFst disambiguate(_Fst ifst,
3454 float delta=fst.kDelta,
3455 int64 nstate=fst.kNoStateId,
3456 int64 subsequential_label=0,
3459 disambiguate(ifst, delta=0.0009765625, nstate=-1, subsequential_label=0,
3462 Constructively disambiguates a weighted transducer.
3464 This operation disambiguates a weighted transducer. The result will be an
3465 equivalent FST that has the property that no two successful paths have the
3466 same input labeling. For this algorithm, epsilon transitions are treated as
3467 regular symbols (cf. `rmepsilon`).
3470 ifst: The input FST.
3471 delta: Comparison/quantization delta.
3472 nstate: State number threshold.
3473 subsequential_label: Input label of arc corresponding to residual final
3474 output when producing a subsequential transducer.
3475 weight: A Weight or weight string indicating the desired weight threshold
3476 below which paths are pruned; if omitted, no paths are pruned.
3479 An equivalent disambiguated FST.
3481 See also: `determinize`, `rmepsilon`.
3483 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
3484 # Threshold is set to semiring Zero (no pruning) if no weight is specified.
3485 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(),
3487 cdef unique_ptr[fst.DisambiguateOptions] opts
3488 opts.reset(new fst.DisambiguateOptions(delta, wc, nstate,
3489 subsequential_label))
3490 fst.Disambiguate(deref(ifst._fst), tfst, deref(opts))
3491 return _init_MutableFst(tfst)
3494 cpdef _MutableFst epsnormalize(_Fst ifst, bool eps_norm_output=False):
3496 epsnormalize(ifst, eps_norm_output=False)
3498 Constructively epsilon-normalizes an FST.
3500 This operation creates an equivalent FST that is epsilon-normalized. An
3501 acceptor is epsilon-normalized if it it is epsilon-removed (cf. `rmepsilon`).
3502 A transducer is input epsilon-normalized if, in addition, along any path, all
3503 arcs with epsilon input labels follow all arcs with non-epsilon input labels.
3504 Output epsilon-normalized is defined similarly. The input FST must be
3508 ifst: The input FST.
3509 eps_norm_output: Should the FST be output epsilon-normalized?
3512 An equivalent epsilon-normalized FST.
3514 See also: `rmepsilon`.
3516 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
3517 fst.EpsNormalize(deref(ifst._fst), tfst, fst.EPS_NORM_OUTPUT if
3518 eps_norm_output else
3520 return _init_MutableFst(tfst)
3523 cpdef bool equal(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta):
3525 equal(ifst1, ifst2, delta=0.0009765625)
3529 This function tests whether two FSTs have the same states with the same
3530 numbering and the same transitions with the same labels and weights in the
3534 ifst1: The first input FST.
3535 ifst2: The second input FST.
3536 delta: Comparison/quantization delta.
3539 True if the FSTs satisfy the above condition, else False.
3541 See also: `equivalent`, `isomorphic`, `randequivalent`.
3543 return fst.Equal(deref(ifst1._fst), deref(ifst2._fst), delta)
3546 cpdef bool equivalent(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta) except *:
3548 equivalent(ifst1, ifst2, delta=0.0009765625)
3550 Are the two acceptors equivalent?
3552 This operation tests whether two epsilon-free deterministic weighted
3553 acceptors are equivalent, that is if they accept the same strings with the
3557 ifst1: The first input FST.
3558 ifst2: The second input FST.
3559 delta: Comparison/quantization delta.
3562 True if the FSTs satisfy the above condition, else False.
3565 FstOpError: Equivalence test encountered error.
3567 See also: `equal`, `isomorphic`, `randequivalent`.
3570 cdef bool result = fst.Equivalent(deref(ifst1._fst), deref(ifst2._fst), delta,
3573 raise FstOpError("Equivalence test encountered error")
3577 cpdef _MutableFst intersect(_Fst ifst1,
3579 compose_filter=b"auto",
3582 intersect(ifst1, ifst2, compose_filter="auto", connect=True)
3584 Constructively intersects two FSTs.
3586 This operation computes the intersection (Hadamard product) of two FSTs.
3587 Only strings that are in both automata are retained in the result. The two
3588 arguments must be acceptors. One of the arguments must be label-sorted (or
3589 otherwise support appropriate matchers).
3592 ifst1: The first input FST.
3593 ifst2: The second input FST.
3594 compose_filter: A string matching a known composition filter; one of:
3595 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
3596 connect: Should output be trimmed?
3601 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst1.arc_type())
3602 cdef unique_ptr[fst.ComposeOptions] opts
3603 opts.reset(new fst.ComposeOptions(connect,
3604 _get_compose_filter(tostring(compose_filter))))
3605 fst.Intersect(deref(ifst1._fst), deref(ifst2._fst), tfst, deref(opts))
3606 return _init_MutableFst(tfst)
3609 cpdef bool isomorphic(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta):
3611 isomorphic(ifst1, ifst2, delta=0.0009765625)
3613 Are the two acceptors isomorphic?
3615 This operation determines if two transducers with a certain required
3616 determinism have the same states, irrespective of numbering, and the same
3617 transitions with the same labels and weights, irrespective of ordering. In
3618 other words, FSTs A, B are isomorphic if and only if the states of A can be
3619 renumbered and the transitions leaving each state reordered so the two are
3620 equal (according to the definition given in `equal`).
3623 ifst1: The first input FST.
3624 ifst2: The second input FST.
3625 delta: Comparison/quantization delta.
3628 True if the two transducers satisfy the above condition, else False.
3630 See also: `equal`, `equivalent`, `randequivalent`.
3632 return fst.Isomorphic(deref(ifst1._fst), deref(ifst2._fst), delta)
3635 cpdef _MutableFst prune(_Fst ifst,
3636 float delta=fst.kDelta,
3637 int64 nstate=fst.kNoStateId,
3640 prune(ifst, delta=0.0009765625, nstate=-1, weight=None)
3642 Constructively removes paths with weights below a certain threshold.
3644 This operation deletes states and arcs in the input FST that do not belong
3645 to a successful path whose weight is no more (w.r.t the natural semiring
3646 order) than the threshold t \otimes-times the weight of the shortest path in
3647 the input FST. Weights must be commutative and have the path property.
3650 ifst: The input FST.
3651 delta: Comparison/quantization delta.
3652 nstate: State number threshold.
3653 weight: A Weight or weight string indicating the desired weight threshold
3654 below which paths are pruned; if omitted, no paths are pruned.
3659 See also: The destructive variant.
3661 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
3662 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
3663 fst.Prune(deref(ifst._fst), tfst, wc, nstate, delta)
3664 return _init_MutableFst(tfst)
3667 cpdef _MutableFst push(_Fst ifst,
3668 float delta=fst.kDelta,
3669 bool push_weights=False,
3670 bool push_labels=False,
3671 bool remove_common_affix=False,
3672 bool remove_total_weight=False,
3673 bool to_final=False):
3675 push(ifst, delta=0.0009765625, push_weights=False, push_labels=False,
3676 remove_common_affix=False, remove_total_weight=False, to_final=False)
3678 Constructively pushes weights/labels towards initial or final states.
3680 This operation produces an equivalent transducer by pushing the weights
3681 and/or the labels towards the initial state or toward the final states.
3683 When pushing weights towards the initial state, the sum of the weight of the
3684 outgoing transitions and final weight at any non-initial state is equal to 1
3685 in the resulting machine. When pushing weights towards the final states, the
3686 sum of the weight of the incoming transitions at any state is equal to 1.
3687 Weights need to be left distributive when pushing towards the initial state
3688 and right distributive when pushing towards the final states.
3690 Pushing labels towards the initial state consists in minimizing at every
3691 state the length of the longest common prefix of the output labels of the
3692 outgoing paths. Pushing labels towards the final states consists in
3693 minimizing at every state the length of the longest common suffix of the
3694 output labels of the incoming paths.
3697 ifst: The input FST.
3698 delta: Comparison/quantization delta.
3699 push_weights: Should weights be pushed?
3700 push_labels: Should labels be pushed?
3701 remove_common_affix: If pushing labels, should common prefix/suffix be
3703 remove_total_weight: If pushing weights, should total weight be removed?
3704 to_final: Push towards final states?
3707 An equivalent pushed FST.
3709 See also: The destructive variant.
3711 # This is copied, almost verbatim, from ./fstpush.cc.
3712 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
3713 cdef uint32 flags = fst.GetPushFlags(push_weights, push_labels,
3714 remove_common_affix, remove_total_weight)
3715 fst.Push(deref(ifst._fst), tfst, flags, fst.GetReweightType(to_final), delta)
3716 return _init_MutableFst(tfst)
3719 cpdef bool randequivalent(_Fst ifst1,
3722 float delta=fst.kDelta,
3725 int32 max_length=INT32_MAX) except *:
3727 randequivalent(ifst1, ifst2, npath=1, delta=0.0009765625, seed=0,
3728 select="uniform", max_length=2147483647)
3730 Are two acceptors stochastically equivalent?
3732 This operation tests whether two FSTs are equivalent by randomly generating
3733 paths alternatively in each of the two FSTs. For each randomly generated path,
3734 the algorithm computes for each of the two FSTs the sum of the weights of all
3735 the successful paths sharing the same input and output labels as the randomly
3736 generated path and checks that these two values are within `delta`.
3739 ifst1: The first input FST.
3740 ifst2: The second input FST.
3741 npath: The number of random paths to generate.
3742 delta: Comparison/quantization delta.
3743 seed: An optional seed value for random path generation; if zero, the
3744 current time and process ID is used.
3745 select: A string matching a known random arc selection type; one of:
3746 "uniform", "log_prob", "fast_log_prob".
3747 max_length: The maximum length of each random path.
3750 True if the two transducers satisfy the above condition, else False.
3753 FstOpError: Random equivalence test encountered error.
3755 See also: `equal`, `equivalent`, `isomorphic`, `randgen`.
3757 cdef fst.RandArcSelection ras = _get_rand_arc_selection(tostring(select))
3758 cdef unique_ptr[fst.RandGenOptions[fst.RandArcSelection]] opts
3759 # The three trailing options will be ignored by RandEquivalent.
3760 opts.reset(new fst.RandGenOptions[fst.RandArcSelection](ras, max_length,
3763 seed = time(NULL) + getpid()
3765 cdef bool result = fst.RandEquivalent(deref(ifst1._fst), deref(ifst2._fst),
3766 npath, delta, seed, deref(opts),
3769 raise FstOpError("Random equivalence test encountered error")
3773 cpdef _MutableFst randgen(_Fst ifst,
3777 int32 max_length=INT32_MAX,
3778 bool weighted=False,
3779 bool remove_total_weight=False):
3781 randgen(ifst, npath=1, seed=0, select="uniform", max_length=2147483647,
3782 weight=False, remove_total_weight=False)
3784 Randomly generate successful paths in an FST.
3786 This operation randomly generates a set of successful paths in the input FST.
3787 This relies on a mechanism for selecting arcs, specified using the `select`
3788 argument. The default selector, "uniform", randomly selects a transition
3789 using a uniform distribution. The "log_prob" selector randomly selects a
3790 transition w.r.t. the weights treated as negative log probabilities after
3791 normalizing for the total weight leaving the state. In all cases, finality is
3792 treated as a transition to a super-final state.
3795 ifst: The input FST.
3796 npath: The number of random paths to generate.
3797 seed: An optional seed value for random path generation; if zero, the
3798 current time and process ID is used.
3799 select: A string matching a known random arc selection type; one of:
3800 "uniform", "log_prob", "fast_log_prob".
3801 max_length: The maximum length of each random path.
3802 weighted: Should the output be weighted by path count?
3803 remove_total_weight: Should the total weight be removed (ignored when
3804 `weighted` is False)?
3807 An FST containing one or more random paths.
3809 See also: `randequivalent`.
3811 cdef fst.RandArcSelection ras = _get_rand_arc_selection(tostring(select))
3812 cdef unique_ptr[fst.RandGenOptions[fst.RandArcSelection]] opts
3813 opts.reset(new fst.RandGenOptions[fst.RandArcSelection](ras, max_length,
3815 remove_total_weight))
3816 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
3818 seed = time(NULL) + getpid()
3819 fst.RandGen(deref(ifst._fst), tfst, seed, deref(opts))
3820 return _init_MutableFst(tfst)
3823 cpdef _MutableFst replace(pairs,
3824 call_arc_labeling=b"input",
3825 return_arc_labeling=b"neither",
3826 bool epsilon_on_replace=False,
3827 int64 return_label=0):
3829 replace(pairs, call_arc_labeling="input", return_arc_labeling="neither",
3830 epsilon_on_replace=False, return_label=0)
3832 Recursively replaces arcs in the FST with other FST(s).
3834 This operation performs the dynamic replacement of arcs in one FST with
3835 another FST, allowing the definition of FSTs analogous to RTNs. It takes as
3836 input a set of pairs of a set of pairs formed by a non-terminal label and
3837 its corresponding FST, and a label identifying the root FST in that set.
3838 The resulting FST is obtained by taking the root FST and recursively replacing
3839 each arc having a nonterminal as output label by its corresponding FST. More
3840 precisely, an arc from state s to state d with (nonterminal) output label n in
3841 this FST is replaced by redirecting this "call" arc to the initial state of a
3842 copy F of the FST for n, and adding "return" arcs from each final state of F
3843 to d. Optional arguments control how the call and return arcs are labeled; by
3844 default, the only non-epsilon label is placed on the call arc.
3848 pairs: An iterable of (nonterminal label, FST) pairs, where the former is an
3849 unsigned integer and the latter is an Fst instance.
3850 call_arc_labeling: A string indicating which call arc labels should be
3851 non-epsilon. One of: "input" (default), "output", "both", "neither".
3852 This value is set to "neither" if epsilon_on_replace is True.
3853 return_arc_labeling: A string indicating which return arc labels should be
3854 non-epsilon. One of: "input", "output", "both", "neither" (default).
3855 This value is set to "neither" if epsilon_on_replace is True.
3856 epsilon_on_replace: Should call and return arcs be epsilon arcs? If True,
3857 this effectively overrides call_arc_labeling and return_arc_labeling,
3858 setting both to "neither".
3859 return_label: The integer label for return arcs.
3862 An FST resulting from expanding the input RTN.
3864 cdef vector[fst.LabelFstClassPair] _pairs
3865 cdef int64 root_label
3869 (root_label, ifst) = next(it)
3870 _pairs.push_back(fst.LabelFstClassPair(root_label, ifst._fst.get()))
3871 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
3872 for (label, ifst) in it:
3873 _pairs.push_back(fst.LabelFstClassPair(label, ifst._fst.get()))
3874 cdef fst.ReplaceLabelType cal = _get_replace_label_type(
3875 tostring(call_arc_labeling), epsilon_on_replace)
3876 cdef fst.ReplaceLabelType ral = _get_replace_label_type(
3877 tostring(return_arc_labeling), epsilon_on_replace)
3878 cdef unique_ptr[fst.ReplaceOptions] opts
3879 opts.reset(new fst.ReplaceOptions(root_label, cal, ral, return_label))
3880 fst.Replace(_pairs, tfst, deref(opts))
3881 return _init_MutableFst(tfst)
3884 cpdef _MutableFst reverse(_Fst ifst, bool require_superinitial=True):
3886 reverse(ifst, require_superinitial=True)
3888 Constructively reverses an FST's transduction.
3890 This operation reverses an FST. If A transduces string x to y with weight a,
3891 then the reverse of A transduces the reverse of x to the reverse of y with
3892 weight a.Reverse(). (Typically, a = a.Reverse() and Arc = RevArc, e.g.,
3893 TropicalWeight and LogWeight.) In general, e.g., when the weights only form a
3894 left or right semiring, the output arc type must match the input arc type.
3897 ifst: The input FST.
3898 require_superinitial: Should a superinitial state be created?
3903 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
3904 fst.Reverse(deref(ifst._fst), tfst, require_superinitial)
3905 return _init_MutableFst(tfst)
3908 cpdef _MutableFst rmepsilon(_Fst ifst,
3910 float delta=fst.kDelta,
3911 int64 nstate=fst.kNoStateId,
3916 rmepsilon(ifst, connect=True, delta=0.0009765625, nstate=-1,
3917 queue_type="auto", reverse=False, weight=None)
3919 Constructively removes epsilon transitions from an FST.
3921 This operation removes epsilon transitions (those where both input and output
3922 labels are epsilon) from an FST.
3925 ifst: The input FST.
3926 connect: Should output be trimmed?
3927 delta: Comparison/quantization delta.
3928 nstate: State number threshold.
3929 queue_type: A string matching a known queue type; one of: "auto", "fifo",
3930 "lifo", "shortest", "state", "top".
3931 reverse: Should epsilon transitions be removed in reverse order?
3932 weight: A string indicating the desired weight threshold; paths with
3933 weights below this threshold will be pruned.
3936 An equivalent FST with no epsilon transitions.
3938 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
3939 cdef unique_ptr[fst.RmEpsilonOptions] opts
3940 opts.reset(new fst.RmEpsilonOptions(_get_queue_type(tostring(queue_type)),
3941 delta, connect, wc, nstate))
3942 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
3943 fst.RmEpsilon(deref(ifst._fst), tfst, reverse, deref(opts))
3944 return _init_MutableFst(tfst)
3947 # Pure C++ helper for shortestdistance.
3950 cdef vector[fst.WeightClass] *_shortestdistance(_Fst ifst,
3951 float delta=fst.kDelta, int64 nstate=fst.kNoStateId, queue_type=b"auto",
3952 bool reverse=False) except *:
3953 cdef unique_ptr[vector[fst.WeightClass]] distance
3954 distance.reset(new vector[fst.WeightClass]())
3955 # For scoping reasons, these have to be declared here even though they may
3956 # not be used in all cases.
3957 cdef unique_ptr[fst.ShortestDistanceOptions] opts
3959 # Only the simpler signature supports shortest distance to final states;
3960 # `nstate` and `queue_type` arguments are ignored.
3961 fst.ShortestDistance(deref(ifst._fst), distance.get(), True, delta)
3963 opts.reset(new fst.ShortestDistanceOptions(
3964 _get_queue_type(tostring(queue_type)), fst.ANY_ARC_FILTER, nstate,
3966 fst.ShortestDistance(deref(ifst._fst), distance.get(), deref(opts))
3967 return distance.release()
3970 def shortestdistance(_Fst ifst,
3971 float delta=fst.kDelta,
3972 int64 nstate=fst.kNoStateId,
3974 bool reverse=False):
3976 shortestdistance(ifst, delta=0.0009765625, nstate=-1, queue_type="auto",
3979 Compute the shortest distance from the initial or final state.
3981 This operation computes the shortest distance from the initial state (when
3982 `reverse` is False) or from every state to the final state (when `reverse` is
3983 True). The shortest distance from p to q is the \otimes-sum of the weights of
3984 all the paths between p and q. The weights must be right (if `reverse` is
3985 False) or left (if `reverse` is True) distributive, and k-closed (i.e., 1
3986 \otimes x \otimes x^2 \otimes ... \otimes x^{k + 1} = 1 \otimes x \otimes x^2
3987 \otimes ... \otimes x^k; e.g., TropicalWeight).
3990 ifst: The input FST.
3991 delta: Comparison/quantization delta.
3992 nstate: State number threshold (this is ignored if `reverse` is True).
3993 queue_type: A string matching a known queue type; one of: "auto", "fifo",
3994 "lifo", "shortest", "state", "top" (this is ignored if `reverse` is
3996 reverse: Should the reverse distance (from each state to the final state)
4000 A list of Weight objects representing the shortest distance for each state.
4002 cdef unique_ptr[vector[fst.WeightClass]] distance
4003 distance.reset(_shortestdistance(ifst, delta, nstate, queue_type, reverse))
4004 # Packs the distances, as strings, into a Python list.
4005 cdef string weight_type = ifst.weight_type()
4007 # This is just the Cython version of the normal vector iteration idiom.
4008 cdef vector[fst.WeightClass].iterator it = distance.get().begin()
4009 while it != distance.get().end():
4010 result.append(Weight(weight_type, deref(it).ToString()))
4015 cpdef _MutableFst shortestpath(_Fst ifst,
4016 float delta=fst.kDelta,
4018 int64 nstate=fst.kNoStateId,
4023 shortestpath(ifst, delta=0.0009765625, nshortest=1, nstate=-1,
4024 queue_type="auto", unique=False, weight=None)
4026 Construct an FST containing the shortest path(s) in the input FST.
4028 This operation produces an FST containing the n-shortest paths in the input
4029 FST. The n-shortest paths are the n-lowest weight paths w.r.t. the natural
4030 semiring order. The single path that can be read from the ith of at most n
4031 transitions leaving the initial state of the resulting FST is the ith
4032 shortest path. The weights need to be right distributive and have the path
4033 property. They also need to be left distributive as well for n-shortest with
4034 n > 1 (e.g., TropicalWeight).
4037 ifst: The input FST.
4038 delta: Comparison/quantization delta.
4039 nshortest: The number of paths to return.
4040 nstate: State number threshold.
4041 queue_type: A string matching a known queue type; one of: "auto", "fifo",
4042 "lifo", "shortest", "state", "top".
4043 unique: Should the resulting FST only contain distinct paths? (Requires
4044 the input FST to be an acceptor; epsilons are treated as if they are
4046 weight: A Weight or weight string indicating the desired weight threshold
4047 below which paths are pruned; if omitted, no paths are pruned.
4050 An FST containing the n-shortest paths.
4052 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
4053 cdef unique_ptr[vector[fst.WeightClass]] distance
4054 distance.reset(new vector[fst.WeightClass]())
4055 # Threshold is set to semiring Zero (no pruning) if no weight is specified.
4056 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
4057 cdef unique_ptr[fst.ShortestPathOptions] opts
4058 opts.reset(new fst.ShortestPathOptions(_get_queue_type(tostring(queue_type)),
4059 nshortest, unique, False, delta,
4061 fst.ShortestPath(deref(ifst._fst), tfst, distance.get(), deref(opts))
4062 return _init_MutableFst(tfst)
4065 cpdef _Fst statemap(_Fst ifst, map_type):
4067 state_map(ifst, map_type)
4069 Constructively applies a transform to all states.
4071 This operation transforms each state according to the requested map type.
4072 Note that currently, only one state-mapping operation is supported.
4075 ifst: The input FST.
4076 map_type: A string matching a known mapping operation; one of: "arc_sum"
4077 (sum weights of identically-labeled multi-arcs), "arc_unique" (deletes
4078 non-unique identically-labeled multi-arcs).
4081 An FST with states remapped.
4084 FstArgError: Unknown map type.
4088 return _map(ifst, fst.kDelta, map_type, None)
4091 cpdef _MutableFst synchronize(_Fst ifst):
4095 Constructively synchronizes an FST.
4097 This operation synchronizes a transducer. The result will be an equivalent
4098 FST that has the property that during the traversal of a path, the delay is
4099 either zero or strictly increasing, where the delay is the difference between
4100 the number of non-epsilon output labels and input labels along the path. For
4101 the algorithm to terminate, the input transducer must have bounded delay,
4102 i.e., the delay of every cycle must be zero.
4105 ifst: The input FST.
4108 An equivalent synchronized FST.
4110 cdef VectorFstClass_ptr tfst = new fst.VectorFstClass(ifst.arc_type())
4111 fst.Synchronize(deref(ifst._fst), tfst)
4112 return _init_MutableFst(tfst)
4118 cdef class Compiler(object):
4121 Compiler(fst_type="vector", arc_type="standard", isymbols=None,
4122 osymbols=None, ssymbols=None, acceptor=False, keep_isymbols=False,
4123 keep_osymbols=False, keep_state_numbering=False,
4124 allow_negative_labels=False)
4126 Class used to compile FSTs from strings.
4128 This class is used to compile FSTs specified using the AT&T FSM library
4129 format described here:
4131 http://web.eecs.umich.edu/~radev/NLP-fall2015/resources/fsm_archive/fsm.5.html
4133 This is the same format used by the `fstcompile` executable.
4135 Compiler options (symbol tables, etc.) are set at construction time.
4137 compiler = fst.Compiler(isymbols=ascii_syms, osymbols=ascii_syms)
4139 Once constructed, Compiler instances behave like a file handle opened for
4143 print >> compiler, "0 1 50 50"
4144 print >> compiler, "1 2 49 49"
4145 print >> compiler, "2 2 49 49"
4146 print >> compiler, "2"
4148 The `compile` method returns an actual FST instance:
4150 sheep_machine = compiler.compile()
4152 Compilation flushes the internal buffer, so the compiler instance can be
4153 reused to compile new machines with the same symbol tables (etc.)
4156 fst_type: A string indicating the container type for the compiled FST.
4157 arc_type: A string indicating the arc type for the compiled FST.
4158 isymbols: An optional SymbolTable used to label input symbols.
4159 osymbols: An optional SymbolTable used to label output symbols.
4160 ssymbols: An optional SymbolTable used to label states.
4161 acceptor: Should the FST be rendered in acceptor format if possible?
4162 keep_isymbols: Should the input symbol table be stored in the FST?
4163 keep_osymbols: Should the output symbol table be stored in the FST?
4164 keep_state_numbering: Should the state numbering be preserved?
4165 allow_negative_labels: Should negative labels be allowed? (Not
4166 recommended; may cause conflicts).
4170 string fst_type=b"vector",
4171 string arc_type=b"standard",
4172 SymbolTable isymbols=None,
4173 SymbolTable osymbols=None,
4174 SymbolTable ssymbols=None,
4175 bool acceptor=False,
4176 bool keep_isymbols=False,
4177 bool keep_osymbols=False,
4178 bool keep_state_numbering=False,
4179 bool allow_negative_labels=False):
4180 self._sstrm.reset(new stringstream())
4181 self._fst_type = tostring(fst_type)
4182 self._arc_type = tostring(arc_type)
4183 self._isymbols = NULL
4184 if isymbols is not None:
4185 self._isymbols = isymbols._table
4186 self._osymbols = NULL
4187 if osymbols is not None:
4188 self._osymbols = osymbols._table
4189 self._ssymbols = NULL
4190 if ssymbols is not None:
4191 self._ssymbols = ssymbols._table
4192 self._acceptor = acceptor
4193 self._keep_isymbols = keep_isymbols
4194 self._keep_osymbols = keep_osymbols
4195 self._keep_state_numbering = keep_state_numbering
4196 self._allow_negative_labels = allow_negative_labels
4198 cpdef _Fst compile(self):
4202 Compiles the FST in the compiler string buffer.
4204 This method compiles the FST and returns the resulting machine.
4207 The FST described by the compiler string buffer.
4210 FstOpError: Compilation failed.
4212 cdef fst.FstClass *tfst = fst.CompileFstInternal(deref(self._sstrm),
4213 "<pywrapfst>", self._fst_type, self._arc_type, self._isymbols,
4214 self._osymbols, self._ssymbols, self._acceptor, self._keep_isymbols,
4215 self._keep_osymbols, self._keep_state_numbering,
4216 self._allow_negative_labels)
4217 self._sstrm.reset(new stringstream())
4219 raise FstOpError("Compilation failed")
4220 return _init_XFst(tfst)
4222 cpdef void write(self, expression):
4226 Writes a string into the compiler string buffer.
4228 This method adds a line to the compiler string buffer. It is normally
4229 invoked using the right shift operator, like so:
4231 compiler = fst.Compiler()
4232 print >> compiler, "0 0 49 49"
4233 print >> compiler, "0"
4236 expression: A string expression to add to compiler string buffer.
4238 deref(self._sstrm) << tostring(expression)
4241 ## FarReader and FarWriter.
4244 cdef class FarReader(object):
4249 FAR ("Fst ARchive") reader object.
4251 This class is used to read a FAR from disk. FARs contain one or more FSTs (of
4252 the same arc type) indexed by a unique string key. To construct a FarReader
4253 object, use the `open` class method.
4256 arc_type: A string indicating the arc type.
4257 far_type: A string indicating the FAR type.
4261 raise FstDeletedConstructorError(
4262 "Cannot construct {}".format(self.__class__.__name__))
4265 return "<{} FarReader at 0x{:x}>".format(self.far_type(), id(self))
4268 def open(cls, *filenames):
4270 FarReader.open(*filenames)
4272 Creates a FarReader object.
4274 This class method creates a FarReader given the string location of one or
4275 more FAR files on disk.
4278 *filenames: The string location of one or more input FAR files.
4281 A new FarReader instance.
4284 FstIOError: Read failed.
4286 filenames = [tostring(filename) for filename in filenames]
4287 cdef fst.FarReaderClass *tfar = fst.FarReaderClass.Open(filenames)
4289 raise FstIOError("Read failed: {!r}".format(filenames))
4290 cdef FarReader result = FarReader.__new__(FarReader)
4291 result._reader.reset(tfar)
4294 # This just registers this class as a possible iterator.
4298 # Magic method used to get a Pythonic API out of the C++ API.
4302 cdef string k = self.get_key()
4303 cdef _MutableFst f = self.get_fst()
4307 cpdef string arc_type(self):
4311 Returns a string indicating the arc type.
4313 return self._reader.get().ArcType()
4315 cpdef bool done(self):
4319 Indicates whether the iterator is exhausted or not.
4322 True if the iterator is exhausted, False otherwise.
4324 return self._reader.get().Done()
4326 cpdef bool error(self):
4330 Indicates whether the FarReader has encountered an error.
4333 True if the FarReader is in an errorful state, False otherwise.
4335 return self._reader.get().Error()
4337 cpdef string far_type(self):
4338 return fst.GetFarTypeString(self._reader.get().Type())
4340 cpdef bool find(self, key):
4344 Sets the current position to the first entry greater than or equal to the
4345 key (a string) and indicates whether or not a match was found.
4351 True if the key was found, False otherwise.
4353 return self._reader.get().Find(tostring(key))
4355 cpdef _Fst get_fst(self):
4359 Returns the FST at the current position.
4362 A copy of the FST at the current position.
4364 cdef fst.FstClass *tfst = new fst.FstClass(
4365 deref(self._reader.get().GetFstClass()))
4366 return _init_XFst(tfst)
4368 cpdef string get_key(self):
4372 Returns the string key at the current position.
4375 The string key at the current position.
4377 return self._reader.get().GetKey()
4379 cpdef void next(self):
4383 Advances the iterator.
4385 self._reader.get().Next()
4387 cpdef void reset(self):
4391 Resets the iterator to the initial position.
4393 self._reader.get().Reset()
4395 # Dictionary-like access by combining `find` and `get_fst`.
4396 def __getitem__(self, key):
4397 if not self.find(key):
4399 return self.get_fst()
4402 cdef class FarWriter(object):
4407 FAR ("Fst ARchive") writer object.
4409 This class is used to write FSTs (of the same arc type) to a FAR on disk. To
4410 construct a FarWriter, use the `create` class method.
4412 Note that the data is not guaranteed to flush to disk until the FarWriter
4413 is garbage-collected. If a FarWriter has been assigned to only one variable,
4414 then calling `del` on that variable should decrement the object's reference
4415 count from 1 to 0, triggering a flush to disk on the next GC cycle.
4418 arc_type: A string indicating the arc type.
4419 far_type: A string indicating the FAR type.
4423 raise FstDeletedConstructorError(
4424 "Cannot construct {}".format(self.__class__.__name__))
4427 return "<{} FarWriter at 0x{:x}>".format(self.far_type(), id(self))
4430 def create(cls, filename, arc_type=b"standard", far_type=b"default"):
4434 Creates a FarWriter object.
4436 This class method creates a FarWriter given the desired output location,
4437 arc type, and FAR type.
4440 filename: The string location for the output FAR files.
4441 arc_type: A string indicating the arc type.
4442 far_type: A string indicating the FAR type; one of: "fst", "stlist",
4443 "sttable", "sstable", "default".
4446 A new FarWriter instance.
4449 FstIOError: Read failed.
4451 cdef fst.FarType ft = fst.GetFarType(tostring(far_type))
4452 cdef fst.FarWriterClass *tfar = fst.FarWriterClass.Create(
4453 tostring(filename), tostring(arc_type), ft)
4455 raise FstIOError("Open failed: {!r}".format(filename))
4456 cdef FarWriter result = FarWriter.__new__(FarWriter)
4457 result._writer.reset(tfar)
4460 # NB: Invoking this method is DANGEROUS: calling any other method on the
4461 # instance after this is invoked may result in a null dereference.
4462 cdef void _close(self):
4463 self._writer.reset()
4465 cpdef void add(self, key, _Fst ifst) except *:
4467 add(self, key, ifst)
4469 Adds an FST to the FAR.
4471 This method adds an FST to the FAR which can be retrieved with the
4472 specified string key.
4475 key: The string used to key the input FST.
4476 ifst: The FST to write to the FAR.
4479 FstArgError: Key out of order.
4480 FstOpError: Incompatible or invalid arc type.
4482 # Failure here results from passing an FST with a different arc type than
4483 # used by the FAR was initialized to use.
4484 if not self._writer.get().Add(tostring(key), deref(ifst._fst)):
4485 raise FstOpError("Incompatible or invalid arc type")
4486 # An error here usually indicates a key out of order.
4487 if self._writer.get().Error():
4488 raise FstArgError("Key out of order")
4490 cpdef string arc_type(self):
4494 Returns a string indicating the arc type.
4496 return self._writer.get().ArcType()
4498 cpdef bool error(self):
4502 Indicates whether the FarWriter has encountered an error.
4505 True if the FarWriter is in an errorful state, False otherwise.
4507 return self._writer.get().Error()
4509 cpdef string far_type(self):
4513 Returns a string indicating the FAR type.
4515 return fst.GetFarTypeString(self._writer.get().Type())
4517 # Dictionary-like assignment.
4518 def __setitem__(self, key, _Fst fst):
4522 ## Cleanup operations for module entrance and exit.
4525 # Masks fst_error_fatal flags while this module is running, returning to the
4526 # previous state upon module exit.
4529 _fst_error_fatal_old = fst.FLAGS_fst_error_fatal
4530 fst.FLAGS_fst_error_fatal = False
4534 def _reset_fst_error_fatal():
4535 fst.FLAGS_fst_error_fatal = _fst_error_fatal_old