1 #cython: nonecheck=True
2 # See www.openfst.org for extensive documentation on this weighted
3 # finite-state transducer library.
6 """Python interface to the FST scripting API.
8 Operations which construct new FSTs are implemented as traditional functions, as
9 are two-argument boolean functions like `equal` and `equivalent`. Destructive
10 operations---those that mutate an FST, in place---are instance methods, as is
11 `write`. Operator overloading is not used. The following example, based on
12 Mohri et al. 2002, shows the construction of an ASR system given a pronunciation
13 lexicon L, grammar G, a transducer from context-dependent phones to
14 context-independent phones C, and an HMM set H:
16 L = fst.Fst.read("L.fst")
17 G = fst.Fst.read("G.fst")
18 C = fst.Fst.read("C.fst")
19 H = fst.Fst.read("H.fst")
20 LG = fst.determinize(fst.compose(L, G))
21 CLG = fst.determinize(fst.compose(C, LG))
22 HCLG = fst.determinize(fst.compose(H, CLG))
23 HCLG.minimize() # NB: works in-place.
25 Python variables here use snake_case and constants are in all caps, minus the
29 # Overview of the file:
34 # * Weight and helpers
35 # * _SymbolTable, _EncodeMapperSymbolTable, _FstSymbolTable,
36 # _MutableFstSymbolTable, SymbolTable, and helpers
37 # * SymbolTableIterator
39 # * _Fst, _MutableFst, Fst, and helpers
41 # * Arc, ArcIterator, and MutableArcIterator
45 # * FarReader and FarWriter
46 # * Cleanup operations for module entrance and exit.
48 # TODO(kbg): Try breaking this apart into smaller pieces.
50 # A few of the more idiosyncratic choices made here are due to "impedance
51 # mismatches" between C++ and Python, as follows.
53 # Another issue is that due to differences in C++ and Python scope rules, most
54 # C++ class instances have to be heap-allocated. Since all are packed into
55 # Python class instances, Python destructors are used to semi-automatically
56 # free C++ instances. The one exception are the various `...Options` structs.
57 # All that is included here are the constructors; there is no need to include
58 # the names of the struct members. Cython does not draw any meaningful
59 # distinction between structs and C++ classes, so these look just like class
62 # Cython's type annotations (e.g., `string`) are used when the variables will
63 # be sent as arguments to C++ functions, but are not used for variables used
66 # Internal functions which may raise a Python error do not have a C++ return
67 # type simply because this leads the C++ compiler to think that the resulting
68 # value could be used before it is populated.
74 from libc.stdint cimport INT32_MAX
75 from libc.stdint cimport SIZE_MAX
76 from posix.unistd cimport getpid
79 from libcpp cimport bool
80 from libcpp.cast cimport const_cast
81 from libcpp.cast cimport static_cast
84 from ios cimport ofstream
85 from memory cimport static_pointer_cast
87 # Cython operator workarounds.
88 from cython.operator cimport address as addr # &foo
89 from cython.operator cimport dereference as deref # *foo
90 from cython.operator cimport preincrement as inc # ++foo
99 # TODO(kbg): Figure out how to access static class variables so I don't have
104 ## Custom exceptions.
107 class FstError(Exception):
112 class FstArgError(FstError, ValueError):
117 class FstBadWeightError(FstError, ValueError):
122 class FstDeletedConstructorError(FstError, RuntimeError):
127 class FstIndexError(FstError, IndexError):
132 class FstIOError(FstError, IOError):
137 class FstOpError(FstError, RuntimeError):
145 cdef string tostring(data, encoding="utf8") except *:
146 """Converts strings to bytestrings.
148 This function converts Python bytestrings and Unicode strings to bytestrings
149 encoded in UTF-8. It is used to process most Python string arguments before
150 passing them to the lower-level library.
153 data: A Unicode string or bytestring.
154 encoding: The desired encoding, defaulting to UTF-8.
160 FstArgError: Cannot encode string.
163 This function is not visible to Python users.
165 # A Python bytestring can be implicitly cast to a C++ string.
166 if isinstance(data, bytes):
168 elif isinstance(data, unicode):
169 return data.encode(encoding)
170 raise FstArgError("Cannot encode as string: {!r}".format(data))
173 cdef string weighttostring(data, encoding="utf8") except *:
174 """Converts strings or numerics to bytestrings.
176 This function converts Python bytestrings, Unicode strings, and numerics
177 which can be cast to floats to bytestrings encoded in UTF-8. It is used to
178 process Python string arguments so they can be used to construct Weight
179 objects. In most cases, weights are underlyingly floating-point, but since
180 not all weights are, they can only be constructed using a string.
183 data: A Unicode string, bytestring, or type which can be converted to a
190 FstArgError: Cannot encode string.
191 ValueError: Invalid literal for float.
194 This function is not visible to Python users.
196 # A Python bytestring can be implicitly cast to a C++ string.
197 if isinstance(data, bytes):
199 elif isinstance(data, unicode):
200 return data.encode(encoding)
201 elif isinstance(data, numbers.Number):
202 return str(data).encode(encoding)
203 raise FstArgError("Cannot encode as string: {!r}".format(data))
206 cdef fst.ComposeFilter _get_compose_filter(
207 const string &compose_filter) except *:
208 """Matches string with the appropriate ComposeFilter enum value.
210 This function takes a string argument and returns the matching ComposeFilter
211 enum value used to initialize ComposeOptions instances. ComposeOptions is used
212 by difference and intersection in addition to composition.
215 compose_filter: A string matching a known composition filter; one of:
216 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
219 A ComposeFilter enum value.
222 FstArgError: Unknown compose filter type.
224 This function is not visible to Python users.
226 cdef fst.ComposeFilter compose_filter_enum
227 if not fst.GetComposeFilter(compose_filter, addr(compose_filter_enum)):
228 raise FstArgError("Unknown compose filter type: {!r}".format(
230 return compose_filter_enum
233 cdef fst.DeterminizeType _get_determinize_type(const string &det_type) except *:
234 """Matches string with the appropriate DeterminizeType enum value.
237 det_type: A string matching a known determinization type; one of:
238 "functional", "nonfunctional", "disambiguate".
241 A DeterminizeType enum value.
244 FstArgError: Unknown determinization type.
246 This function is not visible to Python users.
248 cdef fst.DeterminizeType det_type_enum
249 if not fst.GetDeterminizeType(det_type, addr(det_type_enum)):
250 raise FstArgError("Unknown determinization type: {!r}".format(det_type))
254 cdef fst.QueueType _get_queue_type(const string &queue_type) except *:
255 """Matches string with the appropriate QueueType enum value.
257 This function takes a string argument and returns the matching QueueType enum
258 value passed to the RmEpsilonOptions constructor.
261 queue_type: A string matching a known queue type; one of: "auto", "fifo",
262 "lifo", "shortest", "state", "top".
265 A QueueType enum value.
268 FstArgError: Unknown queue type.
270 This function is not visible to Python users.
272 cdef fst.QueueType queue_type_enum
273 if not fst.GetQueueType(queue_type, addr(queue_type_enum)):
274 raise FstArgError("Unknown queue type: {!r}".format(queue_type))
275 return queue_type_enum
278 cdef fst.RandArcSelection _get_rand_arc_selection(
279 const string &select) except *:
280 """Matches string with the appropriate RandArcSelection enum value.
282 This function takes a string argument and returns the matching
283 RandArcSelection enum value passed to the RandGenOptions constructor.
286 select: A string matching a known random arc selection type; one of:
287 "uniform", "log_prob", "fast_log_prob".
290 A RandArcSelection enum value.
293 FstArgError: Unknown random arc selection type.
295 This function is not visible to Python users.
297 cdef fst.RandArcSelection select_enum
298 if not fst.GetRandArcSelection(select, addr(select_enum)):
299 raise FstArgError("Unknown random arc selection type: {!r}".format(select))
303 cdef fst.ReplaceLabelType _get_replace_label_type(
304 const string &replace_label_type, bool epsilon_on_replace) except *:
305 """Matches string with the appropriate ReplaceLabelType enum value.
307 This function takes a string argument and returns the matching
308 ReplaceLabelType enum value passed to the ReplaceOptions constructor.
311 replace_label_type: A string matching a known replace label type; one of:
312 "neither", "input", "output", "both".
313 epsilon_on_replace: Should call/return arcs be epsilon arcs?
316 A ReplaceLabelType enum value.
319 FstArgError: Unknown replace label type.
321 This function is not visible to Python users.
323 cdef fst.ReplaceLabelType replace_label_type_enum
324 if not fst.GetReplaceLabelType(replace_label_type, epsilon_on_replace,
325 addr(replace_label_type_enum)):
326 raise FstArgError("Unknown replace label type: {!r}".format(
328 return replace_label_type_enum
331 ## Weight and helpers.
334 cdef class Weight(object):
337 Weight(weight_type, weight_string)
341 This class represents an FST weight. When passed as an argument to an FST
342 operation, it should have the weight type of the input FST(s) to said
346 weight_type: A string indicating the weight type.
347 weight_string: A string indicating the underlying weight.
350 FstArgError: Weight type not found.
351 FstBadWeightError: Invalid weight.
355 return "<{} Weight {} at 0x{:x}>".format(self.type(), self.to_string(),
359 return self.to_string()
361 # This attempts to convert the string form into a float, raising
362 # ValueError when that is not appropriate.
365 return float(self.to_string())
367 def __init__(self, weight_type, weight):
368 self._weight.reset(new fst.WeightClass(tostring(weight_type),
369 weighttostring(weight)))
372 cdef void _check_weight(self) except *:
373 if self.type() == b"none":
374 raise FstArgError("Weight type not found")
375 if self.to_string() == b"BadNumber":
376 raise FstBadWeightError("Invalid weight")
378 cpdef Weight copy(self):
382 Returns a copy of the Weight.
384 cdef Weight result = Weight.__new__(Weight)
385 result._weight.reset(new
386 fst.WeightClass(<fst.WeightClass> deref(self._weight)))
389 # To get around the inability to declare cdef class methods, we define the
390 # C++ part out-of-class and then call it from within.
393 def Zero(cls, weight_type):
395 Weight.Zero(weight_type)
397 Constructs semiring zero.
399 return _Weight_Zero(weight_type)
402 def One(cls, weight_type):
404 Weight.One(weight_type)
406 Constructs semiring One.
408 return _Weight_One(weight_type)
411 def NoWeight(cls, weight_type):
413 Weight.NoWeight(weight_type)
415 Constructs a non-member weight in the semiring.
417 return _Weight_NoWeight(weight_type)
419 def __richcmp__(Weight w1, Weight w2, int op):
420 # TODO(kbg): Replace this with __eq__ once Cython 0.27 is widely available.
422 return fst.Eq(deref(w1._weight), deref(w2._weight))
424 return fst.Ne(deref(w1._weight), deref(w2._weight))
426 raise NotImplementedError("Invalid operator {!r}".format(op))
428 cpdef string to_string(self):
429 return self._weight.get().ToString()
431 cpdef string type(self):
434 Returns a string indicating the weight type.
436 return self._weight.get().Type()
439 cdef Weight _plus(Weight lhs, Weight rhs):
440 cdef Weight result = Weight.__new__(Weight)
441 result._weight.reset(new fst.WeightClass(fst.Plus(deref(lhs._weight),
442 deref(rhs._weight))))
446 def plus(Weight lhs, Weight rhs):
450 Computes the sum of two Weights in the same semiring.
452 This function computes lhs \oplus rhs, raising an exception if lhs and rhs
453 are not in the same semiring.
456 lhs: Left-hand side Weight.
457 rhs: Right-hand side Weight.
463 FstArgError: Weight type not found (or not in same semiring).
464 FstBadWeightError: invalid weight.
466 cdef Weight result = _plus(lhs, rhs)
467 result._check_weight()
471 cdef Weight _times(Weight lhs, Weight rhs):
472 cdef Weight result = Weight.__new__(Weight)
473 result._weight.reset(new fst.WeightClass(fst.Times(deref(lhs._weight),
474 deref(rhs._weight))))
478 def times(Weight lhs, Weight rhs):
482 Computes the product of two Weights in the same semiring.
484 This function computes lhs \otimes rhs, raising an exception if lhs and rhs
485 are not in the same semiring.
488 lhs: Left-hand side Weight.
489 rhs: Right-hand side Weight.
495 FstArgError: Weight type not found (or not in same semiring).
496 FstBadWeightError: Invalid weight.
498 cdef Weight result = _times(lhs, rhs)
499 result._check_weight()
503 cdef Weight _divide(Weight lhs, Weight rhs):
504 cdef Weight result = Weight.__new__(Weight)
505 result._weight.reset(new fst.WeightClass(fst.Divide(deref(lhs._weight),
506 deref(rhs._weight))))
510 def divide(Weight lhs, Weight rhs):
514 Computes the quotient of two Weights in the same semiring.
516 This function computes lhs \oslash rhs, raising an exception if lhs and rhs
517 are not in the same semiring. As there is no way to specify whether to use
518 left vs. right division, this assumes a commutative semiring in which these
519 are equivalent operations.
522 lhs: Left-hand side Weight.
523 rhs: Right-hand side Weight.
529 FstArgError: Weight type not found (or not in same semiring).
530 FstBadWeightError: Invalid weight.
532 cdef Weight result = _divide(lhs, rhs)
533 result._check_weight()
537 cdef Weight _power(Weight w, size_t n):
538 cdef Weight result = Weight.__new__(Weight)
539 result._weight.reset(new fst.WeightClass(fst.Power(deref(w._weight), n)))
543 def power(Weight w, size_t n):
547 Computes the iterated product of a weight.
557 FstArgError: Weight type not found (or not in same semiring).
558 FstBadWeightError: Invalid weight.
560 cdef Weight result = _power(w, n)
561 result._check_weight()
565 cdef fst.WeightClass _get_WeightClass_or_Zero(const string &weight_type,
567 """Converts weight string to a WeightClass.
569 This function constructs a WeightClass instance of the desired weight type.
570 If the first argument is null, the weight is set to semiring Zero.
573 weight_type: A string denoting the desired weight type.
574 weight: A object indicating the desired weight; if omitted, the weight is
575 set to semiring Zero.
578 A WeightClass object.
580 This function is not visible to Python users.
582 cdef fst.WeightClass result
584 result = fst.WeightClass.Zero(weight_type)
585 elif isinstance(weight, Weight):
586 result = deref(<fst.WeightClass *> (<Weight> weight)._weight.get())
588 result = fst.WeightClass(weight_type, weighttostring(weight))
589 if result.ToString() == b"BadNumber":
590 raise FstBadWeightError(weighttostring(weight))
594 cdef fst.WeightClass _get_WeightClass_or_One(const string &weight_type,
596 """Converts weight string to a WeightClass.
598 This function constructs a WeightClass instance of the desired weight type.
599 If the first argument is null, the weight is set to semiring One.
602 weight_type: A string denoting the desired weight type.
603 weight: A object indicating the desired weight; if omitted, the weight is
607 A WeightClass object.
609 This function is not visible to Python users.
611 cdef fst.WeightClass result
613 result = fst.WeightClass.One(weight_type)
614 elif isinstance(weight, Weight):
615 result = deref(<fst.WeightClass *> (<Weight> weight)._weight.get())
617 result = fst.WeightClass(weight_type, weighttostring(weight))
618 if result.ToString() == b"BadNumber":
619 raise FstBadWeightError(weighttostring(weight))
623 cdef Weight _Weight_Zero(weight_type):
624 cdef Weight result = Weight.__new__(Weight)
625 result._weight.reset(new fst.WeightClass(fst.WeightClass.Zero(
626 tostring(weight_type))))
627 if result._weight.get().Type() == b"none":
628 raise FstArgError("Weight type not found")
632 cdef Weight _Weight_One(weight_type):
633 cdef Weight result = Weight.__new__(Weight)
634 result._weight.reset(new fst.WeightClass(
635 fst.WeightClass.One(tostring(weight_type))))
636 if result._weight.get().Type() == b"none":
637 raise FstArgError("Weight type not found")
641 cdef Weight _Weight_NoWeight(weight_type):
642 cdef Weight result = Weight.__new__(Weight)
643 result._weight.reset(new fst.WeightClass(
644 fst.WeightClass.NoWeight(tostring(weight_type))))
648 ## _SymbolTable, _MutableSymbolTable, _EncodeMapperSymbolTable, _FstSymbolTable,
649 ## _MutableFstSymbolTable, SymbolTable, and helpers.
651 # SymbolTable hierarchy:
653 # _SymbolTable: abstract base class; has-a SymbolTable*
654 # _EncodeMapperSymbolTable(_SymbolTable): constant symbol table returned by
655 # EncodeMapper.input_symbols/output_symbols
656 # _FstSymbolTable(_SymbolTable): constant symbol table returned by
657 # _Fst.input_symbols/output_symbols
659 # _MutableSymbolTable(_SymbolTable): abstract base class adding mutation methods
660 # _MutableFstSymbolTable(_MutableSymbolTable): mutable symbol table returned by
661 # _MutableFst.mutable_input_symbols/mutable_output_symbols
662 # SymbolTable(_MutableSymbolTable): adds constructor
665 cdef class _SymbolTable(object):
670 Base class for the symbol table hierarchy.
672 This class is the base class for SymbolTable. It has a "deleted" constructor
673 and implementations for the const methods of the wrapped SymbolTable.
676 # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
677 # Doing so will allow undefined behavior.
680 raise FstDeletedConstructorError(
681 "Cannot construct {}".format(self.__class__.__name__))
684 return SymbolTableIterator(self)
686 cpdef int64 available_key(self):
690 Returns an integer indicating the next available key index in the table.
692 return self._table.AvailableKey()
694 cpdef string checksum(self):
698 Returns a string indicating the label-agnostic MD5 checksum for the table.
700 return self._table.CheckSum()
702 cpdef SymbolTable copy(self):
706 Returns a mutable copy of the SymbolTable.
708 return _init_SymbolTable(self._table.Copy())
714 Given a symbol or index, finds the other one.
716 This method returns the index associated with a symbol key, or the symbol
717 associated with a index key.
720 key: Either a string or an index.
723 If the key is a string, the associated index or NO_LABEL if not found; if
724 the key is an integer, the associated symbol or an empty string if
728 return self._table.FindIndex(tostring(key))
730 return self._table.FindSymbol(key)
732 cpdef int64 get_nth_key(self, ssize_t pos) except *:
734 get_nth_key(self, pos)
736 Retrieves the integer index of the n-th key in the table.
739 pos: The n-th key to retrieve.
742 The integer index of the n-th key, or NO_LABEL if not found.
744 return self._table.GetNthKey(pos)
746 cpdef string labeled_checksum(self):
748 labeled_checksum(self)
750 Returns a string indicating the label-dependent MD5 checksum for the table.
752 return self._table.LabeledCheckSum()
754 cpdef bool member(self, key):
758 Given a symbol or index, returns whether it is found in the table.
760 This method returns a boolean indicating whether the given symbol or index
761 is present in the table. If one intends to perform subsequent lookup, it is
762 better to simply call the find method, catching the KeyError.
765 key: Either a string or an index.
768 Whether or not the key is present (as a string or a index) in the table.
771 return self._table.MemberSymbol(tostring(key))
773 return self._table.MemberIndex(key)
775 def __contains__(self, key):
776 return self.member(key)
778 cpdef string name(self):
782 Returns the symbol table's name.
784 return self._table.Name()
786 cpdef size_t num_symbols(self):
790 Returns the number of symbols in the symbol table.
792 return self._table.NumSymbols()
794 cpdef void write(self, filename) except *:
796 write(self, filename)
798 Serializes symbol table to a file.
800 This methods writes the SymbolTable to a file in binary format.
803 filename: The string location of the output file.
806 FstIOError: Write failed.
808 if not self._table.Write(tostring(filename)):
809 raise FstIOError("Write failed: {!r}".format(filename))
811 cpdef void write_text(self, filename) except *:
813 write_text(self, filename)
815 Writes symbol table to text file.
817 This method writes the SymbolTable to a file in human-readable format.
820 filename: The string location of the output file.
823 FstIOError: Write failed.
825 if not self._table.WriteText(tostring(filename)):
826 raise FstIOError("Write failed: {!r}".format(filename))
829 cdef class _EncodeMapperSymbolTable(_SymbolTable):
834 Immutable SymbolTable class for tables stored in an EncodeMapper.
836 This class wraps a library const SymbolTable and exposes const methods of the
837 wrapped object. It is only to be returned by method, never constructed
841 # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
842 # Doing so will allow undefined behavior.
845 return "<const EncodeMapper SymbolTable {!r} at 0x{:x}>".format(self.name(),
849 cdef class _FstSymbolTable(_SymbolTable):
854 Mutable SymbolTable class for tables stored in a mutable FST.
856 This class wraps a library SymbolTable and exposes methods of the wrapped
857 object. It is only to be returned by method, never constructed directly.
860 # NB: Do not expose any non-const methods of the wrapped SymbolTable here.
861 # Doing so will allow undefined behavior.
864 return "<const Fst SymbolTable {!r} at 0x{:x}>".format(self.name(),
868 cdef class _MutableSymbolTable(_SymbolTable):
873 Base class for mutable symbol tables.
875 This class is the base class for a mutable SymbolTable. It has a "deleted"
876 constructor and implementations of all methods of the wrapped SymbolTable.
879 cpdef int64 add_symbol(self, symbol, int64 key=kNoSymbol):
881 add_symbol(self, symbol, key=NO_SYMBOL)
883 Adds a symbol to the table and returns the index.
885 This method adds a symbol to the table. The caller can optionally
886 specify a non-negative integer index for the key.
889 symbol: A symbol string.
890 key: An index for the symbol; if not specified, the next index will be
894 The integer key of the new symbol.
896 cdef string symbol_string = tostring(symbol)
898 return self._table.AddSymbol(symbol_string, key)
900 return self._table.AddSymbol(symbol_string)
902 cpdef void add_table(self, _SymbolTable syms):
904 add_table(self, syms)
906 Adds another SymbolTable to this table.
908 This method merges another symbol table into the current table. All key
909 values will be offset by the current available key.
912 syms: A SymbolTable to be merged with the current table.
914 self._table.AddTable(deref(syms._table))
916 cpdef void set_name(self, new_name) except *:
917 self._table.SetName(tostring(new_name))
920 cdef class _MutableFstSymbolTable(_MutableSymbolTable):
924 Mutable SymbolTable assigned to an FST.
928 return "<Fst SymbolTable {!r} at 0x{:x}>".format(self.name(), id(self))
931 cdef class SymbolTable(_MutableSymbolTable):
934 SymbolTable(name="<unspecified>")
936 Mutable SymbolTable class.
938 This class wraps the library SymbolTable and exposes both const (i.e.,
939 access) and non-const (i.e., mutation) methods of wrapped object.
941 Unlike other classes in the hierarchy, it has a working constructor and can be
942 used to programmatically construct a SymbolTable in memory.
945 name: An optional string indicating the table's name.
949 return "<SymbolTable {!r} at 0x{:x}>".format(self.name(), id(self))
951 def __init__(self, name=b"<unspecified>"):
952 self._table = new fst.SymbolTable(tostring(name))
953 self._smart_table.reset(self._table)
956 def read(cls, filename):
958 SymbolTable.read(filename)
960 Reads symbol table from binary file.
962 This class method creates a new SymbolTable from a symbol table binary file.
965 filename: The string location of the input binary file.
968 A new SymbolTable instance.
970 See also: `SymbolTable.read_fst`, `SymbolTable.read_text`.
972 cdef fst.SymbolTable *tsyms = fst.SymbolTable.Read(tostring(filename))
974 raise FstIOError("Read failed: {!r}".format(filename))
975 return _init_SymbolTable(tsyms)
978 def read_text(cls, filename, bool allow_negative_labels=False):
980 SymbolTable.read_text(filename)
982 Reads symbol table from text file.
984 This class method creates a new SymbolTable from a symbol table text file.
987 filename: The string location of the input text file.
988 allow_negative_labels: Should negative labels be allowed? (Not
989 recommended; may cause conflicts).
992 A new SymbolTable instance.
994 See also: `SymbolTable.read`, `SymbolTable.read_fst`.
996 cdef unique_ptr[fst.SymbolTableTextOptions] opts
997 opts.reset(new fst.SymbolTableTextOptions(allow_negative_labels))
998 cdef fst.SymbolTable *tsyms = fst.SymbolTable.ReadText(tostring(filename),
1001 raise FstIOError("Read failed: {!r}".format(filename))
1002 return _init_SymbolTable(tsyms)
1005 def read_fst(cls, filename, bool input_table):
1007 SymbolTable.read_fst(filename, input_table)
1009 Reads symbol table from an FST file without loading the corresponding FST.
1011 This class method creates a new SymbolTable by reading either the input or
1012 output symbol table from an FST file, without loading the corresponding FST.
1015 filename: The string location of the input FST file.
1016 input_table: Should the input table be read (True) or the output table
1020 A new SymbolTable instance, or None if none can be read.
1023 FstIOError: Read failed.
1025 See also: `SymbolTable.read`, `SymbolTable.read_text`.
1027 cdef fst.SymbolTable *tsyms = fst.FstReadSymbols(filename, input_table)
1029 raise FstIOError("Read failed: {!r}".format(filename))
1030 return _init_SymbolTable(tsyms)
1033 cdef _EncodeMapperSymbolTable _init_EncodeMapperSymbolTable(
1034 fst.SymbolTable *table, shared_ptr[fst.EncodeMapperClass] encoder):
1035 cdef _EncodeMapperSymbolTable result = (
1036 _EncodeMapperSymbolTable.__new__(_EncodeMapperSymbolTable))
1037 result._table = table
1038 result._encoder = encoder
1042 cdef _FstSymbolTable _init_FstSymbolTable(fst.SymbolTable *table,
1043 shared_ptr[fst.FstClass] ifst):
1044 cdef _FstSymbolTable result = _FstSymbolTable.__new__(_FstSymbolTable)
1045 result._table = table
1050 cdef _MutableFstSymbolTable _init_MutableFstSymbolTable(fst.SymbolTable *table,
1051 shared_ptr[fst.MutableFstClass] ifst):
1052 cdef _MutableFstSymbolTable result = (
1053 _MutableFstSymbolTable.__new__(_MutableFstSymbolTable))
1054 result._table = table
1059 cdef SymbolTable _init_SymbolTable(fst.SymbolTable *table):
1060 cdef SymbolTable result = SymbolTable.__new__(SymbolTable)
1061 result._table = table
1065 # Constructive SymbolTable operations.
1068 cpdef SymbolTable compact_symbol_table(_SymbolTable syms):
1070 compact_symbol_table(syms)
1072 Constructively relabels a SymbolTable to make it a contiguous mapping.
1075 syms: Input SymbolTable.
1078 A new compacted SymbolTable.
1080 return _init_SymbolTable(fst.CompactSymbolTable(deref(syms._table)))
1083 cpdef SymbolTable merge_symbol_table(_SymbolTable lhs, _SymbolTable rhs):
1085 merge_symbol_table(lhs, rhs)
1087 Merges all symbols from the left table into the right.
1089 This function creates a new SymbolTable which is the merger of the two input
1090 symbol Tables. Symbols in the right-hand table that conflict with those in the
1091 left-hand table will be assigned values from the left-hand table. Thus the
1092 returned table will never modify symbol assignments from the left-hand side,
1093 but may do so on the right.
1095 If the left-hand table is associated with an FST, it may be necessary to
1096 relabel it using the output table.
1099 lhs: Left-hand side SymbolTable.
1100 rhs: Left-hand side SymbolTable.
1103 A new merged SymbolTable.
1105 See also: `relabel_symbols`.
1107 return _init_SymbolTable(fst.MergeSymbolTable(deref(lhs._table),
1108 deref(rhs._table), NULL))
1111 ## SymbolTableIterator.
1114 cdef class SymbolTableIterator(object):
1117 SymbolTableIterator(syms)
1119 This class is used for iterating over a symbol table.
1123 return "<SymbolTableIterator at 0x{:x}>".format(id(self))
1125 def __init__(self, _SymbolTable syms):
1126 self._siter.reset(new fst.SymbolTableIterator(deref(syms._table)))
1128 # This just registers this class as a possible iterator.
1132 # Magic method used to get a Pythonic API out of the C++ API.
1136 cdef int64 value = self.value()
1137 cdef string symbol = self.symbol()
1139 return (value, symbol)
1141 cpdef bool done(self):
1145 Indicates whether the iterator is exhausted or not.
1148 True if the iterator is exhausted, False otherwise.
1150 return self._siter.get().Done()
1152 cpdef void next(self):
1156 Advances the iterator.
1158 self._siter.get().Next()
1160 cpdef void reset(self):
1164 Resets the iterator to the initial position.
1166 self._siter.get().Reset()
1168 cpdef string symbol(self):
1172 Returns the current symbol string.
1174 This method returns the current symbol string at this point in the table.
1179 return self._siter.get().Symbol()
1181 cpdef int64 value(self):
1185 Returns the current integer index of the symbol.
1190 return self._siter.get().Value()
1196 cdef class EncodeMapper(object):
1199 EncodeMapper(arc_type="standard", encode_labels=False, encode_weights=False)
1201 Arc encoder class, wrapping EncodeMapperClass.
1203 This class provides an object which can be used to encode or decode FST arcs.
1204 This is most useful to convert an FST to an unweighted acceptor, on which
1205 some FST operations are more efficient, and then decoding the FST afterwards.
1207 To use an instance of this class to encode or decode a mutable FST, pass it
1208 as the first argument to the FST instance methods `encode` and `decode`.
1210 For implementational reasons, it is not currently possible to use an encoder
1211 on disk to construct this class.
1214 arc_type: A string indicating the arc type.
1215 encode_labels: Should labels be encoded?
1216 encode_weights: Should weights be encoded?
1220 return "<EncodeMapper at 0x{:x}>".format(id(self))
1223 arc_type=b"standard",
1224 bool encode_labels=False,
1225 bool encode_weights=False):
1226 cdef uint32 flags = fst.GetEncodeFlags(encode_labels, encode_weights)
1227 self._encoder.reset(new fst.EncodeMapperClass(tostring(arc_type), flags,
1229 if not self._encoder:
1230 raise FstOpError("Unknown arc type: {!r}".format(arc_type))
1232 cpdef string arc_type(self):
1236 Returns a string indicating the arc type.
1238 return self._encoder.get().ArcType()
1240 # Python's equivalent to operator().
1242 def __call__(self, Arc arc):
1244 self(state, ilabel, olabel, weight, nextstate)
1246 Uses the encoder to encode an arc.
1249 ilabel: The integer index of the input label.
1250 olabel: The integer index of the output label.
1251 weight: A Weight or weight string indicating the desired final weight; if
1252 null, it is set to semiring One.
1253 nextstate: The integer index of the destination state.
1256 FstOpError: Incompatible or invalid weight.
1258 return _init_Arc(self._encoder.get().__call__(deref(arc._arc)))
1260 cpdef uint32 flags(self):
1264 Returns the encoder's flags.
1266 return self._encoder.get().Flags()
1268 cpdef _EncodeMapperSymbolTable input_symbols(self):
1272 Returns the encoder's input symbol table, or None if none is present.
1274 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1275 self._encoder.get().InputSymbols())
1278 return _init_EncodeMapperSymbolTable(syms, self._encoder)
1280 cpdef _EncodeMapperSymbolTable output_symbols(self):
1282 output_symbols(self)
1284 Returns the encoder's output symbol table, or None if none is present.
1286 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1287 self._encoder.get().OutputSymbols())
1290 return _init_EncodeMapperSymbolTable(syms, self._encoder)
1292 cpdef uint64 properties(self, uint64 mask):
1294 properties(self, mask)
1296 Provides property bits.
1298 This method provides user access to the properties of the encoder.
1301 mask: The property mask to be compared to the encoder's properties.
1304 A 64-bit bitmask representing the requested properties.
1306 return self._encoder.get().Properties(mask)
1308 cpdef void set_input_symbols(self, _SymbolTable syms) except *:
1310 set_input_symbols(self, syms)
1312 Sets the encoder's input symbol table.
1315 syms: A SymbolTable.
1317 See also: `set_output_symbols`.
1319 self._encoder.get().SetInputSymbols(syms._table)
1321 cpdef void set_output_symbols(self, _SymbolTable syms) except *:
1323 set_output_symbols(self, syms)
1325 Sets the encoder's output symbol table.
1328 syms: A SymbolTable.
1330 See also: `set_input_symbols`.
1332 self._encoder.get().SetOutputSymbols(syms._table)
1334 cpdef string weight_type(self):
1338 Returns a string indicating the weight type.
1340 return self._encoder.get().WeightType()
1343 ## _Fst, _MutableFst, Fst, and helpers.
1347 # _Fst: base class; has-a FstClass*.
1348 # _MutableFst(_Fst): adds mutable methods.
1349 # Fst(filename): pseudo-constructor.
1352 cdef class _Fst(object):
1357 Immutable FST class, wrapping FstClass.
1359 This class is the basic user-facing FST object. It does not itself support any
1360 mutation operations.
1363 # IPython notebook magic to produce an SVG of the FST.
1364 def _repr_svg_(self):
1365 """IPython notebook magic to produce an SVG of the FST using GraphViz.
1367 This method produces an SVG of the internal graph. Users wishing to create
1368 publication-quality graphs should instead use the method `draw`, which
1369 exposes additional parameters.
1372 OSError: Cannot locate the `dot` executable.
1373 subprocess.CalledProcessError: `dot` returned non-zero exit code.
1375 See also: `draw`, `text`.
1377 # Throws OSError if the dot executable is not found.
1378 proc = subprocess.Popen(["dot", "-Tsvg"], stdin=subprocess.PIPE,
1379 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1380 cdef stringstream sstrm
1381 fst.DrawFst(deref(self._fst), self._fst.get().InputSymbols(),
1382 self._fst.get().OutputSymbols(), NULL,
1383 self._fst.get().Properties(fst.kAcceptor, True) ==
1385 b"", 8.5, 11, True, False, 0.4, 0.25, 14, 5, b"g", False,
1386 addr(sstrm), b"_repr_svg")
1387 (sout, serr) = proc.communicate(sstrm.str())
1388 if proc.returncode != 0: # Just to be explicit.
1389 raise subprocess.CalledProcessError(proc.returncode, self._DOT_TSVG)
1390 return sout.decode("utf8")
1393 return "<{} Fst at 0x{:x}>".format(self.fst_type(), id(self))
1396 raise FstDeletedConstructorError(
1397 "Cannot construct {}".format(self.__class__.__name__))
1399 # Other magic methods.
1402 return self.text(acceptor=self._fst.get().Properties(fst.kAcceptor, True) ==
1404 show_weight_one=self._fst.get().Properties(fst.kWeighted, True) ==
1407 cpdef string arc_type(self):
1411 Returns a string indicating the arc type.
1413 return self._fst.get().ArcType()
1415 cpdef ArcIterator arcs(self, int64 state):
1419 Returns an iterator over arcs leaving the specified state.
1422 state: The source state ID.
1427 See also: `mutable_arcs`, `states`.
1429 return ArcIterator(self, state)
1431 cpdef _Fst copy(self):
1435 Makes a copy of the FST.
1437 return _init_XFst(new fst.FstClass(deref(self._fst)))
1439 cpdef void draw(self,
1441 _SymbolTable isymbols=None,
1442 _SymbolTable osymbols=None,
1443 SymbolTable ssymbols=None,
1444 bool acceptor=False,
1448 bool portrait=False,
1449 bool vertical=False,
1451 double nodesep=0.25,
1455 bool show_weight_one=False):
1457 draw(self, filename, isymbols=None, osymbols=None, ssymbols=None,
1458 acceptor=False, title="", width=8.5, height=11, portrait=False,
1459 vertical=False, ranksep=0.4, nodesep=0.25, fontsize=14,
1460 precision=5, float_format="g", show_weight_one=False):
1462 Writes out the FST in Graphviz text format.
1464 This method writes out the FST in the dot graph description language. The
1465 graph can be rendered using the `dot` executable provided by Graphviz.
1468 filename: The string location of the output dot/Graphviz file.
1469 isymbols: An optional symbol table used to label input symbols.
1470 osymbols: An optional symbol table used to label output symbols.
1471 ssymbols: An optional symbol table used to label states.
1472 acceptor: Should the figure be rendered in acceptor format if possible?
1473 title: An optional string indicating the figure title.
1474 width: The figure width, in inches.
1475 height: The figure height, in inches.
1476 portrait: Should the figure be rendered in portrait rather than
1478 vertical: Should the figure be rendered bottom-to-top rather than
1480 ranksep: The minimum separation separation between ranks, in inches.
1481 nodesep: The minimum separation between nodes, in inches.
1482 fontsize: Font size, in points.
1483 precision: Numeric precision for floats, in number of chars.
1484 float_format: One of: 'e', 'f' or 'g'.
1485 show_weight_one: Should weights equivalent to semiring One be printed?
1489 cdef string filename_string = tostring(filename)
1490 cdef unique_ptr[ofstream] ostrm
1491 ostrm.reset(new ofstream(filename_string))
1492 cdef fst.SymbolTable *ssymbols_ptr = NULL
1493 if ssymbols is not None:
1494 ssymbols_ptr = ssymbols._table
1495 fst.DrawFst(deref(self._fst),
1496 self._fst.get().InputSymbols() if isymbols is None
1497 else isymbols._table,
1498 self._fst.get().OutputSymbols() if osymbols is None
1499 else osymbols._table,
1500 ssymbols_ptr, acceptor, tostring(title), width, height, portrait,
1501 vertical, ranksep, nodesep, fontsize, precision,
1502 tostring(float_format), show_weight_one, ostrm.get(),
1505 cpdef Weight final(self, int64 state):
1509 Returns the final weight of a state.
1512 state: The integer index of a state.
1515 The final Weight of that state.
1518 FstIndexError: State index out of range.
1520 cdef Weight weight = Weight.__new__(Weight)
1521 weight._weight.reset(new fst.WeightClass(self._fst.get().Final(state)))
1524 cpdef string fst_type(self):
1528 Returns a string indicating the FST type.
1530 return self._fst.get().FstType()
1532 cpdef _FstSymbolTable input_symbols(self):
1536 Returns the FST's input symbol table, or None if none is present.
1538 See also: `input_symbols`.
1540 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1541 self._fst.get().InputSymbols())
1544 return _init_FstSymbolTable(syms, self._fst)
1546 cpdef size_t num_arcs(self, int64 state) except *:
1548 num_arcs(self, state)
1550 Returns the number of arcs leaving a state.
1553 state: The integer index of a state.
1556 The number of arcs leaving that state.
1559 FstIndexError: State index out of range.
1561 See also: `num_states`.
1563 cdef size_t result = self._fst.get().NumArcs(state)
1564 if result == SIZE_MAX:
1565 raise FstIndexError("State index out of range")
1568 cpdef size_t num_input_epsilons(self, int64 state) except *:
1570 num_input_epsilons(self, state)
1572 Returns the number of arcs with epsilon input labels leaving a state.
1575 state: The integer index of a state.
1578 The number of epsilon-input-labeled arcs leaving that state.
1581 FstIndexError: State index out of range.
1583 See also: `num_output_epsilons`.
1585 cdef size_t result = self._fst.get().NumInputEpsilons(state)
1586 if result == SIZE_MAX:
1587 raise FstIndexError("State index out of range")
1590 cpdef size_t num_output_epsilons(self, int64 state) except *:
1592 num_output_epsilons(self, state)
1594 Returns the number of arcs with epsilon output labels leaving a state.
1597 state: The integer index of a state.
1600 The number of epsilon-output-labeled arcs leaving that state.
1603 FstIndexError: State index out of range.
1605 See also: `num_input_epsilons`.
1607 cdef size_t result = self._fst.get().NumOutputEpsilons(state)
1608 if result == SIZE_MAX:
1609 raise FstIndexError("State index out of range")
1612 cpdef _FstSymbolTable output_symbols(self):
1614 output_symbols(self)
1616 Returns the FST's output symbol table, or None if none is present.
1618 See also: `input_symbols`.
1620 cdef fst.SymbolTable *syms = const_cast[SymbolTable_ptr](
1621 self._fst.get().OutputSymbols())
1624 return _init_FstSymbolTable(syms, self._fst)
1626 cpdef uint64 properties(self, uint64 mask, bool test):
1628 properties(self, mask, test)
1630 Provides property bits.
1632 This method provides user access to the properties attributes for the FST.
1633 The resulting value is a long integer, but when it is cast to a boolean,
1634 it represents whether or not the FST has the `mask` property.
1637 mask: The property mask to be compared to the FST's properties.
1638 test: Should any unknown values be computed before comparing against
1642 A 64-bit bitmask representing the requested properties.
1644 return self._fst.get().Properties(mask, test)
1646 cpdef int64 start(self):
1650 Returns the start state.
1652 return self._fst.get().Start()
1654 cpdef StateIterator states(self):
1658 Returns an iterator over all states in the FST.
1661 A StateIterator object for the FST.
1663 See also: `arcs`, `mutable_arcs`.
1665 return StateIterator(self)
1667 cpdef string text(self, _SymbolTable isymbols=None,
1668 _SymbolTable osymbols=None, _SymbolTable ssymbols=None,
1669 bool acceptor=False, bool show_weight_one=False, missing_sym=b""):
1671 text(self, isymbols=None, osymbols=None, ssymbols=None, acceptor=False,
1672 show_weight_one=False, missing_sym="")
1674 Produces a human-readable string representation of the FST.
1676 This method generates a human-readable string representation of the FST.
1677 The caller may optionally specify SymbolTables used to label input labels,
1678 output labels, or state labels, respectively.
1681 isymbols: An optional symbol table used to label input symbols.
1682 osymbols: An optional symbol table used to label output symbols.
1683 ssymbols: An optional symbol table used to label states.
1684 acceptor: Should the FST be rendered in acceptor format if possible?
1685 show_weight_one: Should weights equivalent to semiring One be printed?
1686 missing_symbol: The string to be printed when symbol table lookup fails.
1689 A formatted string representing the machine.
1691 # Prints FST to stringstream, then returns resulting string.
1692 cdef fst.SymbolTable *ssymbols_ptr = NULL
1693 if ssymbols is not None:
1694 ssymbols_ptr = ssymbols._table
1695 cdef stringstream sstrm
1696 fst.PrintFst(deref(self._fst), sstrm, b"<pywrapfst>",
1697 self._fst.get().InputSymbols() if isymbols is None
1698 else isymbols._table,
1699 self._fst.get().OutputSymbols() if osymbols is None
1700 else osymbols._table,
1701 ssymbols_ptr, acceptor, show_weight_one, tostring(missing_sym))
1704 cpdef bool verify(self):
1708 Verifies that an FST's contents are sane.
1711 True if the contents are sane, False otherwise.
1713 return fst.Verify(deref(self._fst))
1715 cpdef string weight_type(self):
1719 Provides the FST's weight type.
1722 A string representing the weight type.
1724 return self._fst.get().WeightType()
1726 cpdef void write(self, filename) except *:
1728 write(self, filename)
1730 Serializes FST to a file.
1732 This method writes the FST to a file in a binary format.
1735 filename: The string location of the output file.
1738 FstIOError: Write failed.
1740 if not self._fst.get().Write(tostring(filename)):
1741 raise FstIOError("Write failed: {!r}".format(filename))
1743 cpdef string write_to_string(self):
1745 write_to_string(self)
1747 Serializes FST to a string.
1752 See also: `read_from_string`.
1754 return self._fst.get().WriteToString()
1757 cdef class _MutableFst(_Fst):
1762 Mutable FST class, wrapping MutableFstClass.
1764 This class extends _Fst by adding mutation operations.
1767 cdef void _check_mutating_imethod(self) except *:
1768 """Checks whether an operation mutating the FST has produced an error.
1770 This function is not visible to Python users.
1772 if self._fst.get().Properties(fst.kError, True) == fst.kError:
1773 raise FstOpError("Operation failed")
1775 cdef void _add_arc(self, int64 state, Arc arc) except *:
1776 if not self._fst.get().ValidStateId(state):
1777 raise FstIndexError("State index out of range")
1778 if not self._mfst.get().AddArc(state, deref(arc._arc)):
1779 raise FstOpError("Incompatible or invalid weight type")
1780 self._check_mutating_imethod()
1782 def add_arc(self, int64 state, Arc arc):
1784 add_arc(self, state, arc)
1786 Adds a new arc to the FST and return self.
1789 state: The integer index of the source state.
1790 arc: The arc to add.
1796 FstIndexError: State index out of range.
1797 FstOpdexError: Incompatible or invalid weight type.
1799 See also: `add_state`.
1801 self._add_arc(state, arc)
1804 cpdef int64 add_state(self) except *:
1808 Adds a new state to the FST and returns the state ID.
1811 The integer index of the new state.
1813 See also: `add_arc`, `set_start`, `set_final`.
1815 cdef int64 result = self._mfst.get().AddState()
1816 self._check_mutating_imethod()
1819 cdef void _arcsort(self, sort_type=b"ilabel") except *:
1820 cdef fst.ArcSortType sort_type_enum
1821 if not fst.GetArcSortType(tostring(sort_type), addr(sort_type_enum)):
1822 raise FstArgError("Unknown sort type {!r}".format(sort_type))
1823 fst.ArcSort(self._mfst.get(), sort_type_enum)
1824 self._check_mutating_imethod()
1826 def arcsort(self, sort_type=b"ilabel"):
1828 arcsort(self, sort_type="ilabel")
1830 Sorts arcs leaving each state of the FST.
1832 This operation destructively sorts arcs leaving each state using either
1833 input or output labels.
1836 sort_type: Either "ilabel" (sort arcs according to input labels) or
1837 "olabel" (sort arcs according to output labels).
1843 FstArgError: Unknown sort type.
1845 See also: `topsort`.
1847 self._arcsort(sort_type)
1850 cdef void _closure(self, bool closure_plus=False) except *:
1851 fst.Closure(self._mfst.get(), fst.GetClosureType(closure_plus))
1852 self._check_mutating_imethod()
1854 def closure(self, bool closure_plus=False):
1856 closure(self, closure_plus=False)
1858 Computes concatenative closure.
1860 This operation destructively converts the FST to its concatenative closure.
1861 If A transduces string x to y with weight a, then the closure transduces x
1862 to y with weight a, xx to yy with weight a \otimes a, xxx to yyy with weight
1863 a \otimes a \otimes a, and so on. The empty string is also transduced to
1864 itself with semiring One if `closure_plus` is False.
1867 closure_plus: If False, do not accept the empty string.
1872 self._closure(closure_plus)
1875 cdef void _concat(self, _Fst ifst) except *:
1876 fst.Concat(self._mfst.get(), deref(ifst._fst))
1877 self._check_mutating_imethod()
1879 def concat(self, _Fst ifst):
1883 Computes the concatenation (product) of two FSTs.
1885 This operation destructively concatenates the FST with a second FST. If A
1886 transduces string x to y with weight a and B transduces string w to v with
1887 weight b, then their concatenation transduces string xw to yv with weight a
1891 ifst: The second input FST.
1899 cdef void _connect(self) except *:
1900 fst.Connect(self._mfst.get())
1901 self._check_mutating_imethod()
1907 Removes unsuccessful paths.
1909 This operation destructively trims the FST, removing states and arcs that
1910 are not part of any successful path.
1918 cdef void _decode(self, EncodeMapper encoder) except *:
1919 fst.Decode(self._mfst.get(), deref(encoder._encoder))
1920 self._check_mutating_imethod()
1922 def decode(self, EncodeMapper encoder):
1924 decode(self, encoder)
1926 Decodes encoded labels and/or weights.
1928 This operation reverses the encoding performed by `encode`.
1931 encoder: An EncodeMapper object used to encode the FST.
1938 self._decode(encoder)
1941 cdef void _delete_arcs(self, int64 state, size_t n=0) except *:
1942 if not (self._mfst.get().DeleteArcs(state, n) if n else
1943 self._mfst.get().DeleteArcs(state)):
1944 raise FstIndexError("State index out of range")
1945 self._check_mutating_imethod()
1947 def delete_arcs(self, int64 state, size_t n=0):
1949 delete_arcs(self, state, n=0)
1951 Deletes arcs leaving a particular state.
1954 state: The integer index of a state.
1955 n: An optional argument indicating how many arcs to be deleted. If this
1956 argument is omitted or passed as zero, all arcs from this state are
1963 FstIndexError: State index out of range.
1965 See also: `delete_states`.
1967 self._delete_arcs(state, n)
1970 cdef void _delete_states(self, states=None) except *:
1971 # Only the former signature has a possible indexing failure.
1973 if not self._mfst.get().DeleteStates(<const vector[int64]> states):
1974 raise FstIndexError("State index out of range")
1976 self._mfst.get().DeleteStates()
1977 self._check_mutating_imethod()
1979 def delete_states(self, states=None):
1981 delete_states(self, states=None)
1986 states: An optional iterable of integer indices of the states to be
1987 deleted. If this argument is omitted, all states are deleted.
1993 FstIndexError: State index out of range.
1995 See also: `delete_arcs`.
1997 self._delete_states(states)
2000 cdef void _encode(self, EncodeMapper encoder) except *:
2001 fst.Encode(self._mfst.get(), encoder._encoder.get())
2002 self._check_mutating_imethod()
2004 def encode(self, EncodeMapper encoder):
2006 encode(self, encoder)
2008 Encodes labels and/or weights.
2010 This operation allows for the representation of a weighted transducer as a
2011 weighted acceptor, an unweighted transducer, or an unweighted acceptor by
2012 considering the pair (input label, output label), the pair (input label,
2013 weight), or the triple (input label, output label, weight) as a single
2014 label. Applying this operation mutates the EncodeMapper argument, which
2015 can then be used to decode.
2018 encoder: An EncodeMapper object to be used as the encoder.
2025 self._encode(encoder)
2028 cdef void _invert(self) except *:
2029 fst.Invert(self._mfst.get())
2030 self._check_mutating_imethod()
2036 Inverts the FST's transduction.
2038 This operation destructively inverts the FST's transduction by exchanging
2039 input and output labels.
2047 cdef void _minimize(self, float delta=fst.kDelta,
2048 bool allow_nondet=False) except *:
2049 # This runs in-place when the second argument is null.
2050 fst.Minimize(self._mfst.get(), NULL, delta, allow_nondet)
2051 self._check_mutating_imethod()
2053 def minimize(self, float delta=fst.kDelta, bool allow_nondet=False):
2055 minimize(self, delta=0.0009765625, allow_nondet=False)
2059 This operation destructively performs the minimization of deterministic
2060 weighted automata and transducers. If the input FST A is an acceptor, this
2061 operation produces the minimal acceptor B equivalent to A, i.e. the
2062 acceptor with a minimal number of states that is equivalent to A. If the
2063 input FST A is a transducer, this operation internally builds an equivalent
2064 transducer with a minimal number of states. However, this minimality is
2065 obtained by allowing transition having strings of symbols as output labels,
2066 this known in the litterature as a real-time transducer. Such transducers
2067 are not directly supported by the library. This function will convert such
2068 transducer by expanding each string-labeled transition into a sequence of
2069 transitions. This will results in the creation of new states, hence losing
2070 the minimality property.
2073 delta: Comparison/quantization delta.
2074 allow_nondet: Attempt minimization of non-deterministic FST?
2079 self._minimize(delta)
2082 cpdef MutableArcIterator mutable_arcs(self, int64 state):
2084 mutable_arcs(self, state)
2086 Returns a mutable iterator over arcs leaving the specified state.
2089 state: The source state ID.
2092 A MutableArcIterator.
2094 See also: `arcs`, `states`.
2096 return MutableArcIterator(self, state)
2098 def mutable_input_symbols(self):
2100 mutable_input_symbols(self)
2102 Returns the FST's (mutable) input symbol table, or None if none is present.
2104 cdef fst.SymbolTable *tst = self._mfst.get().MutableInputSymbols()
2107 return _init_MutableFstSymbolTable(tst, self._mfst)
2109 def mutable_output_symbols(self):
2111 mutable_output_symbols(self)
2113 Returns the FST's (mutable) output symbol table, or None if none is present.
2115 cdef fst.SymbolTable *tst = self._mfst.get().MutableOutputSymbols()
2118 return _init_MutableFstSymbolTable(tst, self._mfst)
2120 cpdef int64 num_states(self):
2124 Returns the number of states.
2126 return self._mfst.get().NumStates()
2128 cdef void _project(self, bool project_output=False) except *:
2129 fst.Project(self._mfst.get(), fst.GetProjectType(project_output))
2130 self._check_mutating_imethod()
2132 def project(self, bool project_output=False):
2134 project(self, project_output=False)
2136 Converts the FST to an acceptor using input or output labels.
2138 This operation destructively projects an FST onto its domain or range by
2139 either copying each arc's input label to its output label (the default) or
2143 project_output: Should the output labels be projected?
2148 See also: `decode`, `encode`, `relabel_pairs`, `relabel_symbols`.
2150 self._project(project_output)
2153 cdef void _prune(self, float delta=fst.kDelta, int64 nstate=fst.kNoStateId,
2154 weight=None) except *:
2155 # Threshold is set to semiring Zero (no pruning) if no weight is specified.
2156 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(self.weight_type(),
2158 fst.Prune(self._mfst.get(), wc, nstate, delta)
2159 self._check_mutating_imethod()
2162 float delta=fst.kDelta,
2163 int64 nstate=fst.kNoStateId,
2166 prune(self, delta=0.0009765625, nstate=NO_STATE_ID, weight=None)
2168 Removes paths with weights below a certain threshold.
2170 This operation deletes states and arcs in the input FST that do not belong
2171 to a successful path whose weight is no more (w.r.t the natural semiring
2172 order) than the threshold t \otimes-times the weight of the shortest path in
2173 the input FST. Weights must be commutative and have the path property.
2176 delta: Comparison/quantization delta.
2177 nstate: State number threshold.
2178 weight: A Weight or weight string indicating the desired weight threshold
2179 below which paths are pruned; if omitted, no paths are pruned.
2184 See also: The constructive variant.
2186 self._prune(delta, nstate, weight)
2189 cdef void _push(self,
2190 float delta=fst.kDelta,
2191 bool remove_total_weight=False,
2192 bool to_final=False) except *:
2193 fst.Push(self._mfst.get(), fst.GetReweightType(to_final), delta,
2194 remove_total_weight)
2195 self._check_mutating_imethod()
2198 float delta=fst.kDelta,
2199 bool remove_total_weight=False,
2200 bool to_final=False):
2202 push(self, delta=0.0009765625, remove_total_weight=False, to_final=False)
2204 Pushes weights towards the initial or final states.
2206 This operation destructively produces an equivalent transducer by pushing
2207 the weights towards the initial state or toward the final states. When
2208 pushing weights towards the initial state, the sum of the weight of the
2209 outgoing transitions and final weight at any non-initial state is equal to
2210 one in the resulting machine. When pushing weights towards the final states,
2211 the sum of the weight of the incoming transitions at any state is equal to
2212 one. Weights need to be left distributive when pushing towards the initial
2213 state and right distributive when pushing towards the final states.
2216 delta: Comparison/quantization delta.
2217 remove_total_weight: If pushing weights, should the total weight be
2219 to_final: Push towards final states?
2224 See also: The constructive variant, which also supports label pushing.
2226 self._push(delta, remove_total_weight, to_final)
2229 cdef void _relabel_pairs(self, ipairs=None, opairs=None) except *:
2230 cdef unique_ptr[vector[fst.LabelPair]] _ipairs
2231 _ipairs.reset(new vector[fst.LabelPair]())
2232 cdef unique_ptr[vector[fst.LabelPair]] _opairs
2233 _opairs.reset(new vector[fst.LabelPair]())
2237 for (before, after) in ipairs:
2238 _ipairs.get().push_back(fst.LabelPair(before, after))
2240 for (before, after) in opairs:
2241 _opairs.get().push_back(fst.LabelPair(before, after))
2242 if _ipairs.get().empty() and _opairs.get().empty():
2243 raise FstArgError("No relabeling pairs specified.")
2244 fst.Relabel(self._mfst.get(), deref(_ipairs), deref(_opairs))
2245 self._check_mutating_imethod()
2247 def relabel_pairs(self, ipairs=None, opairs=None):
2249 relabel_pairs(self, ipairs=None, opairs=None)
2251 Replaces input and/or output labels using pairs of labels.
2253 This operation destructively relabels the input and/or output labels of the
2254 FST using pairs of the form (old_ID, new_ID); omitted indices are
2258 ipairs: An iterable containing (older index, newer index) integer pairs.
2259 opairs: An iterable containing (older index, newer index) integer pairs.
2265 FstArgError: No relabeling pairs specified.
2267 See also: `decode`, `encode`, `project`, `relabel_tables`.
2269 self._relabel_pairs(ipairs, opairs)
2272 cdef void _relabel_tables(self,
2273 _SymbolTable old_isymbols=None,
2274 _SymbolTable new_isymbols=None,
2275 unknown_isymbol=b"",
2276 bool attach_new_isymbols=True,
2277 _SymbolTable old_osymbols=None,
2278 _SymbolTable new_osymbols=None,
2279 unknown_osymbol=b"",
2280 bool attach_new_osymbols=True) except *:
2281 if new_isymbols is None and new_osymbols is None:
2282 raise FstArgError("No new SymbolTables specified")
2283 cdef fst.SymbolTable *new_isymbols_ptr = NULL
2284 if new_isymbols is not None:
2285 new_isymbols_ptr = new_isymbols._table
2286 cdef fst.SymbolTable *new_osymbols_ptr = NULL
2287 if new_osymbols is not None:
2288 new_osymbols_ptr = new_osymbols._table
2289 fst.Relabel(self._mfst.get(),
2290 self._fst.get().InputSymbols() if old_isymbols is None else
2291 old_isymbols._table, new_isymbols_ptr, tostring(unknown_isymbol),
2292 attach_new_isymbols,
2293 self._fst.get().OutputSymbols() if old_osymbols is None else
2294 old_osymbols._table, new_osymbols_ptr, tostring(unknown_osymbol),
2295 attach_new_osymbols)
2296 self._check_mutating_imethod()
2298 def relabel_tables(self,
2299 _SymbolTable old_isymbols=None,
2300 _SymbolTable new_isymbols=None,
2301 unknown_isymbol=b"",
2302 bool attach_new_isymbols=True,
2303 _SymbolTable old_osymbols=None,
2304 _SymbolTable new_osymbols=None,
2305 unknown_osymbol=b"",
2306 bool attach_new_osymbols=True):
2308 relabel_tables(self, old_isymbols=None, new_isymbols=None,
2309 unknown_isymbol="", attach_new_isymbols=True,
2310 old_osymbols=None, new_osymbols=None,
2311 unknown_osymbol="", attach_new_osymbols=True)
2313 Replaces input and/or output labels using SymbolTables.
2315 This operation destructively relabels the input and/or output labels of the
2316 FST using user-specified symbol tables; omitted symbols are identity-mapped.
2319 old_isymbols: The old SymbolTable for input labels, defaulting to the
2320 FST's input symbol table.
2321 new_isymbols: A SymbolTable used to relabel the input labels
2322 unknown_isymbol: Input symbol to use to relabel OOVs (if empty,
2323 OOVs raise an exception)
2324 attach_new_isymbols: Should new_isymbols be made the FST's input symbol
2326 old_osymbols: The old SymbolTable for output labels, defaulting to the
2327 FST's output symbol table.
2328 new_osymbols: A SymbolTable used to relabel the output labels.
2329 unknown_osymbol: Outnput symbol to use to relabel OOVs (if empty,
2330 OOVs raise an exception)
2331 attach_new_isymbols: Should new_osymbols be made the FST's output symbol
2338 FstArgError: No SymbolTable specified.
2340 See also: `decode`, `encode`, `project`, `relabel_pairs`.
2342 self._relabel_tables(old_isymbols, new_isymbols,
2343 unknown_isymbol, attach_new_isymbols,
2344 old_osymbols, new_osymbols,
2345 unknown_osymbol, attach_new_osymbols)
2348 cdef void _reserve_arcs(self, int64 state, size_t n) except *:
2349 if not self._mfst.get().ReserveArcs(state, n):
2350 raise FstIndexError("State index out of range")
2351 self._check_mutating_imethod()
2353 def reserve_arcs(self, int64 state, size_t n):
2355 reserve_arcs(self, state, n)
2357 Reserve n arcs at a particular state (best effort).
2360 state: The integer index of a state.
2361 n: The number of arcs to reserve.
2367 FstIndexError: State index out of range.
2369 See also: `reserve_states`.
2371 self._reserve_arcs(state, n)
2374 cdef void _reserve_states(self, int64 n) except *:
2375 self._mfst.get().ReserveStates(n)
2376 self._check_mutating_imethod()
2378 def reserve_states(self, int64 n):
2380 reserve_states(self, n)
2382 Reserve n states (best effort).
2385 n: The number of states to reserve.
2390 See also: `reserve_arcs`.
2392 self._reserve_states(n)
2395 cdef void _reweight(self, potentials, bool to_final=False) except *:
2396 cdef unique_ptr[vector[fst.WeightClass]] _potentials
2397 _potentials.reset(new vector[fst.WeightClass]())
2398 cdef string weight_type = self.weight_type()
2399 for weight in potentials:
2400 _potentials.get().push_back(_get_WeightClass_or_One(self.weight_type(),
2402 fst.Reweight(self._mfst.get(), deref(_potentials),
2403 fst.GetReweightType(to_final))
2404 self._check_mutating_imethod()
2406 def reweight(self, potentials, bool to_final=False):
2408 reweight(self, potentials, to_final=False)
2410 Reweights an FST using an iterable of potentials.
2412 This operation destructively reweights an FST according to the potentials
2413 and in the direction specified by the user. An arc of weight w, with an
2414 origin state of potential p and destination state of potential q, is
2415 reweighted by p^{-1} \otimes (w \otimes q) when reweighting towards the
2416 initial state, and by (p \otimes w) \otimes q^{-1} when reweighting towards
2417 the final states. The weights must be left distributive when reweighting
2418 towards the initial state and right distributive when reweighting towards
2419 the final states (e.g., TropicalWeight and LogWeight).
2422 potentials: An iterable of Weight or weight strings.
2423 to_final: Push towards final states?
2428 self._reweight(potentials, to_final)
2431 cdef void _rmepsilon(self,
2433 float delta=fst.kDelta,
2434 int64 nstate=fst.kNoStateId,
2435 weight=None) except *:
2436 # Threshold is set to semiring Zero (no pruning) if weight unspecified.
2437 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(self.weight_type(),
2439 fst.RmEpsilon(self._mfst.get(), connect, wc, nstate, delta)
2440 self._check_mutating_imethod()
2444 float delta=fst.kDelta,
2445 int64 nstate=fst.kNoStateId,
2448 rmepsilon(self, connect=True, delta=0.0009765625, nstate=NO_STATE_ID,
2451 Removes epsilon transitions.
2453 This operation destructively removes epsilon transitions, i.e., those where
2454 both input and output labels are epsilon) from an FST.
2457 connect: Should output be trimmed?
2458 delta: Comparison/quantization delta.
2459 nstate: State number threshold.
2460 weight: A Weight or weight string indicating the desired weight threshold
2461 below which paths are pruned; if omitted, no paths are pruned.
2466 See also: The constructive variant, which also supports epsilon removal in
2467 reverse (and which may be more efficient).
2469 self._rmepsilon(connect, delta, nstate, weight)
2472 cdef void _set_final(self, int64 state, weight=None) except *:
2473 if not self._mfst.get().ValidStateId(state):
2474 raise FstIndexError("State index out of range")
2475 cdef fst.WeightClass wc = _get_WeightClass_or_One(self.weight_type(),
2477 if not self._mfst.get().SetFinal(state, wc):
2478 raise FstOpError("Incompatible or invalid weight")
2479 self._check_mutating_imethod()
2481 def set_final(self, int64 state, weight=None):
2483 set_final(self, state, weight)
2485 Sets the final weight for a state.
2488 state: The integer index of a state.
2489 weight: A Weight or weight string indicating the desired final weight; if
2490 omitted, it is set to semiring One.
2493 FstIndexError: State index out of range.
2494 FstOpError: Incompatible or invalid weight.
2496 See also: `set_start`.
2498 self._set_final(state, weight)
2501 cdef void _set_input_symbols(self, _SymbolTable syms) except *:
2503 self._mfst.get().SetInputSymbols(NULL)
2505 self._mfst.get().SetInputSymbols(syms._table)
2506 self._check_mutating_imethod()
2508 def set_input_symbols(self, _SymbolTable syms):
2510 set_input_symbols(self, syms)
2512 Sets the input symbol table.
2514 Passing None as a value will delete the input symbol table.
2517 syms: A SymbolTable.
2522 See also: `set_output_symbols`.
2524 self._set_input_symbols(syms)
2527 cdef void _set_output_symbols(self, _SymbolTable syms) except *:
2529 self._mfst.get().SetOutputSymbols(NULL)
2531 self._mfst.get().SetOutputSymbols(syms._table)
2532 self._check_mutating_imethod()
2534 def set_output_symbols(self, _SymbolTable syms):
2536 set_output_symbols(self, syms)
2538 Sets the output symbol table.
2540 Passing None as a value will delete the output symbol table.
2543 syms: A SymbolTable.
2548 See also: `set_input_symbols`.
2550 self._set_output_symbols(syms)
2553 cdef void _set_properties(self, uint64 props, uint64 mask):
2554 self._mfst.get().SetProperties(props, mask)
2556 def set_properties(self, uint64 props, uint64 mask):
2558 set_properties(self, props, mask)
2560 Sets the properties bits.
2563 props: The properties to be set.
2564 mask: A mask to be applied to the `props` argument before setting the
2570 self._set_properties(props, mask)
2573 cdef void _set_start(self, int64 state) except *:
2574 if not self._mfst.get().SetStart(state):
2575 raise FstIndexError("State index out of range")
2576 self._check_mutating_imethod()
2578 def set_start(self, int64 state):
2580 set_start(self, state)
2582 Sets a state to be the initial state state.
2585 state: The integer index of a state.
2591 FstIndexError: State index out of range.
2593 See also: `set_final`.
2595 self._set_start(state)
2598 cdef void _topsort(self) except *:
2599 # TopSort returns False if the FST is cyclic, and thus can't be TopSorted.
2600 if not fst.TopSort(self._mfst.get()):
2601 logging.warning("Cannot topsort cyclic FST.")
2602 self._check_mutating_imethod()
2608 Sorts transitions by state IDs.
2610 This operation destructively topologically sorts the FST, if it is acyclic;
2611 otherwise it remains unchanged. Once sorted, all transitions are from lower
2612 state IDs to higher state IDs
2617 See also: `arcsort`.
2622 cdef void _union(self, _Fst ifst) except *:
2623 fst.Union(self._mfst.get(), deref(ifst._fst))
2624 self._check_mutating_imethod()
2626 def union(self, _Fst ifst):
2630 Computes the union (sum) of two FSTs.
2632 This operation computes the union (sum) of two FSTs. If A transduces string
2633 x to y with weight a and B transduces string w to v with weight b, then
2634 their union transduces x to y with weight a and w to v with weight b.
2637 ifst: The second input FST.
2646 # Pseudo-constructors for _Fst and _MutableFst.
2648 # _init_Fst and _init_MutableFst use an FstClass pointer to instantiate _Fst
2649 # and _MutableFst objects, respectively. The latter function is only safe to
2650 # call when the FST being wrapped is known to be kMutable. The caller can
2651 # safely use it when they have either checked this bit (e.g., by using
2652 # _init_XFst) or have themselves constructed a mutable container for the
2653 # FstClass pointer they're passing (e.g., most of the constructive operations,
2654 # storing their results in a VectorFstClass, a derivative of MutableFstClass).
2656 # _create_Fst constructs an empty VectorFstClass of a user-specified arc type,
2657 # and passes this pointer to _init_MutableFst.
2659 # _read_Fst reads an FST from disk, performing FST conversion if requested, and
2660 # then passes this pointer to _init_XFst.
2662 # The Python class Fst provides a wrapper for these two operations. The former
2663 # can be accessed by calling Fst(...), which acts like a class method, and the
2664 # latter via Fst.read(...), which acts like a static method. This is a bit
2665 # nasty, but totally hidden from the Python user.
2668 cdef _Fst _init_Fst(FstClass_ptr tfst):
2669 if tfst.Properties(fst.kError, True):
2670 raise FstOpError("Operation failed")
2671 cdef _Fst ofst = _Fst.__new__(_Fst)
2672 ofst._fst.reset(<FstClass_ptr> tfst)
2676 cdef _MutableFst _init_MutableFst(MutableFstClass_ptr tfst):
2677 if tfst.Properties(fst.kError, True):
2678 raise FstOpError("Operation failed")
2679 cdef _MutableFst ofst = _MutableFst.__new__(_MutableFst)
2680 ofst._fst.reset(<MutableFstClass_ptr> tfst)
2681 # Makes a copy of it as the derived type! Cool.
2682 ofst._mfst = static_pointer_cast[fst.MutableFstClass, fst.FstClass](ofst._fst)
2686 cdef _Fst _init_XFst(FstClass_ptr tfst):
2687 if tfst.Properties(fst.kMutable, True):
2688 return _init_MutableFst(static_cast[MutableFstClass_ptr](tfst))
2690 return _init_Fst(tfst)
2693 cdef _MutableFst _create_Fst(arc_type=b"standard"):
2694 cdef unique_ptr[fst.VectorFstClass] tfst
2695 tfst.reset(new fst.VectorFstClass(<string> tostring(arc_type)))
2696 if tfst.get() == NULL:
2697 raise FstOpError("Unknown arc type: {!r}".format(arc_type))
2698 return _init_MutableFst(tfst.release())
2701 cdef _Fst _read_Fst(filename, fst_type=None):
2702 cdef unique_ptr[fst.FstClass] tfst
2703 tfst.reset(fst.FstClass.Read(tostring(filename)))
2704 if tfst.get() == NULL:
2705 raise FstIOError("Read failed: {!r}".format(filename))
2706 # Converts if requested.
2707 cdef string fst_type_string
2709 fst_type_string = tostring(fst_type)
2710 if fst_type_string != tfst.get().FstType():
2711 tfst.reset(fst.Convert(deref(tfst), fst_type_string))
2712 if tfst.get() == NULL:
2713 raise FstOpError("Conversion to {!r} failed.".format(fst_type))
2714 return _init_XFst(tfst.release())
2717 cdef _Fst _deserialize_Fst(fst_string, fst_type=None):
2718 cdef unique_ptr[fst.FstClass] ofst
2719 ofst.reset(fst.FstClass.ReadFromString(fst_string))
2720 if fst_type is not None:
2721 fst_type_string = tostring(fst_type)
2722 if fst_type_string != ofst.get().FstType():
2723 ofst.reset(fst.Convert(deref(ofst), fst_type_string))
2724 if ofst.get() == NULL:
2725 raise FstOpError("Conversion to {!r} failed.".format(fst_type))
2726 return _init_XFst(ofst.release())
2732 Fst(arc_type="standard")
2734 Constructs an empty FST.
2737 arc_type: A string indicating the arc type.
2740 FstError: Unknown arc type.
2743 FstOpError: operation failed.
2746 def __new__(cls, arc_type=b"standard"):
2747 return _create_Fst(arc_type)
2750 def read(filename, fst_type=None):
2752 read(filename, fst_type=None)
2754 Reads an FST from a file.
2757 filename: The string location of the input file.
2758 fst_type: A string indicating the FST type to convert to; no conversion
2759 is performed if omitted or if the FST is already of the desired type.
2765 FstIOError: Read failed.
2766 FstOpError: Read-time conversion failed.
2768 return _read_Fst(filename, fst_type)
2771 def read_from_string(fst_string, fst_type=None):
2773 read_from_string(fst_string, fst_type=None)
2775 Reads an FST from a serialized string.
2778 fst_string: The string containing the serialized FST.
2779 fst_type: A string indicating the FST type to convert to; no conversion
2780 is performed if omitted or if the FST is already of the desired type.
2786 FstIOError: Read failed.
2787 FstOpError: Read-time conversion failed.
2789 See also: `write_to_string`.
2791 return _deserialize_Fst(fst_string, fst_type)
2797 NO_LABEL = fst.kNoLabel
2798 NO_STATE_ID = fst.kNoStateId
2799 # TODO(kbg): Figure out how to access static class variables so I don't have
2800 # to do it this way.
2801 NO_SYMBOL = kNoSymbol
2807 EXPANDED = fst.kExpanded
2808 MUTABLE = fst.kMutable
2810 ACCEPTOR = fst.kAcceptor
2811 NOT_ACCEPTOR = fst.kNotAcceptor
2812 I_DETERMINISTIC = fst.kIDeterministic
2813 NON_I_DETERMINISTIC = fst.kNonIDeterministic
2814 O_DETERMINISTIC = fst.kODeterministic
2815 NON_O_DETERMINISTIC = fst.kNonODeterministic
2816 EPSILONS = fst.kEpsilons
2817 NO_EPSILONS = fst.kNoEpsilons
2818 I_EPSILONS = fst.kIEpsilons
2819 NO_I_EPSILONS = fst.kNoIEpsilons
2820 O_EPSILONS = fst.kOEpsilons
2821 NO_O_EPSILONS = fst.kNoOEpsilons
2822 I_LABEL_SORTED = fst.kILabelSorted
2823 NOT_I_LABEL_SORTED = fst.kNotILabelSorted
2824 O_LABEL_SORTED = fst.kOLabelSorted
2825 NOT_O_LABEL_SORTED = fst.kNotOLabelSorted
2826 WEIGHTED = fst.kWeighted
2827 UNWEIGHTED = fst.kUnweighted
2828 CYCLIC = fst.kCyclic
2829 ACYCLIC = fst.kAcyclic
2830 INITIAL_CYCLIC = fst.kInitialCyclic
2831 INITIAL_ACYCLIC = fst.kInitialAcyclic
2832 TOP_SORTED = fst.kTopSorted
2833 NOT_TOP_SORTED = fst.kNotTopSorted
2834 ACCESSIBLE = fst.kAccessible
2835 NOT_ACCESSIBLE = fst.kNotAccessible
2836 COACCESSIBLE = fst.kCoAccessible
2837 NOT_COACCESSIBLE = fst.kNotCoAccessible
2838 STRING = fst.kString
2839 NOT_STRING = fst.kNotString
2840 WEIGHTED_CYCLES = fst.kWeightedCycles
2841 UNWEIGHTED_CYCLES = fst.kUnweightedCycles
2842 NULL_PROPERTIES = fst.kNullProperties
2843 COPY_PROPERTIES = fst.kCopyProperties
2844 INTRINSIC_PROPERTIES = fst.kIntrinsicProperties
2845 EXTRINSIC_PROPERTIES = fst.kExtrinsicProperties
2846 SET_START_PROPERTIES = fst.kSetStartProperties
2847 SET_FINAL_PROPERTIES = fst.kSetFinalProperties
2848 ADD_STATE_PROPERTIES = fst.kAddStateProperties
2849 ADD_ARC_PROPERTIES = fst.kAddArcProperties
2850 SET_ARC_PROPERTIES = fst.kSetArcProperties
2851 DELETE_STATE_PROPERTIES = fst.kDeleteStatesProperties
2852 DELETE_ARC_PROPERTIES = fst.kDeleteArcsProperties
2853 STATE_SORT_PROPERTIES = fst.kStateSortProperties
2854 ARC_SORT_PROPERTIES = fst.kArcSortProperties
2855 I_LABEL_INVARIANT_PROPERTIES = fst.kILabelInvariantProperties
2856 O_LABEL_INVARIANT_PROPERTIES = fst.kOLabelInvariantProperties
2857 WEIGHT_INVARIANT_PROPERTIES = fst.kWeightInvariantProperties
2858 ADD_SUPERFINAL_PROPERTIES = fst.kAddSuperFinalProperties
2859 RM_SUPERFINAL_PROPERTIES = fst.kRmSuperFinalProperties
2860 BINARY_PROPERTIES = fst.kBinaryProperties
2861 TRINARY_PROPERTIES = fst.kTrinaryProperties
2862 POS_TRINARY_PROPERTIES = fst.kPosTrinaryProperties
2863 NEG_TRINARY_PROPERTIES = fst.kNegTrinaryProperties
2864 FST_PROPERTIES = fst.kFstProperties
2867 ## Arc iterator properties.
2870 ARC_I_LABEL_VALUE = fst.kArcILabelValue
2871 ARC_O_LABEL_VALUE = fst.kArcOLabelValue
2872 ARC_WEIGHT_VALUE = fst.kArcWeightValue
2873 ARC_NEXT_STATE_VALUE = fst.kArcNextStateValue
2874 ARC_NO_CACHE = fst.kArcNoCache
2875 ARC_VALUE_FLAGS = fst.kArcValueFlags
2876 ARC_FLAGS = fst.kArcFlags
2879 ## EncodeMapper properties.
2882 ENCODE_LABELS = fst.kEncodeLabels
2883 ENCODE_WEIGHTS = fst.kEncodeWeights
2884 ENCODE_FLAGS = fst.kEncodeFlags
2887 ## Arc, ArcIterator, and MutableArcIterator.
2890 cdef class Arc(object):
2893 Arc(ilabel, olabel, weight, nextstate)
2895 This class represents an arc while remaining agnostic about the underlying arc
2896 type. Attributes of the arc can be accessed or mutated, and the arc can be
2900 ilabel: The input label.
2901 olabel: The output label.
2902 weight: The arc weight.
2903 nextstate: The destination state for the arc.
2907 return "<Arc at 0x{:x}>".format(id(self))
2909 def __init__(self, int64 ilabel, int64 olabel, weight, int64 nextstate):
2910 cdef fst.WeightClass wc = _get_WeightClass_or_One(b"tropical", weight)
2911 self._arc.reset(new fst.ArcClass(ilabel, olabel, wc, nextstate))
2913 cpdef Arc copy(self):
2914 return Arc(self.ilabel, self.olabel, self.weight, self.nextstate)
2919 return deref(self._arc).ilabel
2921 def __set__(self, int64 value):
2922 deref(self._arc).ilabel = value
2927 return deref(self._arc).olabel
2929 def __set__(self, int64 value):
2930 deref(self._arc).olabel = value
2935 cdef Weight weight = Weight.__new__(Weight)
2936 weight._weight.reset(new fst.WeightClass(deref(self._arc).weight))
2939 def __set__(self, weight):
2940 deref(self._arc).weight = _get_WeightClass_or_One(b"tropical", weight)
2945 return deref(self._arc).nextstate
2947 def __set__(self, int64 value):
2948 deref(self._arc).nextstate = value
2951 cdef Arc _init_Arc(const fst.ArcClass &arc):
2952 cdef Weight weight = Weight.__new__(Weight)
2953 weight._weight.reset(new fst.WeightClass(arc.weight))
2954 return Arc(<int64> arc.ilabel, <int64> arc.olabel, weight,
2955 <int64> arc.nextstate)
2958 cdef class ArcIterator(object):
2961 ArcIterator(ifst, state)
2963 This class is used for iterating over the arcs leaving some state of an FST.
2967 return "<ArcIterator at 0x{:x}>".format(id(self))
2969 def __init__(self, _Fst ifst, int64 state):
2970 if not ifst._fst.get().ValidStateId(state):
2971 raise FstIndexError("State index out of range")
2972 # Makes copy of the shared_ptr, potentially extending the FST's lifetime.
2973 self._fst = ifst._fst
2974 self._aiter.reset(new fst.ArcIteratorClass(deref(self._fst), state))
2976 # This just registers this class as a possible iterator.
2980 # Magic method used to get a Pythonic API out of the C++ API.
2984 result = self.value()
2988 cpdef bool done(self):
2992 Indicates whether the iterator is exhausted or not.
2995 True if the iterator is exhausted, False otherwise.
2997 return self._aiter.get().Done()
2999 cpdef uint32 flags(self):
3003 Returns the current iterator behavioral flags.
3006 The current iterator behavioral flags as an integer.
3008 return self._aiter.get().Flags()
3010 cpdef void next(self):
3014 Advances the iterator.
3016 self._aiter.get().Next()
3018 cpdef size_t position(self):
3022 Returns the position of the iterator.
3025 The iterator's position, expressed as an integer.
3027 return self._aiter.get().Position()
3029 cpdef void reset(self):
3033 Resets the iterator to the initial position.
3035 self._aiter.get().Reset()
3037 cpdef void seek(self, size_t a):
3041 Advance the iterator to a new position.
3044 a: The position to seek to.
3046 self._aiter.get().Seek(a)
3048 cpdef void set_flags(self, uint32 flags, uint32 mask):
3050 set_flags(self, flags, mask)
3052 Sets the current iterator behavioral flags.
3055 flags: The properties to be set.
3056 mask: A mask to be applied to the `flags` argument before setting them.
3058 self._aiter.get().SetFlags(flags, mask)
3060 cpdef object value(self):
3064 Returns the current arc.
3066 return _init_Arc(self._aiter.get().Value())
3069 cdef class MutableArcIterator(object):
3072 MutableArcIterator(ifst, state)
3074 This class is used for iterating over the arcs leaving some state of an FST,
3075 also permitting mutation of the current arc.
3079 return "<MutableArcIterator at 0x{:x}>".format(id(self))
3081 def __init__(self, _MutableFst ifst, int64 state):
3082 if not ifst._fst.get().ValidStateId(state):
3083 raise FstIndexError("State index out of range")
3084 # Makes copy of the shared_ptr, potentially extending the FST's lifetime.
3085 self._mfst = ifst._mfst
3086 self._aiter.reset(new fst.MutableArcIteratorClass(ifst._mfst.get(), state))
3088 cpdef bool done(self):
3092 Indicates whether the iterator is exhausted or not.
3095 True if the iterator is exhausted, False otherwise.
3097 return self._aiter.get().Done()
3099 cpdef uint32 flags(self):
3103 Returns the current iterator behavioral flags.
3106 The current iterator behavioral flags as an integer.
3108 return self._aiter.get().Flags()
3110 cpdef void next(self):
3114 Advances the iterator.
3116 self._aiter.get().Next()
3118 cpdef size_t position(self):
3122 Returns the position of the iterator.
3125 The iterator's position, expressed as an integer.
3127 return self._aiter.get().Position()
3129 cpdef void reset(self):
3133 Resets the iterator to the initial position.
3135 self._aiter.get().Reset()
3137 cpdef void seek(self, size_t a):
3141 Advance the iterator to a new position.
3144 a: The position to seek to.
3146 self._aiter.get().Seek(a)
3148 cpdef void set_flags(self, uint32 flags, uint32 mask):
3150 set_flags(self, flags, mask)
3152 Sets the current iterator behavioral flags.
3155 flags: The properties to be set.
3156 mask: A mask to be applied to the `flags` argument before setting them.
3158 self._aiter.get().SetFlags(flags, mask)
3160 cpdef void set_value(self, Arc arc):
3162 set_value(self, arc)
3164 Replace the current arc with a new arc.
3167 arc: The arc to replace the current arc with.
3169 self._aiter.get().SetValue(deref(arc._arc))
3171 cpdef object value(self):
3175 Returns the current arc.
3177 return _init_Arc(self._aiter.get().Value())
3183 cdef class StateIterator(object):
3188 This class is used for iterating over the states in an FST.
3192 return "<StateIterator at 0x{:x}>".format(id(self))
3194 def __init__(self, _Fst ifst):
3195 # Makes copy of the shared_ptr, potentially extending the FST's lifetime.
3196 self._fst = ifst._fst
3197 self._siter.reset(new fst.StateIteratorClass(deref(self._fst)))
3199 # This just registers this class as a possible iterator.
3203 # Magic method used to get a Pythonic API out of the C++ API.
3207 cdef int64 result = self.value()
3211 cpdef bool done(self):
3215 Indicates whether the iterator is exhausted or not.
3218 True if the iterator is exhausted, False otherwise.
3220 return self._siter.get().Done()
3222 cpdef void next(self):
3226 Advances the iterator.
3228 self._siter.get().Next()
3230 cpdef void reset(self):
3234 Resets the iterator to the initial position.
3236 self._siter.get().Reset()
3238 cpdef int64 value(self):
3242 Returns the current state index.
3244 return self._siter.get().Value()
3250 cdef _Fst _map(_Fst ifst,
3251 float delta=fst.kDelta,
3252 map_type=b"identity",
3254 cdef fst.MapType map_type_enum
3255 if not fst.GetMapType(tostring(map_type), addr(map_type_enum)):
3256 raise FstArgError("Unknown map type: {!r}".format(map_type))
3257 cdef fst.WeightClass wc = (_get_WeightClass_or_One(ifst.weight_type(),
3258 weight) if map_type_enum == fst.TIMES_MAPPER else
3259 _get_WeightClass_or_Zero(ifst.weight_type(), weight))
3260 return _init_XFst(fst.Map(deref(ifst._fst), map_type_enum, delta, wc))
3263 cpdef _Fst arcmap(_Fst ifst,
3264 float delta=fst.kDelta,
3265 map_type=b"identity",
3268 arcmap(ifst, delta=0.0009765625, map_type="identity", weight=None)
3270 Constructively applies a transform to all arcs and final states.
3272 This operation transforms each arc and final state in the input FST using
3273 one of the following:
3275 * identity: maps to self.
3276 * input_epsilon: replaces all input labels with epsilon.
3277 * invert: reciprocates all non-Zero weights.
3278 * output_epsilon: replaces all output labels with epsilon.
3279 * plus: adds a constant to all weights.
3280 * quantize: quantizes weights.
3281 * rmweight: replaces all non-Zero weights with 1.
3282 * superfinal: redirects final states to a new superfinal state.
3283 * times: right-multiplies a constant to all weights.
3284 * to_log: converts weights to the log semiring.
3285 * to_log64: converts weights to the log64 semiring.
3286 * to_standard: converts weights to the tropical ("standard") semiring.
3289 ifst: The input FST.
3290 delta: Comparison/quantization delta (ignored unless `map_type` is
3292 map_type: A string matching a known mapping operation (see above).
3293 weight: A Weight or weight string passed to the arc-mapper; ignored unless
3294 `map_type` is `plus` (in which case it defaults to semiring Zero) or
3295 `times` (in which case it defaults to semiring One).
3298 An FST with arcs and final states remapped.
3301 FstArgError: Unknown map type.
3303 See also: `statemap`.
3305 return _map(ifst, delta, map_type, weight)
3308 cpdef _MutableFst compose(_Fst ifst1,
3310 compose_filter=b"auto",
3313 compose(ifst1, ifst2, compose_filter="auto", connect=True)
3315 Constructively composes two FSTs.
3317 This operation computes the composition of two FSTs. If A transduces string
3318 x to y with weight a and B transduces y to z with weight b, then their
3319 composition transduces string x to z with weight a \otimes b. The output
3320 labels of the first transducer or the input labels of the second transducer
3321 must be sorted (or otherwise support appropriate matchers).
3324 ifst1: The first input FST.
3325 ifst2: The second input FST.
3326 compose_filter: A string matching a known composition filter; one of:
3327 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
3328 connect: Should output be trimmed?
3333 See also: `arcsort`.
3335 cdef unique_ptr[fst.VectorFstClass] tfst
3336 tfst.reset(new fst.VectorFstClass(ifst1.arc_type()))
3337 cdef unique_ptr[fst.ComposeOptions] opts
3338 opts.reset(new fst.ComposeOptions(connect,
3339 _get_compose_filter(tostring(compose_filter))))
3340 fst.Compose(deref(ifst1._fst), deref(ifst2._fst), tfst.get(), deref(opts))
3341 return _init_MutableFst(tfst.release())
3344 cpdef _Fst convert(_Fst ifst, fst_type=None):
3346 convert(ifst, fst_type=None)
3348 Constructively converts an FST to a new internal representation.
3351 ifst: The input FST.
3352 fst_type: A string indicating the FST type to convert to, or None if
3353 no conversion is desired.
3356 An equivalent Fst converted to the desired FST type.
3359 FstOpError: Conversion failed.
3361 cdef string fst_type_string = b"" if fst_type is None else tostring(fst_type)
3362 cdef unique_ptr[fst.FstClass] tfst
3363 tfst.reset(fst.Convert(deref(ifst._fst), fst_type_string))
3364 # Script-land Convert returns the null pointer to signal failure.
3365 if tfst.get() == NULL:
3366 raise FstOpError("Conversion to {!r} failed".format(fst_type))
3367 return _init_XFst(tfst.release())
3370 cpdef _MutableFst determinize(_Fst ifst,
3371 float delta=fst.kDelta,
3372 det_type=b"functional",
3373 int64 nstate=fst.kNoStateId,
3374 int64 subsequential_label=0,
3376 bool increment_subsequential_label=False):
3378 determinize(ifst, delta=0.0009765625, det_type="functional",
3379 nstate=NO_STATE_ID, subsequential_label=0, weight=None,
3380 incremental_subsequential_label=False)
3382 Constructively determinizes a weighted FST.
3384 This operations creates an equivalent FST that has the property that no
3385 state has two transitions with the same input label. For this algorithm,
3386 epsilon transitions are treated as regular symbols (cf. `rmepsilon`).
3389 ifst: The input FST.
3390 delta: Comparison/quantization delta.
3391 det_type: Type of determinization; one of: "functional" (input transducer is
3392 functional), "nonfunctional" (input transducer is not functional) and
3393 disambiguate" (input transducer is not functional but only keep the min
3394 of ambiguous outputs).
3395 nstate: State number threshold.
3396 subsequential_label: Input label of arc corresponding to residual final
3397 output when producing a subsequential transducer.
3398 weight: A Weight or weight string indicating the desired weight threshold
3399 below which paths are pruned; if omitted, no paths are pruned.
3400 increment_subsequential_label: Increment subsequential when creating
3401 several arcs for the residual final output at a given state.
3404 An equivalent deterministic FST.
3407 FstArgError: Unknown determinization type.
3409 See also: `disambiguate`, `rmepsilon`.
3411 cdef unique_ptr[fst.VectorFstClass] tfst
3412 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3413 # Threshold is set to semiring Zero (no pruning) if weight unspecified.
3414 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(),
3416 cdef fst.DeterminizeType determinize_type_enum
3417 if not fst.GetDeterminizeType(tostring(det_type),
3418 addr(determinize_type_enum)):
3419 raise FstArgError("Unknown determinization type: {!r}".format(det_type))
3420 cdef unique_ptr[fst.DeterminizeOptions] opts
3421 opts.reset(new fst.DeterminizeOptions(delta, wc, nstate, subsequential_label,
3422 determinize_type_enum,
3423 increment_subsequential_label))
3424 fst.Determinize(deref(ifst._fst), tfst.get(), deref(opts))
3425 return _init_MutableFst(tfst.release())
3428 cpdef _MutableFst difference(_Fst ifst1,
3430 compose_filter=b"auto",
3433 difference(ifst1, ifst2, compose_filter="auto", connect=True)
3435 Constructively computes the difference of two FSTs.
3437 This operation computes the difference between two FSAs. Only strings that are
3438 in the first automaton but not in second are retained in the result. The first
3439 argument must be an acceptor; the second argument must be an unweighted,
3440 epsilon-free, deterministic acceptor. The output labels of the first
3441 transducer or the input labels of the second transducer must be sorted (or
3442 otherwise support appropriate matchers).
3445 ifst1: The first input FST.
3446 ifst2: The second input FST.
3447 compose_filter: A string matching a known composition filter; one of:
3448 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
3449 connect: Should the output FST be trimmed?
3452 An FST representing the difference of the FSTs.
3454 cdef unique_ptr[fst.VectorFstClass] tfst
3455 tfst.reset(new fst.VectorFstClass(ifst1.arc_type()))
3456 cdef unique_ptr[fst.ComposeOptions] opts
3457 opts.reset(new fst.ComposeOptions(connect, _get_compose_filter(
3458 tostring(compose_filter))))
3459 fst.Difference(deref(ifst1._fst), deref(ifst2._fst), tfst.get(), deref(opts))
3460 return _init_MutableFst(tfst.release())
3463 cpdef _MutableFst disambiguate(_Fst ifst,
3464 float delta=fst.kDelta,
3465 int64 nstate=fst.kNoStateId,
3466 int64 subsequential_label=0,
3469 disambiguate(ifst, delta=0.0009765625, nstate=NO_STATE_ID,
3470 subsequential_label=0, weight=None):
3472 Constructively disambiguates a weighted transducer.
3474 This operation disambiguates a weighted transducer. The result will be an
3475 equivalent FST that has the property that no two successful paths have the
3476 same input labeling. For this algorithm, epsilon transitions are treated as
3477 regular symbols (cf. `rmepsilon`).
3480 ifst: The input FST.
3481 delta: Comparison/quantization delta.
3482 nstate: State number threshold.
3483 subsequential_label: Input label of arc corresponding to residual final
3484 output when producing a subsequential transducer.
3485 weight: A Weight or weight string indicating the desired weight threshold
3486 below which paths are pruned; if omitted, no paths are pruned.
3489 An equivalent disambiguated FST.
3491 See also: `determinize`, `rmepsilon`.
3493 cdef unique_ptr[fst.VectorFstClass] tfst
3494 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3495 # Threshold is set to semiring Zero (no pruning) if no weight is specified.
3496 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(),
3498 cdef unique_ptr[fst.DisambiguateOptions] opts
3499 opts.reset(new fst.DisambiguateOptions(delta, wc, nstate,
3500 subsequential_label))
3501 fst.Disambiguate(deref(ifst._fst), tfst.get(), deref(opts))
3502 return _init_MutableFst(tfst.release())
3505 cpdef _MutableFst epsnormalize(_Fst ifst, bool eps_norm_output=False):
3507 epsnormalize(ifst, eps_norm_output=False)
3509 Constructively epsilon-normalizes an FST.
3511 This operation creates an equivalent FST that is epsilon-normalized. An
3512 acceptor is epsilon-normalized if it it is epsilon-removed (cf. `rmepsilon`).
3513 A transducer is input epsilon-normalized if, in addition, along any path, all
3514 arcs with epsilon input labels follow all arcs with non-epsilon input labels.
3515 Output epsilon-normalized is defined similarly. The input FST must be
3519 ifst: The input FST.
3520 eps_norm_output: Should the FST be output epsilon-normalized?
3523 An equivalent epsilon-normalized FST.
3525 See also: `rmepsilon`.
3527 cdef unique_ptr[fst.VectorFstClass] tfst
3528 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3529 fst.EpsNormalize(deref(ifst._fst), tfst.get(), fst.EPS_NORM_OUTPUT if
3530 eps_norm_output else
3532 return _init_MutableFst(tfst.release())
3535 cpdef bool equal(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta):
3537 equal(ifst1, ifst2, delta=0.0009765625)
3541 This function tests whether two FSTs have the same states with the same
3542 numbering and the same transitions with the same labels and weights in the
3546 ifst1: The first input FST.
3547 ifst2: The second input FST.
3548 delta: Comparison/quantization delta.
3551 True if the FSTs satisfy the above condition, else False.
3553 See also: `equivalent`, `isomorphic`, `randequivalent`.
3555 return fst.Equal(deref(ifst1._fst), deref(ifst2._fst), delta)
3558 cpdef bool equivalent(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta) except *:
3560 equivalent(ifst1, ifst2, delta=0.0009765625)
3562 Are the two acceptors equivalent?
3564 This operation tests whether two epsilon-free deterministic weighted
3565 acceptors are equivalent, that is if they accept the same strings with the
3569 ifst1: The first input FST.
3570 ifst2: The second input FST.
3571 delta: Comparison/quantization delta.
3574 True if the FSTs satisfy the above condition, else False.
3577 FstOpError: Equivalence test encountered error.
3579 See also: `equal`, `isomorphic`, `randequivalent`.
3582 cdef bool result = fst.Equivalent(deref(ifst1._fst), deref(ifst2._fst), delta,
3585 raise FstOpError("Equivalence test encountered error")
3589 cpdef _MutableFst intersect(_Fst ifst1,
3591 compose_filter=b"auto",
3594 intersect(ifst1, ifst2, compose_filter="auto", connect=True)
3596 Constructively intersects two FSTs.
3598 This operation computes the intersection (Hadamard product) of two FSTs.
3599 Only strings that are in both automata are retained in the result. The two
3600 arguments must be acceptors. One of the arguments must be label-sorted (or
3601 otherwise support appropriate matchers).
3604 ifst1: The first input FST.
3605 ifst2: The second input FST.
3606 compose_filter: A string matching a known composition filter; one of:
3607 "alt_sequence", "auto", "match", "null", "sequence", "trivial".
3608 connect: Should output be trimmed?
3613 cdef unique_ptr[fst.VectorFstClass] tfst
3614 tfst.reset(new fst.VectorFstClass(ifst1.arc_type()))
3615 cdef unique_ptr[fst.ComposeOptions] opts
3616 opts.reset(new fst.ComposeOptions(connect,
3617 _get_compose_filter(tostring(compose_filter))))
3618 fst.Intersect(deref(ifst1._fst), deref(ifst2._fst), tfst.get(), deref(opts))
3619 return _init_MutableFst(tfst.release())
3622 cpdef bool isomorphic(_Fst ifst1, _Fst ifst2, float delta=fst.kDelta):
3624 isomorphic(ifst1, ifst2, delta=0.0009765625)
3626 Are the two acceptors isomorphic?
3628 This operation determines if two transducers with a certain required
3629 determinism have the same states, irrespective of numbering, and the same
3630 transitions with the same labels and weights, irrespective of ordering. In
3631 other words, FSTs A, B are isomorphic if and only if the states of A can be
3632 renumbered and the transitions leaving each state reordered so the two are
3633 equal (according to the definition given in `equal`).
3636 ifst1: The first input FST.
3637 ifst2: The second input FST.
3638 delta: Comparison/quantization delta.
3641 True if the two transducers satisfy the above condition, else False.
3643 See also: `equal`, `equivalent`, `randequivalent`.
3645 return fst.Isomorphic(deref(ifst1._fst), deref(ifst2._fst), delta)
3648 cpdef _MutableFst prune(_Fst ifst,
3649 float delta=fst.kDelta,
3650 int64 nstate=fst.kNoStateId,
3653 prune(ifst, delta=0.0009765625, nstate=NO_STATE_ID, weight=None)
3655 Constructively removes paths with weights below a certain threshold.
3657 This operation deletes states and arcs in the input FST that do not belong
3658 to a successful path whose weight is no more (w.r.t the natural semiring
3659 order) than the threshold t \otimes-times the weight of the shortest path in
3660 the input FST. Weights must be commutative and have the path property.
3663 ifst: The input FST.
3664 delta: Comparison/quantization delta.
3665 nstate: State number threshold.
3666 weight: A Weight or weight string indicating the desired weight threshold
3667 below which paths are pruned; if omitted, no paths are pruned.
3672 See also: The destructive variant.
3674 cdef unique_ptr[fst.VectorFstClass] tfst
3675 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3676 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
3677 fst.Prune(deref(ifst._fst), tfst.get(), wc, nstate, delta)
3678 return _init_MutableFst(tfst.release())
3681 cpdef _MutableFst push(_Fst ifst,
3682 float delta=fst.kDelta,
3683 bool push_weights=False,
3684 bool push_labels=False,
3685 bool remove_common_affix=False,
3686 bool remove_total_weight=False,
3687 bool to_final=False):
3689 push(ifst, delta=0.0009765625, push_weights=False, push_labels=False,
3690 remove_common_affix=False, remove_total_weight=False, to_final=False)
3692 Constructively pushes weights/labels towards initial or final states.
3694 This operation produces an equivalent transducer by pushing the weights
3695 and/or the labels towards the initial state or toward the final states.
3697 When pushing weights towards the initial state, the sum of the weight of the
3698 outgoing transitions and final weight at any non-initial state is equal to 1
3699 in the resulting machine. When pushing weights towards the final states, the
3700 sum of the weight of the incoming transitions at any state is equal to 1.
3701 Weights need to be left distributive when pushing towards the initial state
3702 and right distributive when pushing towards the final states.
3704 Pushing labels towards the initial state consists in minimizing at every
3705 state the length of the longest common prefix of the output labels of the
3706 outgoing paths. Pushing labels towards the final states consists in
3707 minimizing at every state the length of the longest common suffix of the
3708 output labels of the incoming paths.
3711 ifst: The input FST.
3712 delta: Comparison/quantization delta.
3713 push_weights: Should weights be pushed?
3714 push_labels: Should labels be pushed?
3715 remove_common_affix: If pushing labels, should common prefix/suffix be
3717 remove_total_weight: If pushing weights, should total weight be removed?
3718 to_final: Push towards final states?
3721 An equivalent pushed FST.
3723 See also: The destructive variant.
3725 # This is copied, almost verbatim, from ./fstpush.cc.
3726 cdef unique_ptr[fst.VectorFstClass] tfst
3727 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3728 cdef uint32 flags = fst.GetPushFlags(push_weights, push_labels,
3729 remove_common_affix, remove_total_weight)
3730 fst.Push(deref(ifst._fst), tfst.get(), flags, fst.GetReweightType(to_final),
3732 return _init_MutableFst(tfst.release())
3735 cpdef bool randequivalent(_Fst ifst1,
3738 float delta=fst.kDelta,
3741 int32 max_length=INT32_MAX) except *:
3743 randequivalent(ifst1, ifst2, npath=1, delta=0.0009765625, seed=0,
3744 select="uniform", max_length=2147483647)
3746 Are two acceptors stochastically equivalent?
3748 This operation tests whether two FSTs are equivalent by randomly generating
3749 paths alternatively in each of the two FSTs. For each randomly generated path,
3750 the algorithm computes for each of the two FSTs the sum of the weights of all
3751 the successful paths sharing the same input and output labels as the randomly
3752 generated path and checks that these two values are within `delta`.
3755 ifst1: The first input FST.
3756 ifst2: The second input FST.
3757 npath: The number of random paths to generate.
3758 delta: Comparison/quantization delta.
3759 seed: An optional seed value for random path generation; if zero, the
3760 current time and process ID is used.
3761 select: A string matching a known random arc selection type; one of:
3762 "uniform", "log_prob", "fast_log_prob".
3763 max_length: The maximum length of each random path.
3766 True if the two transducers satisfy the above condition, else False.
3769 FstOpError: Random equivalence test encountered error.
3771 See also: `equal`, `equivalent`, `isomorphic`, `randgen`.
3773 cdef fst.RandArcSelection ras = _get_rand_arc_selection(tostring(select))
3774 cdef unique_ptr[fst.RandGenOptions[fst.RandArcSelection]] opts
3775 # The three trailing options will be ignored by RandEquivalent.
3776 opts.reset(new fst.RandGenOptions[fst.RandArcSelection](ras, max_length,
3779 seed = time(NULL) + getpid()
3781 cdef bool result = fst.RandEquivalent(deref(ifst1._fst), deref(ifst2._fst),
3782 npath, delta, seed, deref(opts),
3785 raise FstOpError("Random equivalence test encountered error")
3789 cpdef _MutableFst randgen(_Fst ifst,
3793 int32 max_length=INT32_MAX,
3794 bool weighted=False,
3795 bool remove_total_weight=False):
3797 randgen(ifst, npath=1, seed=0, select="uniform", max_length=2147483647,
3798 weight=False, remove_total_weight=False)
3800 Randomly generate successful paths in an FST.
3802 This operation randomly generates a set of successful paths in the input FST.
3803 This relies on a mechanism for selecting arcs, specified using the `select`
3804 argument. The default selector, "uniform", randomly selects a transition
3805 using a uniform distribution. The "log_prob" selector randomly selects a
3806 transition w.r.t. the weights treated as negative log probabilities after
3807 normalizing for the total weight leaving the state. In all cases, finality is
3808 treated as a transition to a super-final state.
3811 ifst: The input FST.
3812 npath: The number of random paths to generate.
3813 seed: An optional seed value for random path generation; if zero, the
3814 current time and process ID is used.
3815 select: A string matching a known random arc selection type; one of:
3816 "uniform", "log_prob", "fast_log_prob".
3817 max_length: The maximum length of each random path.
3818 weighted: Should the output be weighted by path count?
3819 remove_total_weight: Should the total weight be removed (ignored when
3820 `weighted` is False)?
3823 An FST containing one or more random paths.
3825 See also: `randequivalent`.
3827 cdef fst.RandArcSelection ras = _get_rand_arc_selection(tostring(select))
3828 cdef unique_ptr[fst.RandGenOptions[fst.RandArcSelection]] opts
3829 opts.reset(new fst.RandGenOptions[fst.RandArcSelection](ras, max_length,
3831 remove_total_weight))
3832 cdef unique_ptr[fst.VectorFstClass] tfst
3833 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3835 seed = time(NULL) + getpid()
3836 fst.RandGen(deref(ifst._fst), tfst.get(), seed, deref(opts))
3837 return _init_MutableFst(tfst.release())
3840 cpdef _MutableFst replace(pairs,
3841 call_arc_labeling=b"input",
3842 return_arc_labeling=b"neither",
3843 bool epsilon_on_replace=False,
3844 int64 return_label=0):
3846 replace(pairs, call_arc_labeling="input", return_arc_labeling="neither",
3847 epsilon_on_replace=False, return_label=0)
3849 Recursively replaces arcs in the FST with other FST(s).
3851 This operation performs the dynamic replacement of arcs in one FST with
3852 another FST, allowing the definition of FSTs analogous to RTNs. It takes as
3853 input a set of pairs of a set of pairs formed by a non-terminal label and
3854 its corresponding FST, and a label identifying the root FST in that set.
3855 The resulting FST is obtained by taking the root FST and recursively replacing
3856 each arc having a nonterminal as output label by its corresponding FST. More
3857 precisely, an arc from state s to state d with (nonterminal) output label n in
3858 this FST is replaced by redirecting this "call" arc to the initial state of a
3859 copy F of the FST for n, and adding "return" arcs from each final state of F
3860 to d. Optional arguments control how the call and return arcs are labeled; by
3861 default, the only non-epsilon label is placed on the call arc.
3865 pairs: An iterable of (nonterminal label, FST) pairs, where the former is an
3866 unsigned integer and the latter is an Fst instance.
3867 call_arc_labeling: A string indicating which call arc labels should be
3868 non-epsilon. One of: "input" (default), "output", "both", "neither".
3869 This value is set to "neither" if epsilon_on_replace is True.
3870 return_arc_labeling: A string indicating which return arc labels should be
3871 non-epsilon. One of: "input", "output", "both", "neither" (default).
3872 This value is set to "neither" if epsilon_on_replace is True.
3873 epsilon_on_replace: Should call and return arcs be epsilon arcs? If True,
3874 this effectively overrides call_arc_labeling and return_arc_labeling,
3875 setting both to "neither".
3876 return_label: The integer label for return arcs.
3879 An FST resulting from expanding the input RTN.
3881 cdef vector[fst.LabelFstClassPair] _pairs
3882 cdef int64 root_label
3886 (root_label, ifst) = next(it)
3887 _pairs.push_back(fst.LabelFstClassPair(root_label, ifst._fst.get()))
3888 cdef unique_ptr[fst.VectorFstClass] tfst
3889 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3890 for (label, ifst) in it:
3891 _pairs.push_back(fst.LabelFstClassPair(label, ifst._fst.get()))
3892 cdef fst.ReplaceLabelType cal = _get_replace_label_type(
3893 tostring(call_arc_labeling), epsilon_on_replace)
3894 cdef fst.ReplaceLabelType ral = _get_replace_label_type(
3895 tostring(return_arc_labeling), epsilon_on_replace)
3896 cdef unique_ptr[fst.ReplaceOptions] opts
3897 opts.reset(new fst.ReplaceOptions(root_label, cal, ral, return_label))
3898 fst.Replace(_pairs, tfst.get(), deref(opts))
3899 return _init_MutableFst(tfst.release())
3902 cpdef _MutableFst reverse(_Fst ifst, bool require_superinitial=True):
3904 reverse(ifst, require_superinitial=True)
3906 Constructively reverses an FST's transduction.
3908 This operation reverses an FST. If A transduces string x to y with weight a,
3909 then the reverse of A transduces the reverse of x to the reverse of y with
3910 weight a.Reverse(). (Typically, a = a.Reverse() and Arc = RevArc, e.g.,
3911 TropicalWeight and LogWeight.) In general, e.g., when the weights only form a
3912 left or right semiring, the output arc type must match the input arc type.
3915 ifst: The input FST.
3916 require_superinitial: Should a superinitial state be created?
3921 cdef unique_ptr[fst.VectorFstClass] tfst
3922 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3923 fst.Reverse(deref(ifst._fst), tfst.get(), require_superinitial)
3924 return _init_MutableFst(tfst.release())
3927 cpdef _MutableFst rmepsilon(_Fst ifst,
3929 float delta=fst.kDelta,
3930 int64 nstate=fst.kNoStateId,
3935 rmepsilon(ifst, connect=True, delta=0.0009765625, nstate=NO_STATE_ID,
3936 queue_type="auto", reverse=False, weight=None)
3938 Constructively removes epsilon transitions from an FST.
3940 This operation removes epsilon transitions (those where both input and output
3941 labels are epsilon) from an FST.
3944 ifst: The input FST.
3945 connect: Should output be trimmed?
3946 delta: Comparison/quantization delta.
3947 nstate: State number threshold.
3948 queue_type: A string matching a known queue type; one of: "auto", "fifo",
3949 "lifo", "shortest", "state", "top".
3950 reverse: Should epsilon transitions be removed in reverse order?
3951 weight: A string indicating the desired weight threshold; paths with
3952 weights below this threshold will be pruned.
3955 An equivalent FST with no epsilon transitions.
3957 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
3958 cdef unique_ptr[fst.RmEpsilonOptions] opts
3959 opts.reset(new fst.RmEpsilonOptions(_get_queue_type(tostring(queue_type)),
3960 delta, connect, wc, nstate))
3961 cdef unique_ptr[fst.VectorFstClass] tfst
3962 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
3963 fst.RmEpsilon(deref(ifst._fst), tfst.get(), reverse, deref(opts))
3964 return _init_MutableFst(tfst.release())
3967 # Pure C++ helper for shortestdistance.
3970 cdef vector[fst.WeightClass] *_shortestdistance(_Fst ifst,
3971 float delta=fst.kDelta, int64 nstate=fst.kNoStateId, queue_type=b"auto",
3972 bool reverse=False) except *:
3973 cdef unique_ptr[vector[fst.WeightClass]] distance
3974 distance.reset(new vector[fst.WeightClass]())
3975 # For scoping reasons, these have to be declared here even though they may
3976 # not be used in all cases.
3977 cdef unique_ptr[fst.ShortestDistanceOptions] opts
3979 # Only the simpler signature supports shortest distance to final states;
3980 # `nstate` and `queue_type` arguments are ignored.
3981 fst.ShortestDistance(deref(ifst._fst), distance.get(), True, delta)
3983 opts.reset(new fst.ShortestDistanceOptions(
3984 _get_queue_type(tostring(queue_type)), fst.ANY_ARC_FILTER, nstate,
3986 fst.ShortestDistance(deref(ifst._fst), distance.get(), deref(opts))
3987 return distance.release()
3990 def shortestdistance(_Fst ifst,
3991 float delta=fst.kDelta,
3992 int64 nstate=fst.kNoStateId,
3994 bool reverse=False):
3996 shortestdistance(ifst, delta=0.0009765625, nstate=NO_STATE_ID,
3997 queue_type="auto", reverse=False)
3999 Compute the shortest distance from the initial or final state.
4001 This operation computes the shortest distance from the initial state (when
4002 `reverse` is False) or from every state to the final state (when `reverse` is
4003 True). The shortest distance from p to q is the \otimes-sum of the weights of
4004 all the paths between p and q. The weights must be right (if `reverse` is
4005 False) or left (if `reverse` is True) distributive, and k-closed (i.e., 1
4006 \otimes x \otimes x^2 \otimes ... \otimes x^{k + 1} = 1 \otimes x \otimes x^2
4007 \otimes ... \otimes x^k; e.g., TropicalWeight).
4010 ifst: The input FST.
4011 delta: Comparison/quantization delta.
4012 nstate: State number threshold (ignored if `reverse` is True).
4013 queue_type: A string matching a known queue type; one of: "auto", "fifo",
4014 "lifo", "shortest", "state", "top" (ignored if `reverse` is True).
4015 reverse: Should the reverse distance (from each state to the final state)
4019 A list of Weight objects representing the shortest distance for each state.
4021 cdef unique_ptr[vector[fst.WeightClass]] distance
4022 distance.reset(_shortestdistance(ifst, delta, nstate, queue_type, reverse))
4023 # Packs the distances, as strings, into a Python list.
4024 cdef string weight_type = ifst.weight_type()
4026 # This is just the Cython version of the normal vector iteration idiom.
4027 cdef vector[fst.WeightClass].iterator it = distance.get().begin()
4028 while it != distance.get().end():
4029 result.append(Weight(weight_type, deref(it).ToString()))
4034 cpdef _MutableFst shortestpath(_Fst ifst,
4035 float delta=fst.kDelta,
4037 int64 nstate=fst.kNoStateId,
4042 shortestpath(ifst, delta=0.0009765625, nshortest=1, nstate=NO_STATE_ID,
4043 queue_type="auto", unique=False, weight=None)
4045 Construct an FST containing the shortest path(s) in the input FST.
4047 This operation produces an FST containing the n-shortest paths in the input
4048 FST. The n-shortest paths are the n-lowest weight paths w.r.t. the natural
4049 semiring order. The single path that can be read from the ith of at most n
4050 transitions leaving the initial state of the resulting FST is the ith
4051 shortest path. The weights need to be right distributive and have the path
4052 property. They also need to be left distributive as well for n-shortest with
4053 n > 1 (e.g., TropicalWeight).
4056 ifst: The input FST.
4057 delta: Comparison/quantization delta.
4058 nshortest: The number of paths to return.
4059 nstate: State number threshold.
4060 queue_type: A string matching a known queue type; one of: "auto", "fifo",
4061 "lifo", "shortest", "state", "top".
4062 unique: Should the resulting FST only contain distinct paths? (Requires
4063 the input FST to be an acceptor; epsilons are treated as if they are
4065 weight: A Weight or weight string indicating the desired weight threshold
4066 below which paths are pruned; if omitted, no paths are pruned.
4069 An FST containing the n-shortest paths.
4071 cdef unique_ptr[fst.VectorFstClass] tfst
4072 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
4073 cdef unique_ptr[vector[fst.WeightClass]] distance
4074 distance.reset(new vector[fst.WeightClass]())
4075 # Threshold is set to semiring Zero (no pruning) if no weight is specified.
4076 cdef fst.WeightClass wc = _get_WeightClass_or_Zero(ifst.weight_type(), weight)
4077 cdef unique_ptr[fst.ShortestPathOptions] opts
4078 opts.reset(new fst.ShortestPathOptions(_get_queue_type(tostring(queue_type)),
4079 nshortest, unique, False, delta,
4081 fst.ShortestPath(deref(ifst._fst), tfst.get(), distance.get(), deref(opts))
4082 return _init_MutableFst(tfst.release())
4085 cpdef _Fst statemap(_Fst ifst, map_type):
4087 state_map(ifst, map_type)
4089 Constructively applies a transform to all states.
4091 This operation transforms each state according to the requested map type.
4092 Note that currently, only one state-mapping operation is supported.
4095 ifst: The input FST.
4096 map_type: A string matching a known mapping operation; one of: "arc_sum"
4097 (sum weights of identically-labeled multi-arcs), "arc_unique" (deletes
4098 non-unique identically-labeled multi-arcs).
4101 An FST with states remapped.
4104 FstArgError: Unknown map type.
4108 return _map(ifst, fst.kDelta, map_type, None)
4111 cpdef _MutableFst synchronize(_Fst ifst):
4115 Constructively synchronizes an FST.
4117 This operation synchronizes a transducer. The result will be an equivalent
4118 FST that has the property that during the traversal of a path, the delay is
4119 either zero or strictly increasing, where the delay is the difference between
4120 the number of non-epsilon output labels and input labels along the path. For
4121 the algorithm to terminate, the input transducer must have bounded delay,
4122 i.e., the delay of every cycle must be zero.
4125 ifst: The input FST.
4128 An equivalent synchronized FST.
4130 cdef unique_ptr[fst.VectorFstClass] tfst
4131 tfst.reset(new fst.VectorFstClass(ifst.arc_type()))
4132 fst.Synchronize(deref(ifst._fst), tfst.get())
4133 return _init_MutableFst(tfst.release())
4139 cdef class Compiler(object):
4142 Compiler(fst_type="vector", arc_type="standard", isymbols=None,
4143 osymbols=None, ssymbols=None, acceptor=False, keep_isymbols=False,
4144 keep_osymbols=False, keep_state_numbering=False,
4145 allow_negative_labels=False)
4147 Class used to compile FSTs from strings.
4149 This class is used to compile FSTs specified using the AT&T FSM library
4150 format described here:
4152 http://web.eecs.umich.edu/~radev/NLP-fall2015/resources/fsm_archive/fsm.5.html
4154 This is the same format used by the `fstcompile` executable.
4156 Compiler options (symbol tables, etc.) are set at construction time.
4158 compiler = fst.Compiler(isymbols=ascii_syms, osymbols=ascii_syms)
4160 Once constructed, Compiler instances behave like a file handle opened for
4164 print >> compiler, "0 1 50 50"
4165 print >> compiler, "1 2 49 49"
4166 print >> compiler, "2 2 49 49"
4167 print >> compiler, "2"
4169 The `compile` method returns an actual FST instance:
4171 sheep_machine = compiler.compile()
4173 Compilation flushes the internal buffer, so the compiler instance can be
4174 reused to compile new machines with the same symbol tables (etc.)
4177 fst_type: A string indicating the container type for the compiled FST.
4178 arc_type: A string indicating the arc type for the compiled FST.
4179 isymbols: An optional SymbolTable used to label input symbols.
4180 osymbols: An optional SymbolTable used to label output symbols.
4181 ssymbols: An optional SymbolTable used to label states.
4182 acceptor: Should the FST be rendered in acceptor format if possible?
4183 keep_isymbols: Should the input symbol table be stored in the FST?
4184 keep_osymbols: Should the output symbol table be stored in the FST?
4185 keep_state_numbering: Should the state numbering be preserved?
4186 allow_negative_labels: Should negative labels be allowed? (Not
4187 recommended; may cause conflicts).
4191 string fst_type=b"vector",
4192 string arc_type=b"standard",
4193 SymbolTable isymbols=None,
4194 SymbolTable osymbols=None,
4195 SymbolTable ssymbols=None,
4196 bool acceptor=False,
4197 bool keep_isymbols=False,
4198 bool keep_osymbols=False,
4199 bool keep_state_numbering=False,
4200 bool allow_negative_labels=False):
4201 self._sstrm.reset(new stringstream())
4202 self._fst_type = tostring(fst_type)
4203 self._arc_type = tostring(arc_type)
4204 self._isymbols = NULL
4205 if isymbols is not None:
4206 self._isymbols = isymbols._table
4207 self._osymbols = NULL
4208 if osymbols is not None:
4209 self._osymbols = osymbols._table
4210 self._ssymbols = NULL
4211 if ssymbols is not None:
4212 self._ssymbols = ssymbols._table
4213 self._acceptor = acceptor
4214 self._keep_isymbols = keep_isymbols
4215 self._keep_osymbols = keep_osymbols
4216 self._keep_state_numbering = keep_state_numbering
4217 self._allow_negative_labels = allow_negative_labels
4219 cpdef _Fst compile(self):
4223 Compiles the FST in the compiler string buffer.
4225 This method compiles the FST and returns the resulting machine.
4228 The FST described by the compiler string buffer.
4231 FstOpError: Compilation failed.
4233 cdef unique_ptr[fst.FstClass] tfst
4234 tfst.reset(fst.CompileFstInternal(deref(self._sstrm),
4235 b"<pywrapfst>", self._fst_type, self._arc_type, self._isymbols,
4236 self._osymbols, self._ssymbols, self._acceptor, self._keep_isymbols,
4237 self._keep_osymbols, self._keep_state_numbering,
4238 self._allow_negative_labels))
4239 self._sstrm.reset(new stringstream())
4240 if tfst.get() == NULL:
4241 raise FstOpError("Compilation failed")
4242 return _init_XFst(tfst.release())
4244 cpdef void write(self, expression):
4248 Writes a string into the compiler string buffer.
4250 This method adds a line to the compiler string buffer. It is normally
4251 invoked using the right shift operator, like so:
4253 compiler = fst.Compiler()
4254 print >> compiler, "0 0 49 49"
4255 print >> compiler, "0"
4258 expression: A string expression to add to compiler string buffer.
4260 deref(self._sstrm) << tostring(expression)
4263 ## FarReader and FarWriter.
4266 cdef class FarReader(object):
4271 FAR ("Fst ARchive") reader object.
4273 This class is used to read a FAR from disk. FARs contain one or more FSTs (of
4274 the same arc type) indexed by a unique string key. To construct a FarReader
4275 object, use the `open` class method.
4278 arc_type: A string indicating the arc type.
4279 far_type: A string indicating the FAR type.
4283 raise FstDeletedConstructorError(
4284 "Cannot construct {}".format(self.__class__.__name__))
4287 return "<{} FarReader at 0x{:x}>".format(self.far_type(), id(self))
4290 def open(cls, *filenames):
4292 FarReader.open(*filenames)
4294 Creates a FarReader object.
4296 This class method creates a FarReader given the string location of one or
4297 more FAR files on disk.
4300 *filenames: The string location of one or more input FAR files.
4303 A new FarReader instance.
4306 FstIOError: Read failed.
4308 filenames = [tostring(filename) for filename in filenames]
4309 cdef unique_ptr[fst.FarReaderClass] tfar
4310 tfar.reset(fst.FarReaderClass.Open(filenames))
4311 if tfar.get() == NULL:
4312 raise FstIOError("Read failed: {!r}".format(filenames))
4313 cdef FarReader result = FarReader.__new__(FarReader)
4314 result._reader.reset(tfar.release())
4317 # This just registers this class as a possible iterator.
4321 # Magic method used to get a Pythonic API out of the C++ API.
4325 cdef string k = self.get_key()
4326 cdef _Fst f = self.get_fst()
4330 cpdef string arc_type(self):
4334 Returns a string indicating the arc type.
4336 return self._reader.get().ArcType()
4338 cpdef bool done(self):
4342 Indicates whether the iterator is exhausted or not.
4345 True if the iterator is exhausted, False otherwise.
4347 return self._reader.get().Done()
4349 cpdef bool error(self):
4353 Indicates whether the FarReader has encountered an error.
4356 True if the FarReader is in an errorful state, False otherwise.
4358 return self._reader.get().Error()
4360 cpdef string far_type(self):
4361 return fst.GetFarTypeString(self._reader.get().Type())
4363 cpdef bool find(self, key):
4367 Sets the current position to the first entry greater than or equal to the
4368 key (a string) and indicates whether or not a match was found.
4374 True if the key was found, False otherwise.
4376 return self._reader.get().Find(tostring(key))
4378 cpdef _Fst get_fst(self):
4382 Returns the FST at the current position.
4385 A copy of the FST at the current position.
4387 return _init_XFst(new fst.FstClass(
4388 deref(self._reader.get().GetFstClass())))
4390 cpdef string get_key(self):
4394 Returns the string key at the current position.
4397 The string key at the current position.
4399 return self._reader.get().GetKey()
4401 cpdef void next(self):
4405 Advances the iterator.
4407 self._reader.get().Next()
4409 cpdef void reset(self):
4413 Resets the iterator to the initial position.
4415 self._reader.get().Reset()
4417 # Dictionary-like access by combining `find` and `get_fst`.
4418 def __getitem__(self, key):
4419 if not self.find(key):
4421 return self.get_fst()
4424 cdef class FarWriter(object):
4429 FAR ("Fst ARchive") writer object.
4431 This class is used to write FSTs (of the same arc type) to a FAR on disk. To
4432 construct a FarWriter, use the `create` class method.
4434 Note that the data is not guaranteed to flush to disk until the FarWriter
4435 is garbage-collected. If a FarWriter has been assigned to only one variable,
4436 then calling `del` on that variable should decrement the object's reference
4437 count from 1 to 0, triggering a flush to disk on the next GC cycle.
4440 arc_type: A string indicating the arc type.
4441 far_type: A string indicating the FAR type.
4445 raise FstDeletedConstructorError(
4446 "Cannot construct {}".format(self.__class__.__name__))
4449 return "<{} FarWriter at 0x{:x}>".format(self.far_type(), id(self))
4452 def create(cls, filename, arc_type=b"standard", far_type=b"default"):
4456 Creates a FarWriter object.
4458 This class method creates a FarWriter given the desired output location,
4459 arc type, and FAR type.
4462 filename: The string location for the output FAR files.
4463 arc_type: A string indicating the arc type.
4464 far_type: A string indicating the FAR type; one of: "fst", "stlist",
4465 "sttable", "sstable", "default".
4468 A new FarWriter instance.
4471 FstIOError: Read failed.
4473 cdef fst.FarType ft = fst.GetFarType(tostring(far_type))
4474 cdef fst.FarWriterClass *tfar = fst.FarWriterClass.Create(
4475 tostring(filename), tostring(arc_type), ft)
4477 raise FstIOError("Open failed: {!r}".format(filename))
4478 cdef FarWriter result = FarWriter.__new__(FarWriter)
4479 result._writer.reset(tfar)
4482 # NB: Invoking this method is DANGEROUS: calling any other method on the
4483 # instance after this is invoked may result in a null dereference.
4484 cdef void _close(self):
4485 self._writer.reset()
4487 cpdef void add(self, key, _Fst ifst) except *:
4489 add(self, key, ifst)
4491 Adds an FST to the FAR.
4493 This method adds an FST to the FAR which can be retrieved with the
4494 specified string key.
4497 key: The string used to key the input FST.
4498 ifst: The FST to write to the FAR.
4501 FstArgError: Key out of order.
4502 FstOpError: Incompatible or invalid arc type.
4504 # Failure here results from passing an FST with a different arc type than
4505 # used by the FAR was initialized to use.
4506 if not self._writer.get().Add(tostring(key), deref(ifst._fst)):
4507 raise FstOpError("Incompatible or invalid arc type")
4508 # An error here usually indicates a key out of order.
4509 if self._writer.get().Error():
4510 raise FstArgError("Key out of order")
4512 cpdef string arc_type(self):
4516 Returns a string indicating the arc type.
4518 return self._writer.get().ArcType()
4520 cpdef bool error(self):
4524 Indicates whether the FarWriter has encountered an error.
4527 True if the FarWriter is in an errorful state, False otherwise.
4529 return self._writer.get().Error()
4531 cpdef string far_type(self):
4535 Returns a string indicating the FAR type.
4537 return fst.GetFarTypeString(self._writer.get().Type())
4539 # Dictionary-like assignment.
4540 def __setitem__(self, key, _Fst fst):
4544 ## Cleanup operations for module entrance and exit.
4547 # Masks fst_error_fatal flags while this module is running, returning to the
4548 # previous state upon module exit.
4551 _fst_error_fatal_old = fst.FLAGS_fst_error_fatal
4552 fst.FLAGS_fst_error_fatal = False
4556 def _reset_fst_error_fatal():
4557 fst.FLAGS_fst_error_fatal = _fst_error_fatal_old