OpenFst: Release 1.6
+ * Optimized label lookup in SymbolTable (1.6.9)
* Fixed PROGRAM_FLAGS documentation string in binaries (1.6.8)
* Fixed handling of symbol tables in EpsNormalize (1.6.8)
* Fixed HashMatcher issues with SetState() and Find() consistency (1.6.8)
-OpenFst: Release 1.6.8.
+OpenFst: Release 1.6.9.
OpenFst is a library for constructing, combining, optimizing, and searching
weighted finite-state transducers (FSTs).
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for OpenFst 1.6.8.
+# Generated by GNU Autoconf 2.69 for OpenFst 1.6.9.
#
# Report bugs to <help@www.openfst.org>.
#
# Identity of this package.
PACKAGE_NAME='OpenFst'
PACKAGE_TARNAME='openfst'
-PACKAGE_VERSION='1.6.8'
-PACKAGE_STRING='OpenFst 1.6.8'
+PACKAGE_VERSION='1.6.9'
+PACKAGE_STRING='OpenFst 1.6.9'
PACKAGE_BUGREPORT='help@www.openfst.org'
PACKAGE_URL=''
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures OpenFst 1.6.8 to adapt to many kinds of systems.
+\`configure' configures OpenFst 1.6.9 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of OpenFst 1.6.8:";;
+ short | recursive ) echo "Configuration of OpenFst 1.6.9:";;
esac
cat <<\_ACEOF
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-OpenFst configure 1.6.8
+OpenFst configure 1.6.9
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by OpenFst $as_me 1.6.8, which was
+It was created by OpenFst $as_me 1.6.9, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
# Define the identity of the package.
PACKAGE='openfst'
- VERSION='1.6.8'
+ VERSION='1.6.9'
cat >>confdefs.h <<_ACEOF
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by OpenFst $as_me 1.6.8, which was
+This file was extended by OpenFst $as_me 1.6.9, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-OpenFst config.status 1.6.8
+OpenFst config.status 1.6.9
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
-AC_INIT([OpenFst], [1.6.8], [help@www.openfst.org])
+AC_INIT([OpenFst], [1.6.9], [help@www.openfst.org])
AM_INIT_AUTOMAKE([foreign nostdinc -Wall -Werror subdir-objects])
AM_PROG_AR
lib_LTLIBRARIES = libfstcompact.la
libfstcompact_la_SOURCES = compact8_acceptor-fst.cc compact8_string-fst.cc compact8_unweighted-fst.cc compact8_unweighted_acceptor-fst.cc compact8_weighted_string-fst.cc compact16_acceptor-fst.cc compact16_string-fst.cc compact16_unweighted-fst.cc compact16_unweighted_acceptor-fst.cc compact16_weighted_string-fst.cc compact64_acceptor-fst.cc compact64_string-fst.cc compact64_unweighted-fst.cc compact64_unweighted_acceptor-fst.cc compact64_weighted_string-fst.cc
-libfstcompact_la_LDFLAGS = -version-info 11:0:0
+libfstcompact_la_LDFLAGS = -version-info 13:0:0
libfstcompact_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
compact8_acceptor_fst_la_SOURCES = compact8_acceptor-fst.cc
libfst_LTLIBRARIES = compact8_acceptor-fst.la compact8_string-fst.la compact8_unweighted-fst.la compact8_unweighted_acceptor-fst.la compact8_weighted_string-fst.la compact16_acceptor-fst.la compact16_string-fst.la compact16_unweighted-fst.la compact16_unweighted_acceptor-fst.la compact16_weighted_string-fst.la compact64_acceptor-fst.la compact64_string-fst.la compact64_unweighted-fst.la compact64_unweighted_acceptor-fst.la compact64_weighted_string-fst.la
lib_LTLIBRARIES = libfstcompact.la
libfstcompact_la_SOURCES = compact8_acceptor-fst.cc compact8_string-fst.cc compact8_unweighted-fst.cc compact8_unweighted_acceptor-fst.cc compact8_weighted_string-fst.cc compact16_acceptor-fst.cc compact16_string-fst.cc compact16_unweighted-fst.cc compact16_unweighted_acceptor-fst.cc compact16_weighted_string-fst.cc compact64_acceptor-fst.cc compact64_string-fst.cc compact64_unweighted-fst.cc compact64_unweighted_acceptor-fst.cc compact64_weighted_string-fst.cc
-libfstcompact_la_LDFLAGS = -version-info 11:0:0
+libfstcompact_la_LDFLAGS = -version-info 13:0:0
libfstcompact_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
compact8_acceptor_fst_la_SOURCES = compact8_acceptor-fst.cc
compact8_acceptor_fst_la_LDFLAGS = -module
if HAVE_SCRIPT
libfstcompressscript_la_SOURCES = compress-script.cc
-libfstcompressscript_la_LDFLAGS = -version-info 11:0:0
+libfstcompressscript_la_LDFLAGS = -version-info 13:0:0
libfstcompressscript_la_LIBADD = \
../../script/libfstscript.la \
../../lib/libfst.la -lz -lm $(DL_LIBS)
@HAVE_BIN_TRUE@fstcompress_SOURCES = fstcompress.cc
@HAVE_BIN_TRUE@fstrandmod_SOURCES = fstrandmod.cc
@HAVE_SCRIPT_TRUE@libfstcompressscript_la_SOURCES = compress-script.cc
-@HAVE_SCRIPT_TRUE@libfstcompressscript_la_LDFLAGS = -version-info 11:0:0
+@HAVE_SCRIPT_TRUE@libfstcompressscript_la_LDFLAGS = -version-info 13:0:0
@HAVE_SCRIPT_TRUE@libfstcompressscript_la_LIBADD = \
@HAVE_SCRIPT_TRUE@ ../../script/libfstscript.la \
@HAVE_SCRIPT_TRUE@ ../../lib/libfst.la -lz -lm $(DL_LIBS)
lib_LTLIBRARIES = libfstconst.la
libfstconst_la_SOURCES = const8-fst.cc const16-fst.cc const64-fst.cc
-libfstconst_la_LDFLAGS = -version-info 11:0:0 -lm $(DL_LIBS)
+libfstconst_la_LDFLAGS = -version-info 13:0:0 -lm $(DL_LIBS)
libfstconst_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
const8_fst_la_SOURCES = const8-fst.cc
libfst_LTLIBRARIES = const8-fst.la const16-fst.la const64-fst.la
lib_LTLIBRARIES = libfstconst.la
libfstconst_la_SOURCES = const8-fst.cc const16-fst.cc const64-fst.cc
-libfstconst_la_LDFLAGS = -version-info 11:0:0 -lm $(DL_LIBS)
+libfstconst_la_LDFLAGS = -version-info 13:0:0 -lm $(DL_LIBS)
libfstconst_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
const8_fst_la_SOURCES = const8-fst.cc
const8_fst_la_LDFLAGS = -module
endif
libfstfar_la_SOURCES = sttable.cc stlist.cc
-libfstfar_la_LDFLAGS = -version-info 11:0:0
+libfstfar_la_LDFLAGS = -version-info 13:0:0
libfstfar_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
if HAVE_SCRIPT
libfstfarscript_la_SOURCES = far-class.cc farscript.cc getters.cc script-impl.cc \
strings.cc
-libfstfarscript_la_LDFLAGS = -version-info 11:0:0
+libfstfarscript_la_LDFLAGS = -version-info 13:0:0
libfstfarscript_la_LIBADD = \
libfstfar.la ../../script/libfstscript.la \
../../lib/libfst.la -lm $(DL_LIBS)
@HAVE_SCRIPT_FALSE@lib_LTLIBRARIES = libfstfar.la
@HAVE_SCRIPT_TRUE@lib_LTLIBRARIES = libfstfar.la libfstfarscript.la
libfstfar_la_SOURCES = sttable.cc stlist.cc
-libfstfar_la_LDFLAGS = -version-info 11:0:0
+libfstfar_la_LDFLAGS = -version-info 13:0:0
libfstfar_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
@HAVE_SCRIPT_TRUE@libfstfarscript_la_SOURCES = far-class.cc farscript.cc getters.cc script-impl.cc \
@HAVE_SCRIPT_TRUE@ strings.cc
-@HAVE_SCRIPT_TRUE@libfstfarscript_la_LDFLAGS = -version-info 11:0:0
+@HAVE_SCRIPT_TRUE@libfstfarscript_la_LDFLAGS = -version-info 13:0:0
@HAVE_SCRIPT_TRUE@libfstfarscript_la_LIBADD = \
@HAVE_SCRIPT_TRUE@ libfstfar.la ../../script/libfstscript.la \
@HAVE_SCRIPT_TRUE@ ../../lib/libfst.la -lm $(DL_LIBS)
LOG(ERROR) << "FarReaderClass::Open: No files specified";
return nullptr;
}
- auto it = filenames.cbegin();
- const auto arc_type = LoadArcTypeFromFar(*it);
+ const auto arc_type = LoadArcTypeFromFar(filenames.front());
if (arc_type.empty()) return nullptr;
- // FIXME(kbg): Is any of this really necessary? I am doing this purely
- // to conform to what I did with fst::script::Replace.
- ++it;
- for (; it != filenames.cend(); ++it) {
- const string other_arc_type = LoadArcTypeFromFar(*it);
- if (other_arc_type.empty()) return nullptr;
- if (arc_type != other_arc_type) {
- LOG(ERROR) << "FarReaderClass::Open: Trying to open FARs with "
- << "non-matching arc types:\n\t" << arc_type << " and "
- << other_arc_type;
- return nullptr;
- }
- }
OpenFarReaderClassArgs2 args(filenames);
args.retval = nullptr;
Apply<Operation<OpenFarReaderClassArgs2>>("OpenFarReaderClass", arc_type,
if HAVE_SCRIPT
libfstlinearscript_la_SOURCES = linearscript.cc
-libfstlinearscript_la_LDFLAGS = -version-info 11:0:0 -lm $(DL_LIBS)
+libfstlinearscript_la_LDFLAGS = -version-info 13:0:0 -lm $(DL_LIBS)
libfstlinearscript_la_LIBADD = ../../script/libfstscript.la \
../../lib/libfst.la -lm $(DL_LIBS)
endif
@HAVE_BIN_TRUE@fstlinear_SOURCES = fstlinear.cc
@HAVE_BIN_TRUE@fstloglinearapply_SOURCES = fstloglinearapply.cc
@HAVE_SCRIPT_TRUE@libfstlinearscript_la_SOURCES = linearscript.cc
-@HAVE_SCRIPT_TRUE@libfstlinearscript_la_LDFLAGS = -version-info 11:0:0 -lm $(DL_LIBS)
+@HAVE_SCRIPT_TRUE@libfstlinearscript_la_LDFLAGS = -version-info 13:0:0 -lm $(DL_LIBS)
@HAVE_SCRIPT_TRUE@libfstlinearscript_la_LIBADD = ../../script/libfstscript.la \
@HAVE_SCRIPT_TRUE@ ../../lib/libfst.la -lm $(DL_LIBS)
libfstlookahead_la_SOURCES = arc_lookahead-fst.cc ilabel_lookahead-fst.cc \
olabel_lookahead-fst.cc
-libfstlookahead_la_LDFLAGS = -version-info 11:0:0
+libfstlookahead_la_LDFLAGS = -version-info 13:0:0
libfstlookahead_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
arc_lookahead_fst_la_SOURCES = arc_lookahead-fst.cc
libfstlookahead_la_SOURCES = arc_lookahead-fst.cc ilabel_lookahead-fst.cc \
olabel_lookahead-fst.cc
-libfstlookahead_la_LDFLAGS = -version-info 11:0:0
+libfstlookahead_la_LDFLAGS = -version-info 13:0:0
libfstlookahead_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
arc_lookahead_fst_la_SOURCES = arc_lookahead-fst.cc
arc_lookahead_fst_la_LDFLAGS = -module
if HAVE_SCRIPT
lib_LTLIBRARIES = libfstmpdtscript.la
libfstmpdtscript_la_SOURCES = mpdtscript.cc
-libfstmpdtscript_la_LDFLAGS = -version-info 11:0:0
+libfstmpdtscript_la_LDFLAGS = -version-info 13:0:0
libfstmpdtscript_la_LIBADD = ../../script/libfstscript.la \
../../lib/libfst.la -lm $(DL_LIBS)
endif
@HAVE_BIN_TRUE@mpdtreverse_SOURCES = mpdtreverse.cc
@HAVE_SCRIPT_TRUE@lib_LTLIBRARIES = libfstmpdtscript.la
@HAVE_SCRIPT_TRUE@libfstmpdtscript_la_SOURCES = mpdtscript.cc
-@HAVE_SCRIPT_TRUE@libfstmpdtscript_la_LDFLAGS = -version-info 11:0:0
+@HAVE_SCRIPT_TRUE@libfstmpdtscript_la_LDFLAGS = -version-info 13:0:0
@HAVE_SCRIPT_TRUE@libfstmpdtscript_la_LIBADD = ../../script/libfstscript.la \
@HAVE_SCRIPT_TRUE@ ../../lib/libfst.la -lm $(DL_LIBS)
ngram_fst_la_LDFLAGS = -module
libfstngram_la_SOURCES = bitmap-index.cc ngram-fst.cc nthbit.cc
-libfstngram_la_LDFLAGS = -version-info 11:0:0
+libfstngram_la_LDFLAGS = -version-info 13:0:0
libfstngram_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
ngram_fst_la_SOURCES = bitmap-index.cc ngram-fst.cc nthbit.cc
ngram_fst_la_LDFLAGS = -module
libfstngram_la_SOURCES = bitmap-index.cc ngram-fst.cc nthbit.cc
-libfstngram_la_LDFLAGS = -version-info 11:0:0
+libfstngram_la_LDFLAGS = -version-info 13:0:0
libfstngram_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
all: all-am
if HAVE_SCRIPT
lib_LTLIBRARIES = libfstpdtscript.la
libfstpdtscript_la_SOURCES = getters.cc pdtscript.cc
-libfstpdtscript_la_LDFLAGS = -version-info 11:0:0
+libfstpdtscript_la_LDFLAGS = -version-info 13:0:0
libfstpdtscript_la_LIBADD = ../../script/libfstscript.la \
../../lib/libfst.la -lm $(DL_LIBS)
endif
@HAVE_BIN_TRUE@pdtshortestpath_SOURCES = pdtshortestpath.cc
@HAVE_SCRIPT_TRUE@lib_LTLIBRARIES = libfstpdtscript.la
@HAVE_SCRIPT_TRUE@libfstpdtscript_la_SOURCES = getters.cc pdtscript.cc
-@HAVE_SCRIPT_TRUE@libfstpdtscript_la_LDFLAGS = -version-info 11:0:0
+@HAVE_SCRIPT_TRUE@libfstpdtscript_la_LDFLAGS = -version-info 13:0:0
@HAVE_SCRIPT_TRUE@libfstpdtscript_la_LIBADD = ../../script/libfstscript.la \
@HAVE_SCRIPT_TRUE@ ../../lib/libfst.la -lm $(DL_LIBS)
self._reader.get().Reset()
def __getitem__(self, key):
- cdef string ckey = tostring(key)
- if self.get_key() == ckey or self._reader.get().Find(ckey):
+ if self._reader.get().Find(tostring(key)):
return self.get_fst()
else:
raise KeyError(key)
lib_LTLIBRARIES = libfstspecial.la
libfstspecial_la_SOURCES = phi-fst.cc rho-fst.cc sigma-fst.cc
-libfstspecial_la_LDFLAGS = -version-info 11:0:0 -lm $(DL_LIBS)
+libfstspecial_la_LDFLAGS = -version-info 13:0:0 -lm $(DL_LIBS)
libfstspecial_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
phi_fst_la_SOURCES = phi-fst.cc
libfst_LTLIBRARIES = phi-fst.la rho-fst.la sigma-fst.la
lib_LTLIBRARIES = libfstspecial.la
libfstspecial_la_SOURCES = phi-fst.cc rho-fst.cc sigma-fst.cc
-libfstspecial_la_LDFLAGS = -version-info 11:0:0 -lm $(DL_LIBS)
+libfstspecial_la_LDFLAGS = -version-info 13:0:0 -lm $(DL_LIBS)
libfstspecial_la_LIBADD = ../../lib/libfst.la -lm $(DL_LIBS)
phi_fst_la_SOURCES = phi-fst.cc
phi_fst_la_LDFLAGS = -module
if (keys_[i].empty()) {
if (!has_stdin_) {
streams_[i] = &std::cin;
- // sources_[i] = "stdin";
has_stdin_ = true;
} else {
FSTERROR() << "FstFarReader::FstFarReader: standard input should "
private:
// This is non-static because the constructor for non-idempotent weights will
- // result in a an error.
+ // result in an error.
const NaturalLess<Weight> less_{};
};
}
}
-// An A* estimate is a function object that maps from a state ID to a an
+// An A* estimate is a function object that maps from a state ID to an
// estimate of the shortest distance to the final states.
// A trivial A* estimate, yielding a queue which behaves the same in Dijkstra's
private:
// This is non-static because the constructor for non-idempotent weights will
- // result in a an error.
+ // result in an error.
const NaturalLess<Weight> less_{};
};
using Label = typename Arc::Label;
const auto props = fst->Properties(kFstProperties, false);
// Constructs label-to-label maps.
- std::unordered_map<Label, Label> input_map;
- for (auto &ipair : ipairs) input_map[ipair.first] = ipair.second;
- std::unordered_map<Label, Label> output_map;
- for (auto &opair : opairs) output_map[opair.first] = opair.second;
+ const std::unordered_map<Label, Label>
+ input_map(ipairs.begin(), ipairs.end());
+ const std::unordered_map<Label, Label>
+ output_map(opairs.begin(), opairs.end());
for (StateIterator<MutableFst<Arc>> siter(*fst); !siter.Done();
siter.Next()) {
for (MutableArcIterator<MutableFst<Arc>> aiter(fst, siter.Value());
const RelabelFstOptions &opts)
: CacheImpl<Arc>(opts),
fst_(fst.Copy()),
- relabel_input_(false),
- relabel_output_(false) {
+ input_map_(ipairs.begin(), ipairs.end()),
+ output_map_(opairs.begin(), opairs.end()),
+ relabel_input_(!ipairs.empty()),
+ relabel_output_(!opairs.empty()) {
SetProperties(RelabelProperties(fst.Properties(kCopyProperties, false)));
SetType("relabel");
- // Creates input label map.
- if (!ipairs.empty()) {
- for (auto &ipair : ipairs) input_map_[ipair.first] = ipair.second;
- relabel_input_ = true;
- }
- // Creates output label map.
- if (!opairs.empty()) {
- for (auto &opair : opairs) output_map_[opair.first] = opair.second;
- relabel_output_ = true;
- }
}
- RelabelFstImpl(const Fst<Arc> &fst, const SymbolTable *old_isymbols,
+ RelabelFstImpl(const Fst<Arc> &fst,
+ const SymbolTable *old_isymbols,
const SymbolTable *new_isymbols,
const SymbolTable *old_osymbols,
- const SymbolTable *new_osymbols, const RelabelFstOptions &opts)
+ const SymbolTable *new_osymbols,
+ const RelabelFstOptions &opts)
: CacheImpl<Arc>(opts),
fst_(fst.Copy()),
relabel_input_(false),
ShortestDistanceOptions(Queue *state_queue, ArcFilter arc_filter,
StateId source = kNoStateId,
- float delta = kShortestDelta)
+ float delta = kShortestDelta,
+ bool first_path = false)
: state_queue(state_queue),
arc_filter(arc_filter),
source(source),
delta(delta),
- first_path(false) {}
+ first_path(first_path) {}
};
namespace internal {
//
// This computes the shortest distance from the opts.source state to each
// visited state S and stores the value in the distance vector. An
-// nvisited state S has distance Zero(), which will be stored in the
+// unvisited state S has distance Zero(), which will be stored in the
// distance vector if S is less than the maximum visited state. The state
// queue discipline, arc filter, and convergence delta are taken in the
// options argument. The distance vector will contain a unique element for
#ifndef FST_SYMBOL_TABLE_H_
#define FST_SYMBOL_TABLE_H_
-#include <cstring>
#include <functional>
#include <ios>
#include <iostream>
namespace internal {
-// List of symbols with a dense hash for looking up symbol index.
-// Hash uses linear probe, rehashes at 0.75% occupancy, avg 6 bytes overhead
-// per entry. Rehash in place from symbol list.
-//
-// Symbols are stored as c strings to avoid adding memory overhead, but the
-// performance penalty for this is high because rehash must call strlen on
-// every symbol. AddSymbol can be another 2x faster if symbol lengths were
-// stored.
+// List of symbols with a dense hash for looking up symbol index, rehashing at
+// 75% occupancy.
class DenseSymbolMap {
public:
DenseSymbolMap();
DenseSymbolMap(const DenseSymbolMap &x);
- ~DenseSymbolMap();
-
std::pair<int64, bool> InsertOrFind(const string &key);
int64 Find(const string &key) const;
size_t Size() const { return symbols_.size(); }
- string GetSymbol(size_t idx) const {
- return string(symbols_[idx], strlen(symbols_[idx]));
- }
+ const string &GetSymbol(size_t idx) const { return symbols_[idx]; }
void RemoveSymbol(size_t idx);
// num_buckets must be power of 2.
void Rehash(size_t num_buckets);
- const char* NewSymbol(const string &sym);
-
int64 empty_;
- std::vector<const char *> symbols_;
+ std::vector<string> symbols_;
std::hash<string> str_hash_;
std::vector<int64> buckets_;
uint64 hash_mask_;
return Read(strm, filename);
}
- //--------------------------------------------------------
- // Derivable Interface (final)
- //--------------------------------------------------------
+ // DERIVABLE INTERFACE
+
// Creates a reference counted copy.
virtual SymbolTable *Copy() const { return new SymbolTable(*this); }
// value. Adding symbol tables do not result in changes in the base table.
virtual void AddTable(const SymbolTable &table);
- virtual void RemoveSymbol(int64 key) {
- MutateCheck();
- return impl_->RemoveSymbol(key);
+ // Returns the current available key (i.e., highest key + 1) in the symbol
+ // table.
+ virtual int64 AvailableKey() const { return impl_->AvailableKey(); }
+
+ // Return the label-agnostic MD5 check-sum for this table. All new symbols
+ // added to the table will result in an updated checksum. Deprecated.
+ virtual const string &CheckSum() const { return impl_->CheckSum(); }
+
+ virtual int64 GetNthKey(ssize_t pos) const { return impl_->GetNthKey(pos); }
+
+ // Returns the string associated with the key; if the key is out of
+ // range (<0, >max), returns an empty string.
+ virtual string Find(int64 key) const { return impl_->Find(key); }
+
+ // Returns the key associated with the symbol; if the symbol does not exist,
+ // kNoSymbol is returned.
+ virtual int64 Find(const string &symbol) const { return impl_->Find(symbol); }
+
+ // Same as CheckSum(), but returns a label-dependent version.
+ virtual const string &LabeledCheckSum() const {
+ return impl_->LabeledCheckSum();
+ }
+
+ virtual bool Member(int64 key) const { return impl_->Member(key); }
+
+ virtual bool Member(const string &symbol) const {
+ return impl_->Member(symbol);
}
// Returns the name of the symbol table.
virtual const string &Name() const { return impl_->Name(); }
+ // Returns the current number of symbols in table (not necessarily equal to
+ // AvailableKey()).
+ virtual size_t NumSymbols() const { return impl_->NumSymbols(); }
+
+ virtual void RemoveSymbol(int64 key) {
+ MutateCheck();
+ return impl_->RemoveSymbol(key);
+ }
+
// Sets the name of the symbol table.
virtual void SetName(const string &new_name) {
MutateCheck();
impl_->SetName(new_name);
}
- // Return the label-agnostic MD5 check-sum for this table. All new symbols
- // added to the table will result in an updated checksum. Deprecated.
- virtual const string &CheckSum() const { return impl_->CheckSum(); }
-
- // Same as CheckSum(), but returns an label-dependent version.
- virtual const string &LabeledCheckSum() const {
- return impl_->LabeledCheckSum();
- }
-
virtual bool Write(std::ostream &strm) const { return impl_->Write(strm); }
- bool Write(const string &filename) const {
+ virtual bool Write(const string &filename) const {
std::ofstream strm(filename,
std::ios_base::out | std::ios_base::binary);
if (!strm.good()) {
const SymbolTableTextOptions &opts = SymbolTableTextOptions()) const;
// Dump a text representation of the symbol table.
- bool WriteText(const string &filename) const {
+ virtual bool WriteText(const string &filename) const {
std::ofstream strm(filename);
if (!strm.good()) {
LOG(ERROR) << "SymbolTable::WriteText: Can't open file " << filename;
return WriteText(strm);
}
- // Returns the string associated with the key; if the key is out of
- // range (<0, >max), returns an empty string.
- virtual string Find(int64 key) const { return impl_->Find(key); }
-
- // Returns the key associated with the symbol; if the symbol does not exist,
- // kNoSymbol is returned.
- virtual int64 Find(const string &symbol) const { return impl_->Find(symbol); }
-
- // Returns the key associated with the symbol; if the symbol does not exist,
- // kNoSymbol is returned.
- virtual int64 Find(const char *symbol) const { return impl_->Find(symbol); }
-
- virtual bool Member(int64 key) const { return impl_->Member(key); }
-
- virtual bool Member(const string &symbol) const {
- return impl_->Member(symbol);
- }
-
- // Returns the current available key (i.e., highest key + 1) in the symbol
- // table.
- virtual int64 AvailableKey() const { return impl_->AvailableKey(); }
-
- // Returns the current number of symbols in table (not necessarily equal to
- // AvailableKey()).
- virtual size_t NumSymbols() const { return impl_->NumSymbols(); }
-
- virtual int64 GetNthKey(ssize_t pos) const { return impl_->GetNthKey(pos); }
-
private:
explicit SymbolTable(internal::SymbolTableImpl *impl) : impl_(impl) {}
template <class Label>
SymbolTable *RelabelSymbolTable(const SymbolTable *table,
const std::vector<std::pair<Label, Label>> &pairs) {
- auto new_table = new SymbolTable(table->Name().empty() ?
- string() : (string("relabeled_") + table->Name()));
+ auto *new_table = new SymbolTable(
+ table->Name().empty() ? string()
+ : (string("relabeled_") + table->Name()));
for (const auto &pair : pairs) {
new_table->AddSymbol(table->Find(pair.first), pair.second);
}
libfst_la_SOURCES = compat.cc flags.cc fst.cc fst-types.cc mapped-file.cc \
properties.cc symbol-table.cc symbol-table-ops.cc \
weight.cc util.cc
-libfst_la_LDFLAGS = -version-info 11:0:0
+libfst_la_LDFLAGS = -version-info 13:0:0
libfst_la_LIBADD = $(DL_LIBS)
properties.cc symbol-table.cc symbol-table-ops.cc \
weight.cc util.cc
-libfst_la_LDFLAGS = -version-info 11:0:0
+libfst_la_LDFLAGS = -version-info 13:0:0
libfst_la_LIBADD = $(DL_LIBS)
all: all-am
namespace fst {
+// Stores the allow_negative_labels setting and initializes the field separator
+// from FLAGS_fst_field_separator.
+SymbolTableTextOptions::SymbolTableTextOptions(bool allow_negative_labels)
+ : allow_negative_labels(allow_negative_labels),
+ fst_field_separator(FLAGS_fst_field_separator) {}
+
+namespace internal {
+
// Maximum line length in textual symbols file.
static constexpr int kLineLen = 8096;
// Identifies stream data as a symbol table (and its endianity).
static constexpr int32 kSymbolTableMagicNumber = 2125658996;
-SymbolTableTextOptions::SymbolTableTextOptions(bool allow_negative_labels)
- : allow_negative_labels(allow_negative_labels),
- fst_field_separator(FLAGS_fst_field_separator) {}
+// Creates an empty map with 16 buckets, all marked empty. The bucket count is
+// a power of 2 so that masking a hash with (size - 1) selects a bucket.
+// empty_ is declared before buckets_, so it is already initialized when it is
+// used as the fill value here.
+DenseSymbolMap::DenseSymbolMap()
+    : empty_(-1), buckets_(1 << 4, empty_), hash_mask_(buckets_.size() - 1) {}
-namespace internal {
+// Copy constructor: duplicates the symbol list, the bucket table and the hash
+// mask of other. The hash functor is not listed, so it is default-initialized.
+DenseSymbolMap::DenseSymbolMap(const DenseSymbolMap &other)
+ : empty_(-1),
+ symbols_(other.symbols_),
+ buckets_(other.buckets_),
+ hash_mask_(other.hash_mask_) {}
+
+// Looks up key and appends it to the symbol list if it is absent. Returns the
+// position of the symbol in the list, paired with true when the symbol was
+// newly inserted and false when it was already present.
+std::pair<int64, bool> DenseSymbolMap::InsertOrFind(const string &key) {
+  static constexpr float kMaxOccupancyRatio = 0.75;  // Grows when 75% full.
+  if (Size() >= kMaxOccupancyRatio * buckets_.size()) {
+    Rehash(buckets_.size() * 2);
+  }
+  // Probes consecutive buckets (wrapping via the mask) until the key or an
+  // empty bucket is reached.
+  size_t slot = str_hash_(key) & hash_mask_;
+  for (; buckets_[slot] != empty_; slot = (slot + 1) & hash_mask_) {
+    const auto position = buckets_[slot];
+    if (symbols_[position] == key) return {position, false};
+  }
+  // Not found: the new symbol takes the next position in the list.
+  const auto position = Size();
+  buckets_[slot] = position;
+  symbols_.emplace_back(key);
+  return {position, true};
+}
+
+// Returns the position of key in the symbol list, or the empty marker (-1)
+// when the key is not present.
+int64 DenseSymbolMap::Find(const string &key) const {
+  // Probes consecutive buckets (wrapping via the mask) until the key or an
+  // empty bucket is reached.
+  for (size_t slot = str_hash_(key) & hash_mask_; buckets_[slot] != empty_;
+       slot = (slot + 1) & hash_mask_) {
+    const auto position = buckets_[slot];
+    if (symbols_[position] == key) return position;
+  }
+  return empty_;
+}
+
+// Rebuilds the bucket table with num_buckets buckets (must be a power of 2)
+// and re-inserts the position of every stored symbol.
+void DenseSymbolMap::Rehash(size_t num_buckets) {
+  // Resizes the table and marks every bucket empty in one step.
+  buckets_.assign(num_buckets, empty_);
+  hash_mask_ = buckets_.size() - 1;
+  for (size_t i = 0; i < Size(); ++i) {
+    // Symbols are stored as strings, so they are hashed directly without
+    // building a temporary copy.
+    size_t idx = str_hash_(symbols_[i]) & hash_mask_;
+    // Symbols in the list are distinct, so only a free bucket is needed.
+    while (buckets_[idx] != empty_) {
+      idx = (idx + 1) & hash_mask_;
+    }
+    buckets_[idx] = i;
+  }
+}
+
+// Erases the symbol at position idx from the symbol list. Erasing shifts every
+// later symbol down by one position, and buckets hold positions into the
+// symbol list, so the bucket table is rebuilt at its current size.
+void DenseSymbolMap::RemoveSymbol(size_t idx) {
+ symbols_.erase(symbols_.begin() + idx);
+ Rehash(buckets_.size());
+}
SymbolTableImpl *SymbolTableImpl::ReadText(std::istream &strm,
const string &filename,
int64 SymbolTableImpl::AddSymbol(const string &symbol, int64 key) {
if (key == kNoSymbol) return key;
- const std::pair<int64, bool> &insert_key = symbols_.InsertOrFind(symbol);
+ const auto insert_key = symbols_.InsertOrFind(symbol);
if (!insert_key.second) {
- auto key_already = GetNthKey(insert_key.first);
+ const auto key_already = GetNthKey(insert_key.first);
if (key_already == key) return key;
VLOG(1) << "SymbolTable::AddSymbol: symbol = " << symbol
<< " already in symbol_map_ with key = " << key_already
return true;
}
-namespace internal {
-
-DenseSymbolMap::DenseSymbolMap()
- : empty_(-1), buckets_(1 << 4), hash_mask_(buckets_.size() - 1) {
- std::uninitialized_fill(buckets_.begin(), buckets_.end(), empty_);
-}
-
-DenseSymbolMap::DenseSymbolMap(const DenseSymbolMap &x)
- : empty_(-1),
- symbols_(x.Size()),
- buckets_(x.buckets_),
- hash_mask_(x.hash_mask_) {
- for (size_t i = 0; i < Size(); ++i) {
- const auto sz = strlen(x.symbols_[i]) + 1;
- auto *cpy = new char[sz];
- memcpy(cpy, x.symbols_[i], sz);
- symbols_[i] = cpy;
- }
-}
-
-DenseSymbolMap::~DenseSymbolMap() {
- for (size_t i = 0; i < Size(); ++i) {
- delete[] symbols_[i];
- }
-}
-
-std::pair<int64, bool> DenseSymbolMap::InsertOrFind(const string &key) {
- static constexpr float kMaxOccupancyRatio = 0.75; // Grows when 75% full.
- if (Size() >= kMaxOccupancyRatio * buckets_.size()) {
- Rehash(buckets_.size() * 2);
- }
- size_t idx = str_hash_(key) & hash_mask_;
- while (buckets_[idx] != empty_) {
- const auto stored_value = buckets_[idx];
- if (!strcmp(symbols_[stored_value], key.c_str())) {
- return {stored_value, false};
- }
- idx = (idx + 1) & hash_mask_;
- }
- const auto next = Size();
- buckets_[idx] = next;
- symbols_.push_back(NewSymbol(key));
- return {next, true};
-}
-
-int64 DenseSymbolMap::Find(const string &key) const {
- size_t idx = str_hash_(key) & hash_mask_;
- while (buckets_[idx] != empty_) {
- const auto stored_value = buckets_[idx];
- if (!strcmp(symbols_[stored_value], key.c_str())) {
- return stored_value;
- }
- idx = (idx + 1) & hash_mask_;
- }
- return buckets_[idx];
-}
-
-void DenseSymbolMap::Rehash(size_t num_buckets) {
- buckets_.resize(num_buckets);
- hash_mask_ = buckets_.size() - 1;
- std::uninitialized_fill(buckets_.begin(), buckets_.end(), empty_);
- for (size_t i = 0; i < Size(); ++i) {
- size_t idx = str_hash_(string(symbols_[i])) & hash_mask_;
- while (buckets_[idx] != empty_) {
- idx = (idx + 1) & hash_mask_;
- }
- buckets_[idx] = i;
- }
-}
-
-const char *DenseSymbolMap::NewSymbol(const string &sym) {
- auto num = sym.size() + 1;
- auto newstr = new char[num];
- memcpy(newstr, sym.c_str(), num);
- return newstr;
-}
-
-void DenseSymbolMap::RemoveSymbol(size_t idx) {
- delete[] symbols_[idx];
- symbols_.erase(symbols_.begin() + idx);
- Rehash(buckets_.size());
-}
-
-} // namespace internal
-
bool CompatSymbols(const SymbolTable *syms1, const SymbolTable *syms2,
bool warning) {
// Flag can explicitly override this check.
text-io.cc topsort.cc union.cc weight-class.cc verify.cc
libfstscript_la_LIBADD = ../lib/libfst.la -lm $(DL_LIBS)
-libfstscript_la_LDFLAGS = -version-info 11:0:0
+libfstscript_la_LDFLAGS = -version-info 13:0:0
endif
@HAVE_SCRIPT_TRUE@text-io.cc topsort.cc union.cc weight-class.cc verify.cc
@HAVE_SCRIPT_TRUE@libfstscript_la_LIBADD = ../lib/libfst.la -lm $(DL_LIBS)
-@HAVE_SCRIPT_TRUE@libfstscript_la_LDFLAGS = -version-info 11:0:0
+@HAVE_SCRIPT_TRUE@libfstscript_la_LDFLAGS = -version-info 13:0:0
all: all-am
.SUFFIXES: