3 * Copyright (c) 2009 Kov Chai <tchaikov@gmail.com>
5 * The contents of this file are subject to the terms of either the GNU Lesser
6 * General Public License Version 2.1 only ("LGPL") or the Common Development and
7 * Distribution License ("CDDL")(collectively, the "License"). You may not use this
8 * file except in compliance with the License. You can obtain a copy of the CDDL at
9 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
10 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11 * specific language governing permissions and limitations under the License. When
12 * distributing the software, include this License Header Notice in each file and
13 * include the full text of the License in the License file as well as the
16 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
18 * For Covered Software in this distribution, this License shall be governed by the
19 * laws of the State of California (excluding conflict-of-law provisions).
20 * Any litigation relating to this License shall be subject to the jurisdiction of
21 * the Federal Courts of the Northern District of California and the state courts
22 * of the State of California, with venue lying in Santa Clara County, California.
26 * If you wish your version of this file to be governed by only the CDDL or only
27 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
28 * include this software in this distribution under the [CDDL or LGPL Version 2.1]
29 * license." If you don't indicate a single choice of license, a recipient has the
30 * option to distribute your version of this file under either the CDDL or the LGPL
31 * Version 2.1, or to extend the choice of license to its licensees as provided
32 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
33 * Version 2 license, then the option applies only if the new code is made subject
34 * to such option by the copyright holder.
47 /* the ARPA style representation of sunpinyin's SLM */
51 TSIMWordId hw[N_GRAM];
57 void load(istream&, const TLexicon&);
58 int load_words(char* buf, const TLexicon& lexicon);
/* default constructor: zero-initializes wid, pr, ch, bon and bol */
59 TLeaf() : wid(0), pr(.0), ch(0), bon(0), bol(0) {}
62 struct TNode : public TLeaf {
64 void load(istream&, const TLexicon&);
65 void load_level0(istream&);
/* storage for one level of the model: TNodeLevel is the element type of
   m_levels, TLeafLevel is the type of m_lastLevel */
68 typedef std::vector<TNode> TNodeLevel;
69 typedef std::vector<TLeaf> TLeafLevel;
72 TNodeLevel m_levels[N_GRAM + 1]; /* [0..N_GRAM] */
73 TLeafLevel m_lastLevel;
74 const bool m_usingLogPr;
78 /* XXX: an ARPA file does not provide this information,
79 so we assume this SLM is a trigram model that does not use LogPr */
80 CArpaSlm() : m_usingLogPr(false), m_N(N_GRAM) {}
/* true when level 0 holds at least one node */
81 bool good() const { return !m_levels[0].empty(); }
/* returns m_N, which the constructor sets to N_GRAM */
82 unsigned getN() const { return m_N; }
/* returns m_usingLogPr, a const flag that the constructor sets to false */
83 bool usingLogPr() const { return m_usingLogPr; }
/* returns the nodes stored at level lvl; lvl indexes m_levels directly
   and is not range-checked here */
84 const TNodeLevel& getLevel(unsigned lvl) const { return m_levels[lvl]; }
/* returns m_lastLevel, whose entries are TLeaf rather than TNode */
85 const TLeafLevel& getLastLevel() const { return m_lastLevel; }
86 unsigned getLevelSize(unsigned lvl) const {
89 return m_levels[lvl].size();
91 return m_lastLevel.size();
95 * initialize the `ch' and `wid' fields of each node in levels
98 void load(const char* filename, const TLexicon& lexicon);
102 * find out the first child of a given node in its next level
103 * @param lvl the level the node belongs to
104 * @param node the node
105 * @param last_child the child index of the previous node
106 * @return the index of the found child
108 unsigned find_1st_child(unsigned lvl, const TNode& node, int last_child);