2 * Copyright (c) 2009 Kov Chai <tchaikov@gmail.com>
4 * The contents of this file are subject to the terms of either the GNU Lesser
5 * General Public License Version 2.1 only ("LGPL") or the Common Development and
6 * Distribution License ("CDDL")(collectively, the "License"). You may not use this
7 * file except in compliance with the License. You can obtain a copy of the CDDL at
8 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
9 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
10 * specific language governing permissions and limitations under the License. When
11 * distributing the software, include this License Header Notice in each file and
12 * include the full text of the License in the License file as well as the
15 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
17 * For Covered Software in this distribution, this License shall be governed by the
18 * laws of the State of California (excluding conflict-of-law provisions).
19 * Any litigation relating to this License shall be subject to the jurisdiction of
20 * the Federal Courts of the Northern District of California and the state courts
21 * of the State of California, with venue lying in Santa Clara County, California.
25 * If you wish your version of this file to be governed by only the CDDL or only
26 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
27 * include this software in this distribution under the [CDDL or LGPL Version 2.1]
28 * license." If you don't indicate a single choice of license, a recipient has the
29 * option to distribute your version of this file under either the CDDL or the LGPL
30 * Version 2.1, or to extend the choice of license to its licensees as provided
31 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
32 * Version 2 license, then the option applies only if the new code is made subject
33 * to such option by the copyright holder.
38 #include "arpa_conv.h"
42 // convert CArpaSlm::TLeaf to CThreadSlm::TLeaf
// Data members and constructor of the leaf converter (CArpaLeafConv).
// NOTE(review): this view of the file has dropped lines -- the class header,
// access specifiers, braces, one constructor parameter and the initializer
// for m_pr_map are not visible here. Confirm against the complete file.
//
// usingLogPr  - copied from the constructor argument; it selects which
//               assertion on the table entry applies in get_pr_index().
// m_pr_table  - reference bound to *pr_table; indexed by the value found
//               in m_pr_map.
// m_pr_map    - reference member searched by probability value; its mapped
//               value is used as the index into m_pr_table.
46 const bool usingLogPr;
47 CompressedTable& m_pr_table;
48 RealIndexMap& m_pr_map;
// The constructor dereferences pr_table unconditionally, so callers must
// pass a non-null pointer. Both visible call sites pass three arguments
// (flag, map pointer, table pointer); presumably the middle parameter is the
// RealIndexMap* that m_pr_map is bound to -- TODO confirm.
51 CArpaLeafConv(bool usingLogPr_,
53 CompressedTable* pr_table) :
54 usingLogPr(usingLogPr_),
55 m_pr_table(*pr_table),
// Function-call operator: builds a CThreadSlm::TLeaf from a CArpaSlm::TLeaf.
// The wid, bon and bol fields are forwarded unchanged through the matching
// setters; the probability leaf.pr is not copied but translated into an
// index by get_pr_index().
// NOTE(review): the return-type line, the braces and the statements after
// the pr_idx lookup are not visible in this view. Presumably pr_idx is
// stored via set_pr() and tleaf is returned (compress() uses this object as
// the std::transform operation) -- confirm against the complete file.
61 operator()(const CArpaSlm::TLeaf& leaf)
63 CThreadSlm::TLeaf tleaf;
64 tleaf.set_wid(leaf.wid);
65 tleaf.set_bon(leaf.bon);
66 tleaf.set_bol(leaf.bol);
67 unsigned pr_idx = get_pr_index(leaf.pr);
73 // lookup the Real/Effective value in the RealIndexMap for its index
74 // in the CompressedTable
// Finds the m_pr_table index associated with the probability pr.
//
// Lookup is attempted in two steps:
//   1. search m_pr_map for pr itself;
//   2. on a miss, pass pr through EffectivePr() and then OriginalPr() and
//      search again with the result.
// A miss on the second attempt is guarded only by assert(), so it is not
// checked when assertions are compiled out (NDEBUG).
// NOTE(review): the return-type line, the final return statement and the
// braces are not visible in this view; both visible callers assign the
// result to an unsigned.
77 get_pr_index(float pr)
79 std::map<float, int>::iterator prit = m_pr_map.find(pr);
80 if (prit == m_pr_map.end()) { // This could be caused by precision error
// val is a double while the map is searched through a float-keyed iterator
// type, so the value is presumably converted back to the key type by find().
81 double val = EffectivePr(pr);
82 val = OriginalPr(val);
83 prit = m_pr_map.find(val);
84 assert(prit != m_pr_map.end());
// Sanity checks on the table entry the index points at. The second assert
// requires the entry to be > 0.0 when usingLogPr is set. The first assert's
// opening line is missing from this view; its visible part checks that the
// entry lies strictly between 0.0 and 1.0 -- presumably guarded by
// "usingLogPr ||", TODO confirm.
86 int idx_pr = prit->second;
88 (m_pr_table[idx_pr] > 0.0 && m_pr_table[idx_pr] < 1.0));
89 assert(!usingLogPr || m_pr_table[idx_pr] > 0.0);
95 // convert CArpaSlm::TNode to CThreadSlm::TNode
// Data members and constructor of the node converter (CArpaNodeConv).
// NOTE(review): the class header, access specifiers, braces and the
// initializer for m_bow_map are not visible in this view -- confirm against
// the complete file.
//
// usingLogPr   - copied from the constructor argument and forwarded to the
//                embedded leaf converter.
// m_leaf_conv  - a CArpaLeafConv owned by value; operator() uses it only for
//                its get_pr_index() lookup.
// m_bow_table  - reference bound to *bow_table.
// m_bow_map    - reference member searched by get_bow_index().
99 const bool usingLogPr;
100 CArpaLeafConv m_leaf_conv;
101 CompressedTable& m_bow_table;
102 RealIndexMap& m_bow_map;
// pr_map and pr_table are passed straight through to m_leaf_conv;
// bow_table is dereferenced unconditionally and must therefore be non-null.
// m_leaf_conv is initialized from the member usingLogPr, which is declared
// (and so initialized) before it.
105 CArpaNodeConv(bool usingLogPr_,
106 RealIndexMap* pr_map,
107 CompressedTable* pr_table,
108 RealIndexMap* bow_map,
109 CompressedTable* bow_table) :
110 usingLogPr(usingLogPr_),
111 m_leaf_conv(usingLogPr, pr_map, pr_table),
112 m_bow_table(*bow_table),
// Function-call operator: builds a CThreadSlm::TNode from a CArpaSlm::TNode.
// wid, bon, bol and ch are forwarded unchanged; pr and bow are replaced by
// indices obtained from m_leaf_conv.get_pr_index() and get_bow_index()
// respectively.
// NOTE(review): the return-type line, the braces and the return statement
// are not visible in this view; presumably tnode is returned, since
// compress() uses this object as the std::transform operation -- confirm.
118 operator()(const CArpaSlm::TNode& node)
120 CThreadSlm::TNode tnode;
121 tnode.set_wid(node.wid);
122 tnode.set_bon(node.bon);
123 tnode.set_bol(node.bol);
124 tnode.set_ch(node.ch);
// Probability: stored as an index, resolved by the embedded leaf converter.
125 unsigned pr_idx = m_leaf_conv.get_pr_index(node.pr);
126 tnode.set_pr(pr_idx);
// Back-off weight: stored as an index as well.
127 unsigned bow_idx = get_bow_index(node.bow);
128 tnode.set_bow(bow_idx);
// Finds the index that m_bow_map associates with the back-off weight bow.
//
// Same two-step scheme as the probability lookup: search for bow directly
// and, on a miss, retry with OriginalBow(EffectiveBow(bow)). A miss on the
// retry is guarded only by assert(), so it is unchecked when assertions are
// compiled out (NDEBUG).
// Returns the mapped value of the entry that was found; unlike
// get_pr_index(), no range assertion on the table entry is visible here.
// NOTE(review): the iterator is spelled FreqMap::iterator although m_bow_map
// is declared as RealIndexMap&. This only works if the two iterator types
// are compatible -- confirm the typedefs in arpa_conv.h. The return-type
// line and the braces are not visible in this view.
133 get_bow_index(float bow)
135 FreqMap::iterator bowit = m_bow_map.find(bow);
136 if (bowit == m_bow_map.end()) {
137 double val = EffectiveBow(bow);
138 val = OriginalBow(val);
139 bowit = m_bow_map.find(val);
140 assert(bowit != m_bow_map.end());
142 return bowit->second;
// Converts every level of an ARPA language model (slm) into arrays of
// CThreadSlm nodes/leaves whose pr/bow fields are table indices.
//
// Parameters:
//   slm                  - source model; read through getN(), getLevel(),
//                          getLevelSize(), getLastLevel() and usingLogPr().
//   pr_table, pr_map     - handed to both converters for probability lookup.
//   bow_table, bow_map   - handed to the node converter for back-off lookup.
//   nodeLevels (out)     - assigned the per-level TNode arrays.
//   leafLevel  (out)     - assigned the TLeaf array of the last level.
//
// Ownership: every array is allocated here with new[] and handed to the
// caller through the out-parameters; nothing in this function frees them.
// NOTE(review): the return-type line and all braces are not visible in this
// view (the second declarations of `level` and `len` presumably live in a
// nested scope) -- confirm against the complete file.
147 compress(const CArpaSlm& slm,
148 CompressedTable& pr_table, RealIndexMap& pr_map,
149 CompressedTable& bow_table, RealIndexMap& bow_map,
150 TNodeLevels& nodeLevels, CThreadSlm::TLeaf*& leafLevel)
152 CArpaLeafConv leaf_conv(slm.usingLogPr(), &pr_map, &pr_table);
153 CArpaNodeConv node_conv(
154 slm.usingLogPr(), &pr_map, &pr_table, &bow_map, &bow_table);
155 const int N = slm.getN();
156 TNodeLevels node_levels(N);
// Levels 0 .. N-1 hold nodes. Each array gets one extra trailing element.
157 for (int lvl = 0; lvl < N; ++lvl) {
158 const CArpaSlm::TNodeLevel& level = slm.getLevel(lvl);
159 unsigned len = level.size();
160 node_levels[lvl] = new CThreadSlm::TNode[len + 1];
161 std::transform(level.begin(), level.end(),
162 node_levels[lvl], node_conv);
// The extra element is zero-filled and its ch is set to the size of the
// next level (lvl + 1).
163 memset(&node_levels[lvl][len], 0, sizeof(CThreadSlm::TNode));
164 node_levels[lvl][len].set_ch(slm.getLevelSize(lvl + 1));
// Last level holds leaves; it also gets one zero-filled trailing element.
167 const CArpaSlm::TLeafLevel& level = slm.getLastLevel();
168 unsigned len = level.size();
169 CThreadSlm::TLeaf* leaf_level = new CThreadSlm::TLeaf[len + 1];
170 std::transform(level.begin(), level.end(),
171 leaf_level, leaf_conv);
172 memset(&leaf_level[len], 0, sizeof(CThreadSlm::TLeaf));
// Publish the results only after all levels have been converted.
173 nodeLevels = node_levels;
174 leafLevel = leaf_level;