2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6 * The contents of this file are subject to the terms of either the GNU Lesser
7 * General Public License Version 2.1 only ("LGPL") or the Common Development and
8 * Distribution License ("CDDL")(collectively, the "License"). You may not use this
9 * file except in compliance with the License. You can obtain a copy of the CDDL at
10 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12 * specific language governing permissions and limitations under the License. When
13 * distributing the software, include this License Header Notice in each file and
14 * include the full text of the License in the License file as well as the
17 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19 * For Covered Software in this distribution, this License shall be governed by the
20 * laws of the State of California (excluding conflict-of-law provisions).
21 * Any litigation relating to this License shall be subject to the jurisdiction of
22 * the Federal Courts of the Northern District of California and the state courts
23 * of the State of California, with venue lying in Santa Clara County, California.
27 * If you wish your version of this file to be governed by only the CDDL or only
28 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
29 * include this software in this distribution under the [CDDL or LGPL Version 2.1]
30 * license." If you don't indicate a single choice of license, a recipient has the
31 * option to distribute your version of this file under either the CDDL or the LGPL
32 * Version 2.1, or to extend the choice of license to its licensees as provided
33 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
34 * Version 2 license, then the option applies only if the new code is made subject
35 * to such option by the copyright holder.
38 #include "pinyin_data.h"
39 #include "lattice_states.h"
42 const CPinyinTrie::TWordIdInfo*
43 TLexiconState::getWords(unsigned &num)
47 if (!m_words.empty()) {
52 if (m_bPinyin && m_pPYNode) {
53 num = m_pPYNode->m_nWordId;
54 return m_pPYNode->getWordIdPtr();
61 TLexiconState::print(std::string prefix) const
63 printf("%s", prefix.c_str());
64 printf("from frame[%d] ", m_start);
67 printf("%sdict ", m_pPYNode ? "sys" : "usr");
68 if (!m_syls.empty()) {
70 CSyllables::const_iterator it = m_syls.begin();
71 for (; it != m_syls.end(); ++it)
72 printf("%x:%x:%x ", it->initial, it->final, it->tone);
75 printf("seg_ranges: (");
76 for (std::vector<unsigned>::const_iterator it = m_seg_path.begin();
77 it != m_seg_path.end();
83 printf("%d", m_words.front().m_id);
90 TLatticeState::print(std::string prefix) const
92 printf("%s", prefix.c_str());
94 m_score.toString(valbuf);
95 printf("<State(%d:%d), from word %d, score %s>\n", m_slmState.getLevel(),
96 m_slmState.getIdx(), m_backTraceWordId, valbuf);
99 const unsigned CLatticeStates::beam_width = 48;
100 const TSentenceScore CLatticeStates::filter_ratio_l1 = TSentenceScore(0.12);
101 const TSentenceScore CLatticeStates::filter_ratio_l2 = TSentenceScore(0.02);
102 const TSentenceScore CLatticeStates::filter_threshold_exp =
103 TSentenceScore(-40, -1.0);
106 CTopLatticeStates::push(const TLatticeState& state)
109 if (size() >= m_threshold) {
110 if (m_heap[0] < state) return false;
111 std::pop_heap(m_heap.begin(), m_heap.end());
115 m_heap.push_back(state);
116 std::push_heap(m_heap.begin(), m_heap.end());
121 CTopLatticeStates::pop()
123 std::pop_heap(m_heap.begin(), m_heap.end());
128 CLatticeStates::clear()
136 std::vector<TLatticeState>
137 CLatticeStates::getSortedResult()
139 std::vector<TLatticeState> res;
140 for (CLatticeStates::iterator it = begin(); it != end(); ++it) {
143 std::sort(res.begin(), res.end());
147 std::vector<TLatticeState>
148 CLatticeStates::getFilteredResult()
150 std::vector<TLatticeState> sorted = getSortedResult();
151 std::vector<TLatticeState> res;
152 if (sorted.size() == 0) {
155 res.push_back(sorted[0]);
156 TSentenceScore max_score = sorted[0].m_score;
157 for (size_t i = 1; i < sorted.size(); i++) {
158 TSentenceScore current_score = sorted[i].m_score;
159 if (filter_threshold_exp < current_score
160 && current_score / max_score < filter_ratio_l1) {
163 if (current_score / max_score < filter_ratio_l2) {
166 res.push_back(sorted[i]);
172 CLatticeStates::add(const TLatticeState& state)
174 CSlmState slmState = state.m_slmState;
175 state_map::iterator it = m_stateMap.find(slmState);
176 bool inserted = false;
178 if (it == m_stateMap.end()) {
179 CTopLatticeStates topstates(m_maxBest);
180 inserted = topstates.push(state);
181 m_stateMap.insert(std::make_pair(slmState, topstates));
182 _pushScoreHeap(state.m_score, slmState);
184 inserted = it->second.push(state);
185 slmState = it->second.top().m_slmState;
186 _adjustDown(m_heapIdx[slmState]);
188 if (inserted) m_size++;
190 if (m_size > beam_width) {
191 slmState = m_scoreHeap[0].second;
192 it = m_stateMap.find(slmState);
194 // pop one node from it, and if it's empty, remove it from map and heap
196 if (it->second.size() == 0) {
197 m_stateMap.erase(it);
200 m_scoreHeap[0].first = it->second.top().m_score;
208 CLatticeStates::_pushScoreHeap(TSentenceScore score, CSlmState slmState)
210 m_scoreHeap.push_back(std::make_pair(score, slmState));
211 _adjustUp(m_scoreHeap.size() - 1);
215 CLatticeStates::_popScoreHeap()
217 m_heapIdx.erase(m_scoreHeap[0].second);
218 m_scoreHeap[0] = m_scoreHeap[m_scoreHeap.size() - 1];
219 m_scoreHeap.pop_back();
220 if (m_scoreHeap.size() > 0) {
227 CLatticeStates::_refreshHeapIdx(int heapIdx)
229 m_heapIdx[m_scoreHeap[heapIdx].second] = heapIdx;
233 CLatticeStates::_adjustUp(int node)
235 int parent = (node - 1) / 2;
236 while (parent >= 0) {
237 if (m_scoreHeap[parent].first < m_scoreHeap[node].first) {
238 std::swap(m_scoreHeap[parent], m_scoreHeap[node]);
239 _refreshHeapIdx(parent);
241 parent = (node - 1) / 2;
243 _refreshHeapIdx(node);
250 CLatticeStates::_adjustDown(int node)
252 int left = node * 2 + 1;
253 int right = node * 2 + 2;
254 while (left < (int) m_scoreHeap.size()) {
256 if (m_scoreHeap[node].first < m_scoreHeap[left].first) {
258 } else if (right < (int) m_scoreHeap.size()
259 && m_scoreHeap[node].first < m_scoreHeap[right].first) {
262 _refreshHeapIdx(node);
265 std::swap(m_scoreHeap[node], m_scoreHeap[child]);
266 _refreshHeapIdx(child);
269 right = node * 2 + 2;
273 CLatticeStates::iterator
274 CLatticeStates::begin()
276 CLatticeStates::iterator it;
277 it.m_mainIt = m_stateMap.begin();
278 it.m_mainEnd = m_stateMap.end();
279 it.m_childIt = it.m_mainIt->second.begin();
283 CLatticeStates::iterator
284 CLatticeStates::end()
286 CLatticeStates::iterator it;
287 it.m_mainEnd = it.m_mainIt = m_stateMap.end();
292 CLatticeStates::iterator::operator++()
295 if (m_childIt == m_mainIt->second.end()) {
297 if (m_mainIt != m_mainEnd)
298 m_childIt = m_mainIt->second.begin();
303 CLatticeStates::iterator::operator!=(const CLatticeStates::iterator& rhs)
305 if (m_mainIt == m_mainEnd || rhs.m_mainIt == rhs.m_mainEnd) {
306 return m_mainIt != rhs.m_mainIt;
308 return m_mainIt != rhs.m_mainIt && m_childIt != rhs.m_childIt;
313 CLatticeStates::iterator::operator*()
315 return m_childIt.operator*();
319 CLatticeStates::iterator::operator->()
321 return m_childIt.operator->();