2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6 * The contents of this file are subject to the terms of either the GNU Lesser
7 * General Public License Version 2.1 only ("LGPL") or the Common Development and
8 * Distribution License ("CDDL")(collectively, the "License"). You may not use this
9 * file except in compliance with the License. You can obtain a copy of the CDDL at
10 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12 * specific language governing permissions and limitations under the License. When
13 * distributing the software, include this License Header Notice in each file and
14 * include the full text of the License in the License file as well as the
17 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19 * For Covered Software in this distribution, this License shall be governed by the
20 * laws of the State of California (excluding conflict-of-law provisions).
21 * Any litigation relating to this License shall be subject to the jurisdiction of
22 * the Federal Courts of the Northern District of California and the state courts
23 * of the State of California, with venue lying in Santa Clara County, California.
27 * If you wish your version of this file to be governed by only the CDDL or only
28 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
29 * include this software in this distribution under the [CDDL or LGPL Version 2.1]
30 * license." If you don't indicate a single choice of license, a recipient has the
31 * option to distribute your version of this file under either the CDDL or the LGPL
32 * Version 2.1, or to extend the choice of license to its licensees as provided
33 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
34 * Version 2 license, then the option applies only if the new code is made subject
35 * to such option by the copyright holder.
40 #include <sys/types.h>
48 #ifdef HAVE_SYS_MMAN_H
50 #elif defined(BEOS_OS)
51 #include <be/kernel/OS.h>
// Loads the language-model file `fname` into m_buf and points the lookup
// tables (m_LevelSizes, m_bowTable, m_Levels, ...) into that buffer.
//   fname - path of the model file; it is opened read-only.
//   MMap  - presumably selects the memory-mapped path over the heap-buffer
//           path; the branch on this flag is not visible here - TODO confirm.
55 CThreadSlm::load(const char* fname, bool MMap)
57 int fd = open(fname, O_RDONLY);
// Report the path together with the errno description on stderr.
59 fprintf(stderr, "open %s: %s\n", fname, strerror(errno));
// Seeking to the end yields the file size; then rewind to the start.
// NOTE(review): no check for lseek() returning -1 is visible here.
63 m_bufSize = lseek(fd, 0, SEEK_END);
64 lseek(fd, 0, SEEK_SET);
68 #ifdef HAVE_SYS_MMAN_H
// Map the whole file read-only (PROT_READ, MAP_SHARED) starting at offset 0.
69 void* p = mmap(NULL, m_bufSize, PROT_READ, MAP_SHARED, fd, 0);
70 if (p == MAP_FAILED) {
75 #elif defined(BEOS_OS)
// BeOS path: create an area (the size argument is aligned to B_PAGE_SIZE by
// the mask below) and fill it through read() calls.
77 area_id area = create_area("tmp", (void**)&p, B_ANY_ADDRESS,
79 (B_PAGE_SIZE - 1)) & ~(B_PAGE_SIZE - 1),
80 B_NO_LOCK, B_READ_AREA | B_WRITE_AREA);
// Read while bytes remain (len > 0); the loop has no increment expression, so
// len is presumably updated inside the body - TODO confirm.
87 for (ssize_t len = m_bufSize; len > 0; ) {
88 ssize_t n = read(fd, p, len);
94 #error "No implementation for mmap()"
95 #endif // HAVE_SYS_MMAN_H
// Heap-buffer path: allocate m_bufSize bytes and read the file in one call.
// NOTE(review): a plain new[] expression reports failure by throwing
// std::bad_alloc, so this NULL comparison is not expected to ever be true.
97 if ((m_buf = new char[m_bufSize]) == NULL) {
// Anything other than a full m_bufSize-byte read releases the buffer.
// NOTE(review): read() may legitimately return fewer bytes than requested.
101 if (read(fd, m_buf, m_bufSize) != m_bufSize) {
103 delete [] m_buf; m_buf = NULL;
// Buffer layout as decoded below:
//   unsigned            N
//   unsigned            UseLogPr
//   unsigned[N + 1]     level sizes
//   float[1 << BITS_PR] table located right after the level sizes
//                       (presumably m_prTable; its assignment target is not
//                       visible here - TODO confirm)
//   float[1 << BITS_BOW] m_bowTable
//   node records for every level, stored back to back
// NOTE(review): the fields are read through pointer casts of m_buf, which
// assumes the file's byte order and type sizes match the host - confirm.
110 m_N = *(unsigned*)m_buf;
111 m_UseLogPr = *(((unsigned*)m_buf) + 1);
112 m_LevelSizes = ((unsigned*)m_buf) + 2;
114 (float*)(m_buf + 2 * sizeof(unsigned) + (m_N + 1) * sizeof(unsigned));
115 m_bowTable = m_prTable + (1 << BITS_PR);
// The node records start right after the 1 << BITS_BOW floats of m_bowTable.
117 TNode* pn = (TNode*)(m_bowTable + (1 << BITS_BOW));
119 //Solaris CC fails at runtime with an expression like the following one,
120 //even with a (void**) conversion, so the PtrVoid definition is used instead:
121 //m_Levels = new (void*) [m_N + 1];
122 m_Levels = new PtrVoid[m_N + 1];
// Record where each of the m_N + 1 levels starts: level lvl begins at pn,
// and pn then advances by m_LevelSizes[lvl] TNode-sized records.
124 for (size_t lvl = 0; lvl <= m_N; ++lvl) {
125 m_Levels[lvl] = (void*)pn;
126 pn += m_LevelSizes[lvl];
// Release the mapped model buffer: munmap() of m_bufSize bytes when
// HAVE_SYS_MMAN_H is defined, delete_area() of the area containing m_buf on
// BeOS; any other configuration is rejected at compile time.
139 #ifdef HAVE_SYS_MMAN_H
140 munmap(m_buf, m_bufSize);
141 #elif defined(BEOS_OS)
142 delete_area(area_for(m_buf));
144 #error "No implementation for munmap()"
145 #endif // HAVE_SYS_MMAN_H
// Searches the records base[h] .. base[t] for the one whose wid() matches id.
// NodeT only needs to provide wid().
// NOTE(review): this looks like a binary search over records ordered by
// wid(); the loop condition, the "less than" branch and the return statements
// are not visible here - confirm before relying on the exact bounds.
154 template<class NodeT>
156 find_id(NodeT* base, unsigned int h, unsigned int t, unsigned int id)
// Keep the original upper bound; presumably this is the "not found" result -
// TODO confirm.
158 unsigned int tail = t;
// Midpoint written as h + (t - h) / 2, which never forms the sum h + t.
// NOTE(review): m is a signed int while h and t are unsigned.
160 int m = h + (t - h) / 2;
161 NodeT* pm = base + m;
162 unsigned int thisId = pm->wid();
165 else if (thisId > id)
174 * return value as the model suggested. The history state must be historified
175 * or the history's level should be 0. When level == 0 but idx != 0, the
176 * history is a pseudo unigram state used by this model to combine with another
177 * bigram cache language model.
// Computes the model score for seeing word `wid` after state `history` and
// stores the reached state in `result`.
// The score is combined additively when m_UseLogPr is set and
// multiplicatively otherwise.
180 CThreadSlm::rawTransfer(TState history, unsigned int wid, TState& result)
182 unsigned int lvl = history.getLevel();
183 unsigned int pos = history.getIdx();
// Start from the neutral element of the combining operation:
// 0.0 for addition (m_UseLogPr set), 1.0 for multiplication.
185 double cost = (m_UseLogPr) ? 0.0 : 1.0;
187 // The non-word id needs special handling: let it transfer to the root.
189 if (ID_NOT_WORD == wid) {
195 // For the pseudo cache-model unigram state: at level 0 the index is ignored
// and record 0 of that level is used.
196 TNode* pn = ((TNode*)m_Levels[lvl]) + ((lvl) ? pos : 0);
// The children of pn occupy the index range from pn->ch() up to the ch() of
// the next record on the same level.
198 unsigned int t = (pn + 1)->ch();
// Next level viewed as TNode records: look the word up among pn's children.
201 TNode* pBase = (TNode*)m_Levels[lvl + 1];
202 unsigned int idx = find_id(pBase, pn->ch(), t, wid);
205 result.setLevel(lvl + 1);
// pr() is an index into m_prTable, not the value itself.
206 double pr = m_prTable[pBase[idx].pr()];
207 return (m_UseLogPr) ? (cost + pr) : (cost * pr);
// Same lookup with the next level viewed as TLeaf records.
210 TLeaf* pBase = (TLeaf*)m_Levels[lvl + 1];
211 unsigned int idx = find_id(pBase, pn->ch(), t, wid);
214 result.setLevel(lvl + 1);
215 double pr = m_prTable[pBase[idx].pr()];
216 return (m_UseLogPr) ? (cost + pr) : (cost * pr);
// Fold the m_bowTable entry selected by pn->bow() into the running cost
// (presumably the back-off weight applied when the word was not found -
// TODO confirm); additive and multiplicative variants.
221 cost += m_bowTable[pn->bow()];
223 cost *= m_bowTable[pn->bow()];
// Final fallback: combine the cost with the m_prTable entry selected by the
// first record of level 0.
232 return cost + m_prTable[((TNode*)m_Levels[0])->pr()];
234 return cost * m_prTable[((TNode*)m_Levels[0])->pr()];
// Wrapper around rawTransfer() with the same arguments.
// NOTE(review): the post-processing of `cost` is not visible here; the name
// suggests the result is delivered as a negative logarithm - TODO confirm.
238 CThreadSlm::transferNegLog(TState history, unsigned int wid, TState& result)
240 double cost = rawTransfer(history, wid, result);
// Wrapper around rawTransfer() with the same arguments.
// NOTE(review): the post-processing of `cost` is not visible here; presumably
// the result is delivered as a plain probability - TODO confirm.
248 CThreadSlm::transfer(TState history, unsigned int wid, TState& result)
250 double cost = rawTransfer(history, wid, result);
// Determines the word id associated with state `st`.
// NOTE(review): the condition of the first branch and the statements that
// read the id from the located records are not visible here.
258 CThreadSlm::lastWordId(TState st)
260 unsigned int lvl = st.getLevel();
// First branch: the state is located in the last level (m_N), whose records
// are TLeaf.
262 const TLeaf* pn = ((const TLeaf*)m_Levels[m_N]) + st.getIdx();
264 } else if (lvl > 0) {
// Intermediate levels hold TNode records.
265 const TNode *pn = ((const TNode*)m_Levels[st.getLevel()]) + st.getIdx();
// Remaining case (level 0): the state's index itself may be the answer.
268 unsigned int idx = st.getIdx();
270 const TNode *pn = ((const TNode*)m_Levels[st.getLevel()]) +
274 return idx; // return the pseudo state word id
// Returns the state to use as history after `st`; `st` itself is taken by
// value and not modified.
// NOTE(review): the return for a node that does have children is not visible
// here.
279 CThreadSlm::history_state_of(TState st)
// States at or beyond level m_N are resolved in the last level's TLeaf
// records; the new state is built from the leaf's bol() and bon() values.
281 if (st.getLevel() >= m_N) {
282 TLeaf* pl = ((TLeaf*)m_Levels[m_N]) + st.getIdx();
283 return TState(pl->bol(), pl->bon());
285 TNode* pn = ((TNode*)m_Levels[st.getLevel()]) + st.getIdx();
// Equal ch() values on this record and the next one mean an empty child
// range; in that case the state built from bol()/bon() is returned.
286 if (pn->ch() == (pn + 1)->ch())
287 return TState(pn->bol(), pn->bon());
294 CThreadSlm::historify(TState& st)
296 if (st.getLevel() >= m_N) {
297 TLeaf* pl = ((TLeaf*)m_Levels[m_N]) + st.getIdx();
298 st.setLevel(pl->bol());
299 st.setIdx(pl->bon());
301 TNode* pn = ((TNode*)m_Levels[st.getLevel()]) + st.getIdx();
302 if (pn->ch() == (pn + 1)->ch()) {
303 st.setLevel(pn->bol());
304 st.setIdx(pn->bon());