1 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 # Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
5 # The contents of this file are subject to the terms of either the GNU Lesser
6 # General Public License Version 2.1 only ("LGPL") or the Common Development and
7 # Distribution License ("CDDL")(collectively, the "License"). You may not use this
8 # file except in compliance with the License. You can obtain a copy of the CDDL at
9 # http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
10 # http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11 # specific language governing permissions and limitations under the License. When
12 # distributing the software, include this License Header Notice in each file and
13 # include the full text of the License in the License file as well as the
16 # NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
18 # For Covered Software in this distribution, this License shall be governed by the
19 # laws of the State of California (excluding conflict-of-law provisions).
20 # Any litigation relating to this License shall be subject to the jurisdiction of
21 # the Federal Courts of the Northern District of California and the state courts
22 # of the State of California, with venue lying in Santa Clara County, California.
26 # If you wish your version of this file to be governed by only the CDDL or only
27 # the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
28 # include this software in this distribution under the [CDDL or LGPL Version 2.1]
29 # license." If you don't indicate a single choice of license, a recipient has the
30 # option to distribute your version of this file under either the CDDL or the LGPL
31 # Version 2.1, or to extend the choice of license to its licensees as provided
32 # above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
33 # Version 2 license, then the option applies only if the new code is made subject
34 # to such option by the copyright holder.
# External declarations from the CPython header "Python.h": the unicode object
# struct, a wchar_t typedef, and the two wide-character conversion functions
# called later in this file (PyUnicode_FromWideChar, PyUnicode_AsWideChar).
36 cdef extern from "Python.h":
37 ctypedef struct PyUnicodeObject:
# NOTE(review): the struct body is not visible here (the embedded numbering
# jumps from 37 to 39) -- confirm it is just a placeholder `pass`.
39 ctypedef unsigned wchar_t
# The quoted string is the C name Cython emits for this alias, which lets the
# generated code spell the type with a `const` qualifier.
40 ctypedef wchar_t * const_wchar_t_ptr "const wchar_t *"
41 object PyUnicode_FromWideChar (wchar_t *, Py_ssize_t)
42 Py_ssize_t PyUnicode_AsWideChar (PyUnicodeObject *, wchar_t *, Py_ssize_t)
# External declarations from the project header "portability.h": the TWCHAR
# character type, a pointer alias emitted in C as "const TWCHAR *", and WCSLEN.
44 cdef extern from "portability.h":
45 ctypedef unsigned TWCHAR
46 ctypedef TWCHAR * const_TWCHAR_ptr "const TWCHAR *"
# NOTE(review): WCSLEN presumably returns the length of a TWCHAR string; its
# result is used as the length passed to PyUnicode_FromWideChar in
# PinyinTrie.__getitem__ -- confirm against the header.
47 unsigned WCSLEN (const_TWCHAR_ptr ws)
# External declarations from the project header "pytrie.h". The quoted strings
# are the C++ names Cython emits for each declared identifier.
49 cdef extern from "pytrie.h":
# Word record; emitted in C++ as CPinyinTrie::TWordIdInfo.
# NOTE(review): the field list is not visible here (embedded numbering jumps
# 50 -> 57). PinyinTrieNode reads m_id, m_bSeen, m_cost, m_len and m_csLevel
# from this struct, so they are presumably declared on the missing lines.
50 ctypedef struct CPinyinTrie_TWord "CPinyinTrie::TWordIdInfo":
# Trie node; emitted in C++ as CPinyinTrie::TNode.
# NOTE(review): m_nWordId is read by PinyinTrieNode but is not declared on any
# visible line -- presumably on the missing line 58.
57 ctypedef struct CPinyinTrie_TNode "CPinyinTrie::TNode":
59 CPinyinTrie_TWord * getWordIdPtr ()
# The trie itself. The methods are declared as struct members so that they can
# be called through a CPinyinTrie pointer.
# NOTE(review): getWordCount() is called by PinyinTrie but is not declared on
# any visible line -- presumably on the missing lines 63-64.
61 ctypedef struct CPinyinTrie "CPinyinTrie":
62 bint load(char *filename)
65 CPinyinTrie_TNode * getRootNode ()
66 CPinyinTrie_TNode * transfer (CPinyinTrie_TNode *, unsigned)
# `getitem` is emitted as `operator []`, i.e. a call to the C++ subscript
# operator.
67 const_TWCHAR_ptr getitem "operator []" (unsigned)
68 int getSymbolId (const_TWCHAR_ptr)
69 bint isValid (CPinyinTrie_TNode*, bint, unsigned)
# Allocation and deallocation helpers: the C names expand to the C++
# expressions `new CPinyinTrie (...)` and `delete (...)`.
71 CPinyinTrie *new_CPinyinTrie "new CPinyinTrie" ()
72 void del_CPinyinTrie "delete" (CPinyinTrie *pytrie)
# NOTE(review): the enclosing class header is not visible in this view
# (embedded numbering jumps 72 -> 76). It is presumably the WordInfo extension
# type, since PinyinTrieNode constructs WordInfo with these five values.
#
# C-level integer attributes, also readable and writable from Python because
# they are declared `public`.
# NOTE(review): `seen` is read below as self.seen but is not part of this
# declaration -- presumably declared on the missing line 77; confirm.
76 cdef public int wid, cost, length, charset_level
# Constructor: only `wid` is required, every other field has a default.
# NOTE(review): only the charset_level assignment is visible; the assignments
# for the other fields are presumably on the missing lines 79-82.
78 def __cinit__ (self, wid, seen=True, cost=0, length=0, charset_level=0):
83 self.charset_level = charset_level
# Returns a one-line text summary of all five fields.
# NOTE(review): the `def` line owning this return statement is not visible
# (lines 84-85 are missing) -- likely __str__ or __repr__; confirm.
86 return "wid=%d, seen=%d, cost=%d, length=%d, charset_level=%d" % \
87 (self.wid, self.seen, self.cost, self.length, self.charset_level)
# Python-visible wrapper around a raw CPinyinTrie::TNode pointer.
89 cdef class PinyinTrieNode:
# The wrapped node pointer. It is assigned from outside by
# PinyinTrie.get_root_node() and PinyinTrie.transfer(); nothing in the visible
# code checks it for NULL before dereferencing.
90 cdef CPinyinTrie_TNode *pnode
# NOTE(review): the header of the method below and the initialisation of
# `words` are not visible (lines 91-93 are missing), nor is the code after the
# loop (line 97) -- presumably `return words`; confirm.
#
# Walks the node's word array: `p` points at its first entry and m_nWordId is
# used as the number of entries.
94 cdef CPinyinTrie_TWord *p= <CPinyinTrie_TWord*> self.pnode.getWordIdPtr ()
95 for i in xrange (self.pnode.m_nWordId):
# Copy each C word record into a WordInfo object
# (id, seen flag, cost, length, charset level).
96 words.append (WordInfo(p[i].m_id, p[i].m_bSeen, p[i].m_cost, p[i].m_len, p[i].m_csLevel))
# Python-visible wrapper that owns one C++ CPinyinTrie instance.
99 cdef class PinyinTrie:
# The owned C++ object: created in __cinit__, destroyed in __dealloc__.
100 cdef CPinyinTrie *thisptr
# Allocate the underlying C++ trie.
102 def __cinit__ (self):
103 self.thisptr = new_CPinyinTrie ()
# Release the underlying C++ trie.
105 def __dealloc__ (self):
106 del_CPinyinTrie (self.thisptr)
# Load the trie from the file `fname`; returns the boolean result of the C++
# load() call.
# NOTE(review): `fname` is handed to a `char *` parameter, so Cython requires
# a byte string here; a Python 3 `str` would be rejected -- confirm what
# callers pass.
108 def load (self, fname):
109 return self.thisptr.load (fname)
# NOTE(review): lines 110-113 of the original are not visible in this view.
# Return the value of the C++ getWordCount() call.
114 def get_word_count (self):
115 return self.thisptr.getWordCount()
# Wrap the trie's root node pointer in a new PinyinTrieNode.
# NOTE(review): the return statement is not visible (line 121 is missing) --
# presumably `return node`.
117 def get_root_node (self):
118 cdef CPinyinTrie_TNode * pnode = <CPinyinTrie_TNode*> self.thisptr.getRootNode ()
119 node = PinyinTrieNode ()
120 (<PinyinTrieNode>node).pnode = pnode
# Call the C++ transfer() with `node`'s pointer and `syllable`, then wrap the
# resulting node pointer in a new PinyinTrieNode.
# NOTE(review): `<PinyinTrieNode>node` is an unchecked cast, so `node` must
# really be a PinyinTrieNode. `syllable` is cast to int although the C++
# parameter is declared unsigned.
123 def transfer (self, node, syllable):
124 cdef CPinyinTrie_TNode *pnode = (<PinyinTrieNode>node).pnode
125 pnode = <CPinyinTrie_TNode*> self.thisptr.transfer (pnode, <int>syllable)
# NOTE(review): lines 126-128 are not visible -- possibly a NULL check on the
# returned pointer; the final return (line 131) is not visible either.
129 node = PinyinTrieNode ()
130 (<PinyinTrieNode>node).pnode = pnode
# Return the word stored at index `idx` as a unicode object.
# An out-of-range index yields the empty string '' instead of raising
# IndexError.
# NOTE(review): because IndexError is never raised, iterating over the object
# through __getitem__ would not stop by itself -- confirm this is intended.
133 def __getitem__ (self, idx):
134 if idx<0 or idx>=self.thisptr.getWordCount(): return ''
135 cdef const_TWCHAR_ptr cwstr = self.thisptr.getitem (idx)
# NOTE(review): the cast presumes TWCHAR and wchar_t have the same width --
# confirm for every supported platform.
136 return PyUnicode_FromWideChar (<const_wchar_t_ptr>cwstr, WCSLEN(cwstr))
# Look up the id of a single-character symbol; any `symbol` whose length is
# not exactly 1 yields 0.
# NOTE(review): the declaration of `buf` is not visible (line 139 is missing).
138 def get_symbol_id (self, symbol):
140 if len (symbol) != 1: return 0
# NOTE(review): the third argument of PyUnicode_AsWideChar is a count of
# wchar_t elements, but sizeof(buf) is a byte count -- confirm that the buffer
# cannot be overrun and that it ends up NUL-terminated. The return value
# (the error indicator) is ignored.
141 PyUnicode_AsWideChar (<PyUnicodeObject*> symbol, buf, sizeof(buf))
142 return self.thisptr.getSymbolId (<const_TWCHAR_ptr> buf)
# Forward to the C++ isValid() with `node`'s pointer and the two options;
# returns its boolean result.
# NOTE(review): `<PinyinTrieNode>node` is an unchecked cast, so `node` must
# really be a PinyinTrieNode.
144 def is_valid (self, node, allowNonComplete=True, csLevel=0):
145 cdef CPinyinTrie_TNode *pnode = (<PinyinTrieNode>node).pnode
146 return self.thisptr.isValid (pnode, allowNonComplete, csLevel)