3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
5 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
7 * The contents of this file are subject to the terms of either the GNU Lesser
8 * General Public License Version 2.1 only ("LGPL") or the Common Development and
9 * Distribution License ("CDDL")(collectively, the "License"). You may not use this
10 * file except in compliance with the License. You can obtain a copy of the CDDL at
11 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
12 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
13 * specific language governing permissions and limitations under the License. When
14 * distributing the software, include this License Header Notice in each file and
15 * include the full text of the License in the License file as well as the
18 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
20 * For Covered Software in this distribution, this License shall be governed by the
21 * laws of the State of California (excluding conflict-of-law provisions).
22 * Any litigation relating to this License shall be subject to the jurisdiction of
23 * the Federal Courts of the Northern District of California and the state courts
24 * of the State of California, with venue lying in Santa Clara County, California.
28 * If you wish your version of this file to be governed by only the CDDL or only
29 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
30 * include this software in this distribution under the [CDDL or LGPL Version 2.1]
31 * license." If you don't indicate a single choice of license, a recipient has the
32 * option to distribute your version of this file under either the CDDL or the LGPL
33 * Version 2.1, or to extend the choice of license to its licensees as provided
34 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
35 * Version 2 license, then the option applies only if the new code is made subject
36 * to such option by the copyright holder.
39 #ifndef SUNPY_PINYIN_SEG_H
40 #define SUNPY_PINYIN_SEG_H
42 #include "portability.h"
43 #include "segmentor.h"
44 #include "pinyin_data.h"
46 #include "ime-core/utils.h"
// Callable helper that stores a list of (string, string) correction pairs and
// an enabled flag. Only the small accessors are defined inline here;
// operator() is declared here and defined elsewhere.
// Privately inherits CNonCopyable (declared elsewhere; presumably disables
// copying -- TODO confirm).
51 class EXPORTED CGetCorrectionPairOp : private CNonCopyable
// One correction entry: a pair of strings. Which element is the input and
// which is the replacement is not visible here -- verify against operator().
54 typedef std::pair<std::string, std::string> CCorrectionPair;
// Ordered sequence of correction entries.
55 typedef std::vector<CCorrectionPair> CCorrectionPairVec;
// Starts with the flag cleared and reserves capacity for 8 pairs up front.
57 CGetCorrectionPairOp () : m_bEnabled(false) { m_correctionPairs.reserve(8); }
// Sets the enabled flag; calling with no argument sets it to true.
59 void setEnable(bool value = true) { m_bEnabled = value; }
// Returns the current value of the enabled flag.
60 bool isEnabled() { return m_bEnabled; }
// Replaces the stored pairs with a copy of `pairs`.
// `string_pairs` is declared elsewhere; it must be assignable to
// CCorrectionPairVec for this to compile.
62 void setCorrectionPairs(const string_pairs& pairs)
63 { m_correctionPairs = pairs; }
// Declaration only. Takes the pinyin string and an unsigned out-parameter by
// non-const reference and returns a C string.
// NOTE(review): presumably looks up a correction for `pystr` and reports the
// matched length through `matched_len` -- confirm against the definition.
65 const char * operator ()(std::string& pystr, unsigned& matched_len);
// Pairs installed by setCorrectionPairs().
68 CCorrectionPairVec m_correctionPairs;
// Callable helper with two independent on/off flags (the op itself and
// "inner fuzzy") and three lookup maps. The maps are set up by _initMaps(),
// which is called from the constructor and defined elsewhere.
// Privately inherits CNonCopyable (declared elsewhere; presumably disables
// copying -- TODO confirm).
72 class EXPORTED CGetFuzzySegmentsOp : private CNonCopyable
// Map from an unsigned key to a pair of unsigned values.
// NOTE(review): key/value meaning is not visible here -- see _initMaps().
75 typedef std::map<unsigned,
76 std::pair<unsigned, unsigned> > CInnerFuzzyFinalMap;
// Map from an unsigned key to a (char, unsigned) pair.
// NOTE(review): key/value meaning is not visible here -- see _initMaps().
77 typedef std::map<unsigned, std::pair<char, unsigned> > CFuzzySyllableMap;
// Starts with both flags cleared, then populates the maps via _initMaps().
79 CGetFuzzySegmentsOp () : m_bEnabled(false),
80 m_bInnerFuzzyEnabled(false) { _initMaps(); }
// Declaration only. Takes two segment vectors by non-const reference and
// returns an unsigned value; the remaining parameters are on lines not
// visible in this listing.
81 unsigned operator ()(IPySegmentor::TSegmentVec&,
82 IPySegmentor::TSegmentVec&,
// Sets the enabled flag; calling with no argument sets it to true.
85 void setEnable(bool value = true) { m_bEnabled = value; }
// Sets the inner-fuzzy flag; calling with no argument sets it to true.
86 void setInnerFuzzyEnable(bool value = true) { m_bInnerFuzzyEnabled = value; }
// Returns the enabled flag (not the inner-fuzzy flag).
87 bool isEnabled() { return m_bEnabled; }
// Declaration only. Takes a segment vector and a single segment by non-const
// reference and returns an unsigned value.
// NOTE(review): presumably drops/invalidates entries of the vector that
// conflict with the given segment -- confirm against the definition.
91 unsigned _invalidateSegments(IPySegmentor::TSegmentVec&,
92 IPySegmentor::TSegment&);
// Flag written by setInnerFuzzyEnable().
95 bool m_bInnerFuzzyEnabled;
// Lookup tables; presumably filled in by _initMaps() -- verify.
96 CInnerFuzzyFinalMap m_fuzzyFinalMap;
97 CFuzzySyllableMap m_fuzzyPreMap;
98 CFuzzySyllableMap m_fuzzyProMap;
// Segmentor derived from IPySegmentor. It holds three optional helper ops as
// raw pointers (fuzzy syllables, correction pairs, fuzzy segments), a
// CDATrie<short>, and several segment vectors. Most methods are only declared
// here and defined elsewhere.
101 class EXPORTED CQuanpinSegmentor : public IPySegmentor
// Declaration only; defined elsewhere.
104 CQuanpinSegmentor ();
// Returns the segment list. When auxiliary segments are requested AND a
// fuzzy-segments op is installed AND that op is enabled, m_merged_segs is
// rebuilt as the std::merge of m_segs and m_fuzzy_segs and returned.
// std::merge requires both inputs to already be sorted under operator<;
// presumably both vectors are kept in order -- verify.
// The returned reference aliases a member that is cleared on the next such
// call, so callers should not hold it across calls.
// The branch taken when the condition is false is not visible in this listing.
106 virtual TSegmentVec& getSegments(bool req_aux_segs){
107 if (req_aux_segs && m_pGetFuzzySegmentsOp &&
108 m_pGetFuzzySegmentsOp->isEnabled()) {
109 m_merged_segs.clear();
110 std::merge(m_segs.begin(), m_segs.end(),
111 m_fuzzy_segs.begin(), m_fuzzy_segs.end(),
112 back_inserter(m_merged_segs));
113 return m_merged_segs;
// Returns a const reference to the m_inputBuf member.
119 virtual const wstring& getInputBuffer() { return m_inputBuf; }
// Returns the syllable separator set: a string containing only the apostrophe.
121 virtual const char* getSylSeps() { return "'"; }
// Editing operations on the input; declarations only, defined elsewhere.
// NOTE(review): the unsigned return values presumably report the position
// from which segments were updated (cf. updatedFrom()) -- confirm.
123 virtual unsigned push(unsigned ch);
124 virtual unsigned pop();
125 virtual unsigned insertAt(unsigned idx, unsigned ch);
// `backward` defaults to true.
126 virtual unsigned deleteAt(unsigned idx, bool backward = true);
// `from` defaults to 0.
127 virtual unsigned clear(unsigned from = 0);
// Returns the m_updatedFrom member.
129 virtual unsigned updatedFrom() { return m_updatedFrom; }
// Declaration only. Takes a file name and returns a bool.
// NOTE(review): presumably loads m_pytrie from the file and returns success
// -- confirm against the definition.
131 bool load(const char * pyTrieFileName);
// The three setters below store the given pointer as-is (it may be null;
// getSegments() null-checks m_pGetFuzzySegmentsOp before use). Nothing in the
// visible code deletes these pointers, so ownership presumably stays with the
// caller -- TODO confirm.
133 void setGetFuzzySyllablesOp(CGetFuzzySyllablesOp<CPinyinData> *op) {
134 m_pGetFuzzySyllablesOp = op; }
135 void setGetCorrectionPairOp(CGetCorrectionPairOp *op) {
136 m_pGetCorrectionPairOp = op; }
137 void setGetFuzzySegmentsOp(CGetFuzzySegmentsOp *op) {
138 m_pGetFuzzySegmentsOp = op; }
// Internal helpers; declared inline here, defined elsewhere.
141 inline unsigned _push(unsigned ch);
142 inline unsigned _clear(unsigned from);
143 inline void _addFuzzySyllables(TSegment &seg);
// `from` defaults to UINT_MAX.
// NOTE(review): UINT_MAX presumably acts as a "not specified" sentinel --
// confirm against the definition.
144 inline unsigned _updateWith(const std::string& new_pystr,
145 unsigned from = UINT_MAX);
// Takes `idx` by value and writes results through the two reference
// out-parameters `strIdx` and `segIdx`.
146 inline void _locateSegment(unsigned idx, unsigned &strIdx, unsigned &segIdx);
// Optional helper ops installed through the setters above.
148 CGetFuzzySyllablesOp<CPinyinData> *m_pGetFuzzySyllablesOp;
149 CGetCorrectionPairOp *m_pGetCorrectionPairOp;
150 CGetFuzzySegmentsOp *m_pGetFuzzySegmentsOp;
// Trie with short values; presumably populated by load() -- verify.
152 CDATrie<short> m_pytrie;
// Second input range of the merge in getSegments().
156 TSegmentVec m_fuzzy_segs;
// Scratch output of the merge in getSegments(); cleared and refilled on each
// merging call.
157 TSegmentVec m_merged_segs;
// Value reported by updatedFrom().
159 unsigned m_updatedFrom;
162 #endif /* SUNPY_PINYIN_SEG_H */