3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
5 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
7 * The contents of this file are subject to the terms of either the GNU Lesser
8 * General Public License Version 2.1 only ("LGPL") or the Common Development and
9 * Distribution License ("CDDL")(collectively, the "License"). You may not use this
10 * file except in compliance with the License. You can obtain a copy of the CDDL at
11 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
12 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
13 * specific language governing permissions and limitations under the License. When
14 * distributing the software, include this License Header Notice in each file and
15 * include the full text of the License in the License file as well as the
18 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
20 * For Covered Software in this distribution, this License shall be governed by the
21 * laws of the State of California (excluding conflict-of-law provisions).
22 * Any litigation relating to this License shall be subject to the jurisdiction of
23 * the Federal Courts of the Northern District of California and the state courts
24 * of the State of California, with venue lying in Santa Clara County, California.
28 * If you wish your version of this file to be governed by only the CDDL or only
29 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
30 * include this software in this distribution under the [CDDL or LGPL Version 2.1]
31 * license." If you don't indicate a single choice of license, a recipient has the
32 * option to distribute your version of this file under either the CDDL or the LGPL
33 * Version 2.1, or to extend the choice of license to its licensees as provided
34 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
35 * Version 2 license, then the option applies only if the new code is made subject
36 * to such option by the copyright holder.
39 #ifndef _SIM_SENTENCIZER_H
40 #define _SIM_SENTENCIZER_H
42 #include "../portability.h"
// Cursor over a CSIMCharReader: it is constructed from a reader pointer and an
// integer index, and exposes increment, offset and dereference operators.
// NOTE(review): access specifiers, the declaration of `idx` and the closing
// brace of this class are not visible in this excerpt.
49 class SIMCharReaderIterator {
// Binds the cursor to `the_reader` (NULL by default) at index `the_idx`
// (0 by default); both values are stored in the members `reader` and `idx`.
51 SIMCharReaderIterator(CSIMCharReader* the_reader = NULL, int the_idx = 0)
52 : reader(the_reader), idx(the_idx) { }
// Pre-increment; only declared here, the definition lives elsewhere.
53 SIMCharReaderIterator& operator++();
// Returns a new iterator by value; presumably positioned `i` characters
// further on -- confirm against the definition, which is not shown here.
54 SIMCharReaderIterator operator+(int i);
// Dereference: yields a mutable TWCHAR reference even though the operator
// itself is const. The definition is not shown here.
55 TWCHAR & operator*() const;
// Pointer to the reader this iterator was constructed with.
58 CSIMCharReader* reader;
// Character source built on top of a C `FILE*`, handing out
// SIMCharReaderIterator cursors through begin().
// NOTE(review): the declaration of `fp`, any further members and the closing
// brace of this class are not visible in this excerpt.
62 class CSIMCharReader {
// The iterator class is granted access to this class's non-public members.
63 friend class SIMCharReaderIterator;
// Container-style alias so generic code can spell CSIMCharReader::iterator.
66 typedef SIMCharReaderIterator iterator;
// Stores the given file pointer in `fp` and starts with an empty `buf`.
// Nothing visible here opens or closes the file.
69 CSIMCharReader(FILE* afp) : fp(afp), buf() { }
// Returns an iterator bound to this reader at index 0.
70 iterator begin() { return iterator(this, 0); }
// Deque of TWCHAR; presumably caches characters read from `fp` so the
// iterator can look ahead -- confirm against the iterator implementation.
74 std::deque<TWCHAR> buf;
// Pulls characters through `first` into `wstr` and returns true when `wstr`
// ends up non-empty.
//
// wstr         Output string; characters are appended with push_back.
// first        Iterator taken by reference and advanced with ++, so the
//              caller sees the new position. It must support ++, `+ int`
//              and dereference.
// bIgnoreCRLF  Off by default; see the WCH_RETURN test below.
//
// NOTE(review): several body lines are missing from this excerpt -- the
// declaration and assignments of `brk`, the assignment of `ch2`, the `do {`
// openers and the closing braces. The comments below describe only the
// conditions that are visible. The WCH_* constants are defined elsewhere;
// their names look like pinyin punctuation names (juhao = full stop,
// wenhao = question mark, tanhao = exclamation mark, douhao = comma,
// fenhao = semicolon, maohao = colon, shengluehao = ellipsis) -- confirm.
77 template <class iterator>
78 bool ReadSentence(wstring& wstr, iterator& first, bool bIgnoreCRLF = false){
// ch: current character, chnext: one character ahead, ch2: a further
// lookahead whose assignment is not visible in this excerpt.
80 TWCHAR ch, chnext, ch2;
// Scan until the iterator yields WCH_NULL.
82 for (; (ch = *first) != WCH_NULL; ++first) {
// One-character lookahead.
83 chnext = *(first + 1);
// A WCH_RETURN not followed by another WCH_RETURN gets special handling when
// bIgnoreCRLF is set; the action taken is on a line not visible here.
85 if ((ch == WCH_RETURN) && bIgnoreCRLF && (chnext != WCH_RETURN))
// Punctuation test.
// NOTE(review): WCH_TANHAO is tested twice, so the second test is redundant.
// Another constant may have been intended (the trailing loop at the end also
// checks WCH_FENHAO and WCH_MAOHAO) -- confirm before changing.
87 if (ch == WCH_JUHAO || ch == WCH_WENHAO || ch == WCH_TANHAO ||
88 ch == WCH_TANHAO || ch == WCH_SHENGLUEHAO)
// Two WCH_DOUHAO in a row are tested as a separate case.
90 else if (ch == WCH_DOUHAO && chnext == WCH_DOUHAO)
// WCH_RETURN or WCH_TAB.
92 else if (ch == WCH_RETURN || ch == WCH_TAB)
// WCH_SPACE: the outcome depends on what follows it.
94 else if (ch == WCH_SPACE) {
// Space followed by WCH_RETURN: additionally inspect ch2.
95 if (chnext == WCH_RETURN) {
96 if (ch2 == WCH_SPACE || ch2 == WCH_TAB || ch2 == WCH_RETURN)
// Space followed by another space or a tab.
98 } else if (chnext == WCH_SPACE || chnext == WCH_TAB)
// brk == 2 with nothing collected yet: characters are appended while `ch` is
// WCH_SPACE, WCH_TAB or WCH_RETURN.
105 if (brk == 2 && wstr.size() == 0) {
107 wstr.push_back(*first);
109 } while (ch == WCH_SPACE || ch == WCH_TAB || ch == WCH_RETURN);
// brk == 1: characters are appended while `ch` is one of the six punctuation
// constants listed in the loop condition.
110 } else if (brk == 1) {
112 wstr.push_back(*first);
114 } while (ch == WCH_JUHAO || ch == WCH_WENHAO || ch == WCH_TANHAO ||
115 ch == WCH_FENHAO || ch == WCH_MAOHAO || ch == WCH_SHENGLUEHAO);
// True when at least one character was stored in wstr.
117 return(wstr.size() > 0);