2 **********************************************************************
3 * Copyright (C) 2005-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
11 #include "unicode/uobject.h"
13 #if !UCONFIG_NO_CONVERSION
// Parser that is configured with an n-gram table and a per-byte map and is
// run over an InputText through parse(). Derives from UMemory.
19 class NGramParser : public UMemory
// Const pointer to an n-gram table (presumably the constructor's
// theNgramList argument -- confirm in the implementation).
23 const int32_t *ngramList;
// Const pointer to a byte map (presumably the constructor's theCharMap
// argument -- confirm in the implementation).
30 const uint8_t *charMap;
// Feeds one value b into the parser state; the exact effect is defined in the
// implementation file, not in this header.
32 void addByte(int32_t b);
// Builds a parser from an n-gram table and a byte map.
35 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
39 * Binary search for value in table, which must have exactly 64 entries.
// NOTE(review): the meaning of the returned int32_t (index vs. sentinel) is
// not visible here -- check the definition before relying on it.
41 int32_t search(const int32_t *table, int32_t value);
// Processes a single n-gram value; no result is returned to the caller.
43 void lookup(int32_t thisNgram);
// Both hooks are virtual so a subclass can substitute its own byte handling;
// NGramParser_IBM420 redeclares the pair with identical signatures.
45 virtual int32_t nextByte(InputText *det);
46 virtual void parseCharacters(InputText *det);
// Runs the parser over det and returns an int32_t result.
// NOTE(review): presumably a confidence score -- confirm against the caller.
49 int32_t parse(InputText *det);
// NGramParser variant for IBM420 input. It redeclares the two virtual hooks of
// the base class (nextByte, parseCharacters) and adds isLamAlef().
53 class NGramParser_IBM420 : public NGramParser
// Takes a value b and returns an int32_t.
// NOTE(review): by its name this classifies Lam-Alef bytes; the meaning of the
// return value is not visible in this header -- confirm in the definition.
57 int32_t isLamAlef(int32_t b);
// Same signatures as the virtual members of NGramParser, so these override them.
58 int32_t nextByte(InputText *det);
59 void parseCharacters(InputText *det);
// Same parameters as the NGramParser constructor.
62 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
// Abstract base for the recognizers declared in the rest of this header.
// getName() and match() are pure virtual; match_sbcs() is a virtual member
// with a definition supplied elsewhere.
66 class CharsetRecog_sbcs : public CharsetRecognizer
70 virtual ~CharsetRecog_sbcs();
// Pure virtual: every concrete recognizer must supply a name and a matcher.
71 virtual const char *getName() const = 0;
72 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
// Takes the input plus an n-gram table and a byte map (the same element types
// the NGramParser constructor accepts) and returns an int32_t.
// NOTE(review): presumably a confidence value -- confirm in the definition.
// Virtual: CharsetRecog_IBM420_ar redeclares it with the same signature.
73 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
// Recognizer derived directly from CharsetRecog_sbcs that declares both
// getName() and match(), i.e. both pure virtuals of the base.
76 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
79 virtual ~CharsetRecog_8859_1();
80 const char *getName() const;
81 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Recognizer derived directly from CharsetRecog_sbcs that declares both
// getName() and match(), i.e. both pure virtuals of the base.
84 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
87 virtual ~CharsetRecog_8859_2();
88 const char *getName() const;
89 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Intermediate recognizer class: only getName() is declared in the lines
// shown; match() is declared by the derived CharsetRecog_8859_5_ru.
92 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
95 virtual ~CharsetRecog_8859_5();
96 const char *getName() const;
// Intermediate recognizer class: only getName() is declared in the lines
// shown; match() is declared by the derived CharsetRecog_8859_6_ar.
99 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
102 virtual ~CharsetRecog_8859_6();
104 const char *getName() const;
// Intermediate recognizer class: only getName() is declared in the lines
// shown; match() is declared by the derived CharsetRecog_8859_7_el.
107 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
110 virtual ~CharsetRecog_8859_7();
112 const char *getName() const;
// Intermediate recognizer class: only getName() is declared in the lines
// shown; match() is declared by the derived CharsetRecog_8859_8_I_he and
// CharsetRecog_8859_8_he.
115 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
118 virtual ~CharsetRecog_8859_8();
// Explicitly virtual here; CharsetRecog_8859_8_I_he redeclares getName().
120 virtual const char *getName() const;
// Intermediate recognizer class: only getName() is declared in the lines
// shown; match() is declared by the derived CharsetRecog_8859_9_tr.
123 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
126 virtual ~CharsetRecog_8859_9();
128 const char *getName() const;
// Recognizer built on CharsetRecog_8859_5: adds getLanguage() and declares
// match(); getName() is inherited from the parent class.
// NOTE(review): the "_ru" suffix suggests Russian -- confirm in the definition.
133 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
136 virtual ~CharsetRecog_8859_5_ru();
138 const char *getLanguage() const;
140 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Recognizer built on CharsetRecog_8859_6: adds getLanguage() and declares
// match(); getName() is inherited from the parent class.
// NOTE(review): the "_ar" suffix suggests Arabic -- confirm in the definition.
143 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
146 virtual ~CharsetRecog_8859_6_ar();
148 const char *getLanguage() const;
150 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Recognizer built on CharsetRecog_8859_7: adds getLanguage() and declares
// match(); getName() is inherited from the parent class.
// NOTE(review): the "_el" suffix suggests Greek -- confirm in the definition.
153 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
156 virtual ~CharsetRecog_8859_7_el();
158 const char *getLanguage() const;
160 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Recognizer built on CharsetRecog_8859_8. Unlike its sibling
// CharsetRecog_8859_8_he it redeclares getName() in addition to
// getLanguage() and match().
// NOTE(review): the "_he" suffix suggests Hebrew -- confirm in the definition.
163 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
166 virtual ~CharsetRecog_8859_8_I_he();
// Overrides the virtual getName() declared in CharsetRecog_8859_8.
168 const char *getName() const;
170 const char *getLanguage() const;
172 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Recognizer built on CharsetRecog_8859_8: adds getLanguage() and declares
// match(); getName() is inherited from the parent class.
// NOTE(review): the "_he" suffix suggests Hebrew -- confirm in the definition.
175 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
178 virtual ~CharsetRecog_8859_8_he ();
180 const char *getLanguage() const;
182 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Recognizer built on CharsetRecog_8859_9: adds getLanguage() and declares
// match(); getName() is inherited from the parent class.
// NOTE(review): the "_tr" suffix suggests Turkish -- confirm in the definition.
185 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
188 virtual ~CharsetRecog_8859_9_tr ();
190 const char *getLanguage() const;
192 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Recognizer derived directly from CharsetRecog_sbcs; declares getName(),
// getLanguage() and match() in a single class (no intermediate base).
195 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
198 virtual ~CharsetRecog_windows_1256();
200 const char *getName() const;
202 const char *getLanguage() const;
204 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Recognizer derived directly from CharsetRecog_sbcs; declares getName(),
// getLanguage() and match() in a single class (no intermediate base).
207 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
210 virtual ~CharsetRecog_windows_1251();
212 const char *getName() const;
214 const char *getLanguage() const;
216 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Recognizer derived directly from CharsetRecog_sbcs; declares getName(),
// getLanguage() and match() in a single class (no intermediate base).
220 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
223 virtual ~CharsetRecog_KOI8_R();
225 const char *getName() const;
227 const char *getLanguage() const;
229 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Common base of the two IBM424 recognizers (_rtl and _ltr). Only
// getLanguage() is declared in the lines shown; getName() and match() are
// declared by the derived classes.
232 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
235 virtual ~CharsetRecog_IBM424_he();
237 const char *getLanguage() const;
// IBM424 recognizer that supplies getName() and match(); getLanguage() comes
// from CharsetRecog_IBM424_he.
// NOTE(review): "_rtl" presumably means right-to-left text order -- confirm.
240 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
242 virtual ~CharsetRecog_IBM424_he_rtl();
244 const char *getName() const;
246 virtual UBool match(InputText *det, CharsetMatch *results) const;
// IBM424 recognizer that supplies getName() and match(); getLanguage() comes
// from CharsetRecog_IBM424_he.
// NOTE(review): "_ltr" presumably means left-to-right text order -- confirm.
249 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
250 virtual ~CharsetRecog_IBM424_he_ltr();
252 const char *getName() const;
254 virtual UBool match(InputText *det, CharsetMatch *results) const;
// Common base of the two IBM420 recognizers (_rtl and _ltr). Declares
// getLanguage() and replaces the inherited match_sbcs(); getName() and
// match() are declared by the derived classes.
257 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
260 virtual ~CharsetRecog_IBM420_ar();
262 const char *getLanguage() const;
// Same signature as the virtual CharsetRecog_sbcs::match_sbcs, so this
// overrides it for the IBM420 recognizers.
// NOTE(review): presumably so NGramParser_IBM420 can be used instead of
// NGramParser -- confirm in the definition.
263 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
// IBM420 recognizer that supplies getName() and match(); getLanguage() and
// match_sbcs() come from CharsetRecog_IBM420_ar.
// NOTE(review): "_rtl" presumably means right-to-left text order -- confirm.
267 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
269 virtual ~CharsetRecog_IBM420_ar_rtl();
271 const char *getName() const;
273 virtual UBool match(InputText *det, CharsetMatch *results) const;
// IBM420 recognizer that supplies getName() and match(); getLanguage() and
// match_sbcs() come from CharsetRecog_IBM420_ar.
// NOTE(review): "_ltr" presumably means left-to-right text order -- confirm.
276 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
277 virtual ~CharsetRecog_IBM420_ar_ltr();
279 const char *getName() const;
281 virtual UBool match(InputText *det, CharsetMatch *results) const;
286 #endif /* !UCONFIG_NO_CONVERSION */
287 #endif /* __CSRSBCS_H */