1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2005-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
13 #include "unicode/uobject.h"
15 #if !UCONFIG_NO_CONVERSION
// NGramParser: helper class derived from UMemory. Only part of the class body
// is visible in this view (braces and access specifiers are not shown).
// It stores two table pointers and declares byte-level and n-gram-level
// operations over an InputText, ending in parse().
21 class NGramParser : public UMemory
// N-gram table; presumably set from the constructor's theNgramList argument
// (the constructor body is not visible here).
25 const int32_t *ngramList;
// Byte-mapping table; presumably set from the constructor's theCharMap
// argument (the constructor body is not visible here).
32 const uint8_t *charMap;
// Takes one value b; presumably folds it into the n-gram being built
// (inferred from the name - confirm in the implementation file).
34 void addByte(int32_t b);
// Constructs a parser from an n-gram list and a character map. Both are taken
// as pointers to const, so the parser cannot modify the tables through them.
37 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
38 virtual ~NGramParser();
42 * Binary search for value in table, which must have exactly 64 entries.
44 int32_t search(const int32_t *table, int32_t value);
// Takes a single n-gram value; presumably checks it against ngramList
// (inferred from the name - confirm in the implementation file).
46 void lookup(int32_t thisNgram);
// The two functions below are virtual; NGramParser_IBM420 in this header
// redeclares both with the same signatures.
48 virtual int32_t nextByte(InputText *det);
49 virtual void parseCharacters(InputText *det);
// Non-virtual entry point taking the InputText to scan. The meaning of the
// int32_t result is not established by this header - see the implementation.
52 int32_t parse(InputText *det);
56 #if !UCONFIG_ONLY_HTML_CONVERSION
// NGramParser_IBM420: NGramParser subclass, declared only when
// UCONFIG_ONLY_HTML_CONVERSION is false. It redeclares nextByte() and
// parseCharacters() with the base-class signatures, so they override the
// base virtuals. Presumably handles IBM420 input (inferred from the name).
57 class NGramParser_IBM420 : public NGramParser
// Same parameter list as the NGramParser constructor.
60 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
61 ~NGramParser_IBM420();
// Takes a value b and returns an int32_t; presumably tests for / maps a
// Lam-Alef ligature byte (inferred from the name - confirm in the .cpp).
65 int32_t isLamAlef(int32_t b);
// Overrides of the NGramParser virtuals of the same name.
66 int32_t nextByte(InputText *det);
67 void parseCharacters(InputText *det);
// CharsetRecog_sbcs: abstract base (derived from CharsetRecognizer) for the
// recognizer classes declared in the rest of this header. getName() and
// match() are pure virtual here, so subclasses must provide them.
72 class CharsetRecog_sbcs : public CharsetRecognizer
76 virtual ~CharsetRecog_sbcs();
// Pure virtual: each subclass supplies its own name string.
77 virtual const char *getName() const = 0;
// Pure virtual: each subclass supplies its own matching against det,
// with a CharsetMatch passed in as results.
78 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
// Virtual (not pure) helper taking the input plus an n-gram table and a
// character map. CharsetRecog_IBM420_ar in this header redeclares it.
// The meaning of the int32_t result is not established by this header.
79 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
// CharsetRecog_8859_1: recognizer derived from CharsetRecog_sbcs that
// declares both of that base's pure virtuals (getName and match).
// Presumably targets ISO-8859-1 (inferred from the name only).
82 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
85 virtual ~CharsetRecog_8859_1();
// Overrides CharsetRecog_sbcs::getName().
86 const char *getName() const;
// Overrides CharsetRecog_sbcs::match().
87 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_8859_2: recognizer derived from CharsetRecog_sbcs that
// declares both of that base's pure virtuals (getName and match).
// Presumably targets ISO-8859-2 (inferred from the name only).
90 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
93 virtual ~CharsetRecog_8859_2();
// Overrides CharsetRecog_sbcs::getName().
94 const char *getName() const;
// Overrides CharsetRecog_sbcs::match().
95 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_8859_5: intermediate recognizer derived from
// CharsetRecog_sbcs. Only getName() is declared in the lines visible here;
// match() is declared by the subclass CharsetRecog_8859_5_ru in this header.
// Presumably targets ISO-8859-5 (inferred from the name only).
98 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
101 virtual ~CharsetRecog_8859_5();
// Overrides CharsetRecog_sbcs::getName().
102 const char *getName() const;
// CharsetRecog_8859_6: intermediate recognizer derived from
// CharsetRecog_sbcs. Only getName() is declared in the lines visible here;
// match() is declared by the subclass CharsetRecog_8859_6_ar in this header.
// Presumably targets ISO-8859-6 (inferred from the name only).
105 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
108 virtual ~CharsetRecog_8859_6();
// Overrides CharsetRecog_sbcs::getName().
110 const char *getName() const;
// CharsetRecog_8859_7: intermediate recognizer derived from
// CharsetRecog_sbcs. Only getName() is declared in the lines visible here;
// match() is declared by the subclass CharsetRecog_8859_7_el in this header.
// Presumably targets ISO-8859-7 (inferred from the name only).
113 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
116 virtual ~CharsetRecog_8859_7();
// Overrides CharsetRecog_sbcs::getName().
118 const char *getName() const;
// CharsetRecog_8859_8: intermediate recognizer derived from
// CharsetRecog_sbcs. Only getName() is declared in the lines visible here;
// match() is declared by the subclasses CharsetRecog_8859_8_I_he and
// CharsetRecog_8859_8_he in this header.
// Presumably targets ISO-8859-8 (inferred from the name only).
121 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
124 virtual ~CharsetRecog_8859_8();
// Overrides CharsetRecog_sbcs::getName(); CharsetRecog_8859_8_I_he
// redeclares it again further down this header.
126 virtual const char *getName() const;
// CharsetRecog_8859_9: intermediate recognizer derived from
// CharsetRecog_sbcs. Only getName() is declared in the lines visible here;
// match() is declared by the subclass CharsetRecog_8859_9_tr in this header.
// Presumably targets ISO-8859-9 (inferred from the name only).
129 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
132 virtual ~CharsetRecog_8859_9();
// Overrides CharsetRecog_sbcs::getName().
134 const char *getName() const;
// CharsetRecog_8859_5_ru: language-specific subclass of CharsetRecog_8859_5.
// Adds getLanguage() and match(); getName() is inherited from the parent.
// Presumably the Russian variant (inferred from the "_ru" suffix only).
139 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
142 virtual ~CharsetRecog_8859_5_ru();
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
144 const char *getLanguage() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
146 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_8859_6_ar: language-specific subclass of CharsetRecog_8859_6.
// Adds getLanguage() and match(); getName() is inherited from the parent.
// Presumably the Arabic variant (inferred from the "_ar" suffix only).
149 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
152 virtual ~CharsetRecog_8859_6_ar();
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
154 const char *getLanguage() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
156 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_8859_7_el: language-specific subclass of CharsetRecog_8859_7.
// Adds getLanguage() and match(); getName() is inherited from the parent.
// Presumably the Greek variant (inferred from the "_el" suffix only).
159 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
162 virtual ~CharsetRecog_8859_7_el();
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
164 const char *getLanguage() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
166 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_8859_8_I_he: subclass of CharsetRecog_8859_8. Unlike its
// sibling CharsetRecog_8859_8_he, it also redeclares getName(), so it
// reports its own name rather than the parent's.
// Presumably the Hebrew "ISO-8859-8-I" variant (inferred from the name only).
169 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
172 virtual ~CharsetRecog_8859_8_I_he();
// Overrides the virtual CharsetRecog_8859_8::getName().
174 const char *getName() const;
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
176 const char *getLanguage() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
178 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_8859_8_he: language-specific subclass of CharsetRecog_8859_8.
// Adds getLanguage() and match(); getName() is inherited from the parent.
// Presumably the Hebrew variant (inferred from the "_he" suffix only).
181 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
184 virtual ~CharsetRecog_8859_8_he ();
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
186 const char *getLanguage() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
188 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_8859_9_tr: language-specific subclass of CharsetRecog_8859_9.
// Adds getLanguage() and match(); getName() is inherited from the parent.
// Presumably the Turkish variant (inferred from the "_tr" suffix only).
191 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
194 virtual ~CharsetRecog_8859_9_tr ();
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
196 const char *getLanguage() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
198 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_windows_1256: recognizer derived directly from
// CharsetRecog_sbcs; declares getName(), getLanguage() and match().
// Presumably targets the windows-1256 code page (inferred from the name only).
201 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
204 virtual ~CharsetRecog_windows_1256();
// Overrides the pure virtual CharsetRecog_sbcs::getName().
206 const char *getName() const;
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
208 const char *getLanguage() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
210 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_windows_1251: recognizer derived directly from
// CharsetRecog_sbcs; declares getName(), getLanguage() and match().
// Presumably targets the windows-1251 code page (inferred from the name only).
213 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
216 virtual ~CharsetRecog_windows_1251();
// Overrides the pure virtual CharsetRecog_sbcs::getName().
218 const char *getName() const;
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
220 const char *getLanguage() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
222 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_KOI8_R: recognizer derived directly from CharsetRecog_sbcs;
// declares getName(), getLanguage() and match().
// Presumably targets the KOI8-R encoding (inferred from the name only).
226 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
229 virtual ~CharsetRecog_KOI8_R();
// Overrides the pure virtual CharsetRecog_sbcs::getName().
231 const char *getName() const;
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
233 const char *getLanguage() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
235 virtual UBool match(InputText *det, CharsetMatch *results) const;
238 #if !UCONFIG_ONLY_HTML_CONVERSION
// CharsetRecog_IBM424_he: intermediate recognizer derived from
// CharsetRecog_sbcs, declared only when UCONFIG_ONLY_HTML_CONVERSION is
// false. Only getLanguage() is declared in the lines visible here; getName()
// and match() are declared by the _rtl and _ltr subclasses in this header.
// Presumably Hebrew in IBM424 (inferred from the name only).
239 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
242 virtual ~CharsetRecog_IBM424_he();
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
244 const char *getLanguage() const;
// CharsetRecog_IBM424_he_rtl: subclass of CharsetRecog_IBM424_he that
// declares getName() and match(); getLanguage() is inherited from the parent.
// Presumably the right-to-left variant (inferred from the "_rtl" suffix only).
247 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
249 virtual ~CharsetRecog_IBM424_he_rtl();
// Overrides the pure virtual CharsetRecog_sbcs::getName().
251 const char *getName() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
253 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_IBM424_he_ltr: subclass of CharsetRecog_IBM424_he that
// declares getName() and match(); getLanguage() is inherited from the parent.
// Presumably the left-to-right variant (inferred from the "_ltr" suffix only).
256 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
257 virtual ~CharsetRecog_IBM424_he_ltr();
// Overrides the pure virtual CharsetRecog_sbcs::getName().
259 const char *getName() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
261 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_IBM420_ar: intermediate recognizer derived from
// CharsetRecog_sbcs. It declares getLanguage() and redeclares match_sbcs()
// with the base-class signature, so it replaces the base implementation.
// getName() and match() are declared by the _rtl and _ltr subclasses.
// Presumably Arabic in IBM420 (inferred from the name only).
264 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
267 virtual ~CharsetRecog_IBM420_ar();
// Presumably overrides a CharsetRecognizer virtual; that base class is not
// visible in this header, so confirm there.
269 const char *getLanguage() const;
// Overrides the virtual CharsetRecog_sbcs::match_sbcs(). Presumably uses
// NGramParser_IBM420 rather than NGramParser - confirm in the .cpp.
270 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
// CharsetRecog_IBM420_ar_rtl: subclass of CharsetRecog_IBM420_ar that
// declares getName() and match(); getLanguage() and match_sbcs() are
// inherited from the parent.
// Presumably the right-to-left variant (inferred from the "_rtl" suffix only).
274 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
276 virtual ~CharsetRecog_IBM420_ar_rtl();
// Overrides the pure virtual CharsetRecog_sbcs::getName().
278 const char *getName() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
280 virtual UBool match(InputText *det, CharsetMatch *results) const;
// CharsetRecog_IBM420_ar_ltr: subclass of CharsetRecog_IBM420_ar that
// declares getName() and match(); getLanguage() and match_sbcs() are
// inherited from the parent.
// Presumably the left-to-right variant (inferred from the "_ltr" suffix only).
283 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
284 virtual ~CharsetRecog_IBM420_ar_ltr();
// Overrides the pure virtual CharsetRecog_sbcs::getName().
286 const char *getName() const;
// Overrides the pure virtual CharsetRecog_sbcs::match().
288 virtual UBool match(InputText *det, CharsetMatch *results) const;
294 #endif /* !UCONFIG_NO_CONVERSION */
295 #endif /* __CSRSBCS_H */