change ukegine so filename, resize icon
[platform/core/uifw/ise-engine-unikey.git] / ukengine / charset.cpp
1 // -*- coding:unix; mode:c++; tab-width:4; c-basic-offset:4; indent-tabs-mode:nil -*-
2 /*------------------------------------------------------------------------------
3 VnConv: Vietnamese Encoding Converter Library
4 UniKey Project: http://unikey.sourceforge.net
5 Copyleft (C) 1998-2002 Pham Kim Long
6 Contact: longp@cslab.felk.cvut.cz
7
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2
11 of the License, or (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21 --------------------------------------------------------------------------------*/
22
23 #include <stddef.h>
24 #include <search.h>
25 #include <memory.h>
26 #include <ctype.h>
27 #include <stdlib.h>
28
29 #include "charset.h"
30 #include "data.h"
31
// Per-letter vowel flags consulted by IS_VOWEL: LoVowel is indexed by
// (letter - 'a') and HiVowel by (letter - 'A'). A non-zero entry marks the
// letter as a vowel.
int LoVowel['z'-'a'+1];
int HiVowel['Z'-'A'+1];

// Non-zero when x is an ASCII letter whose entry in LoVowel/HiVowel is set.
// Every use of the parameter is parenthesized so that expression arguments
// (e.g. a conditional expression) expand with the intended precedence.
// x is still evaluated several times, so it must be free of side effects.
#define IS_VOWEL(x) (((x) >= 'a' && (x) <= 'z' && LoVowel[(x)-'a']) || ((x) >= 'A' && (x) <= 'Z' && HiVowel[(x)-'A']))
36
37 SingleByteCharset *SgCharsets[CONV_TOTAL_SINGLE_CHARSETS]; // pointer table with CONV_TOTAL_SINGLE_CHARSETS slots; not filled in within this part of the file
38 DoubleByteCharset *DbCharsets[CONV_TOTAL_DOUBLE_CHARSETS]; // pointer table with CONV_TOTAL_DOUBLE_CHARSETS slots; not filled in within this part of the file
39
40 DllExport CVnCharsetLib VnCharsetLibObj; // global library object; the VIQR code in this file reads its m_options and escape-pattern members
41
42 //////////////////////////////////////////////////////
43 // Generic VnCharset class
44 //////////////////////////////////////////////////////
45 int VnCharset::elementSize()
46 {
47     return 1;
48 }
49
50 //-------------------------------------------
51 int VnInternalCharset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
52 {
53     if (!is.getNextDW(stdChar)) {
54         bytesRead = 0;
55         return 0;
56     }
57     bytesRead = sizeof(UKDWORD);
58     return 1;
59 }
60
61 //-------------------------------------------
62 int VnInternalCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
63 {
64   outLen = sizeof(StdVnChar);
65   UKWORD *pWord = (UKWORD *)&stdChar;
66   os.putW(*pWord);
67   pWord++;
68   return os.putW(*pWord);
69 }
70
71 //-------------------------------------------
72 int VnInternalCharset::elementSize()
73 {
74     return 4;
75 }
76
77 //-------------------------------------------
78 SingleByteCharset::SingleByteCharset(unsigned char * vnChars)
79 {
80         int i;
81         m_vnChars = vnChars;
82         memset(m_stdMap, 0, 256*sizeof(UKWORD));
83         for (i=0; i<TOTAL_VNCHARS; i++) {
84                 if (vnChars[i] != 0 && (i==TOTAL_VNCHARS-1 || vnChars[i] != vnChars[i+1]))
85                         m_stdMap[vnChars[i]] = i + 1;
86         }
87 }
88
89 //-------------------------------------------
90 int SingleByteCharset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
91 {
92         unsigned char ch;
93         if (!is.getNext(ch)) {
94                 bytesRead = 0;
95                 return 0;
96         }
97
98         stdChar = (m_stdMap[ch])? (VnStdCharOffset + m_stdMap[ch] - 1) : ch;
99         bytesRead = 1;
100         return 1;
101 }
102
103
104 //-------------------------------------------
105 int SingleByteCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
106 {
107         int ret;
108         unsigned char ch;
109         if (stdChar >= VnStdCharOffset) {
110                 outLen = 1;
111                 ch = m_vnChars[stdChar - VnStdCharOffset];
112                 if (ch == 0)
113                         ch = (stdChar == StdStartQuote)? PadStartQuote :
114                           ((stdChar == StdEndQuote)? PadEndQuote :
115                                    ((stdChar == StdEllipsis)? PadEllipsis: PadChar) );
116                 ret = os.putB(ch);
117         }
118         else {
119                 if (stdChar > 255 || m_stdMap[stdChar]) { 
120                         //this character is missing in the charset
121                         // output padding character
122                         outLen = 1;
123                         ret = os.putB(PadChar);
124                 }
125                 else {
126                         outLen = 1;
127                         ret = os.putB((UKBYTE)stdChar);
128                 }
129         }
130         return ret;
131 }
132
133 //-------------------------------------------
134 int wideCharCompare(const void *ele1, const void *ele2)
135 {
136         UKWORD ch1 = LOWORD(*((UKDWORD *)ele1));
137         UKWORD ch2 = LOWORD(*((UKDWORD *)ele2));
138         return (ch1 == ch2)? 0 : ((ch1 > ch2)? 1 : -1);
139 }
140
141 //-------------------------------------------
142 UnicodeCharset::UnicodeCharset(UnicodeChar *vnChars)
143 {
144         UKDWORD i;
145         m_toUnicode = vnChars;
146         for (i=0; i<TOTAL_VNCHARS; i++)
147                 m_vnChars[i] = (i << 16) + vnChars[i]; // high word is used for index
148         qsort(m_vnChars, TOTAL_VNCHARS, sizeof(UKDWORD), wideCharCompare);
149 }
150
151 //-------------------------------------------
152 int UnicodeCharset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
153 {
154         UnicodeChar uniCh;
155         if (!is.getNextW(uniCh)) {
156                 bytesRead = 0;
157                 return 0;
158         }
159         bytesRead = sizeof(UnicodeChar);
160         UKDWORD key = uniCh;
161         UKDWORD *pChar = (UKDWORD *)bsearch(&key, m_vnChars, TOTAL_VNCHARS, sizeof(UKDWORD), wideCharCompare);
162         if (pChar)
163                 stdChar = VnStdCharOffset + HIWORD(*pChar);
164         else
165                 stdChar = uniCh;
166         return 1;
167 }
168
169 //-------------------------------------------
170 int UnicodeCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
171 {
172         outLen = sizeof(UnicodeChar);
173         return os.putW((stdChar >= VnStdCharOffset)? 
174                                m_toUnicode[stdChar-VnStdCharOffset] : (UnicodeChar)stdChar);
175 }
176
177 //-------------------------------------------
178 int UnicodeCharset::elementSize()
179 {
180     return 2;
181 }
182
183 ////////////////////////////////////////
184 // Unicode decomposed
185 ////////////////////////////////////////
186 //-------------------------------------------
187 int uniCompInfoCompare(const void *ele1, const void *ele2)
188 {
189         UKDWORD ch1 = ((UniCompCharInfo *)ele1)->compChar;
190         UKDWORD ch2 = ((UniCompCharInfo *)ele2)->compChar;
191         return (ch1 == ch2)? 0 : ((ch1 > ch2)? 1 : -1);
192 }
193
194 UnicodeCompCharset::UnicodeCompCharset(UnicodeChar *uniChars, UKDWORD *uniCompChars)
195 {
196   int i,k;
197         m_uniCompChars = uniCompChars;
198         m_totalChars = 0;
199         for (i=0; i<TOTAL_VNCHARS; i++) {
200                 m_info[i].compChar = uniCompChars[i];
201                 m_info[i].stdIndex = i;
202                 m_totalChars++;
203         }
204
205         for (k=0, i=TOTAL_VNCHARS; k<TOTAL_VNCHARS; k++)
206                 if (uniChars[k] != uniCompChars[k]) {
207                         m_info[i].compChar = uniChars[k];
208                         m_info[i].stdIndex = k;
209                         m_totalChars++;
210                         i++;
211                 }
212
213         qsort(m_info, m_totalChars, sizeof(UniCompCharInfo), uniCompInfoCompare);
214 }
215
216 //---------------------------------------------
217 int UnicodeCompCharset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
218 {
219         // read first char
220
221         UniCompCharInfo key;
222         UKWORD w;
223         if (!is.getNextW(w)) {
224                 bytesRead = 0;
225                 return 0;
226         }
227         key.compChar = w;
228         bytesRead = 2;
229
230         UniCompCharInfo *pInfo = (UniCompCharInfo *)bsearch(&key, m_info, m_totalChars, 
231                                                                 sizeof(UniCompCharInfo), uniCompInfoCompare);
232         if (!pInfo)
233                 stdChar = key.compChar;
234         else {
235                 stdChar = pInfo->stdIndex + VnStdCharOffset;
236                 if (is.peekNextW(w)) {
237                         UKDWORD hi = w;
238                         if (hi > 0) {
239                                 key.compChar += hi << 16;
240                                 pInfo = (UniCompCharInfo *)bsearch(&key, m_info, m_totalChars,
241                                                        sizeof(UniCompCharInfo), uniCompInfoCompare);
242                                 if (pInfo) {
243                                         stdChar = pInfo->stdIndex + VnStdCharOffset;
244                                         bytesRead += 2;
245                                         is.getNextW(w);
246                                 }
247                         }
248                 }
249         }
250         return 1;
251 }
252
253 //---------------------------------------------
254 int UnicodeCompCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
255 {
256         int ret;
257         if (stdChar     >= VnStdCharOffset) {
258                 UKDWORD uniCompCh = m_uniCompChars[stdChar-VnStdCharOffset];
259                 UKWORD lo = LOWORD(uniCompCh);
260                 UKWORD hi = HIWORD(uniCompCh);
261                 outLen = 2;
262                 ret = os.putW(lo);
263                 if (hi > 0) {
264                         outLen += 2;
265                         ret = os.putW(hi);
266                 }
267         }
268         else {
269                 outLen = 2;
270                 ret = os.putW((UKWORD)stdChar);
271         }
272         return ret;
273 }
274
275 //-------------------------------------------
276 int UnicodeCompCharset::elementSize()
277 {
278     return 2;
279 }
280
281 ////////////////////////////////
282 // Unicode UTF-8              //
283 ////////////////////////////////
284 int UnicodeUTF8Charset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
285 {
286         UKWORD w1, w2, w3;
287         UKBYTE first, second, third;
288         UnicodeChar uniCh;
289
290         bytesRead = 0;
291         if (!is.getNext(first))
292                 return 0;
293         bytesRead = 1;
294
295         if (first < 0x80) 
296                 uniCh = first; // 1-byte sequence
297         else if ((first & 0xE0) == 0xC0) {
298                 //2-byte sequence
299                 if (!is.peekNext(second))
300                         return 0;
301                 if ((second & 0xC0) != 0x80) {
302                         stdChar = INVALID_STD_CHAR;
303                         return 1;
304                 }
305                 is.getNext(second);
306                 bytesRead = 2;
307                 w1 = first;
308                 w2 = second;
309                 uniCh = ((w1 & 0x001F) << 6) | (w2 & 0x3F);
310         }
311         else if ((first & 0xF0) == 0xE0) {
312                 //3-byte sequence
313                 if (!is.peekNext(second))
314                         return 0;
315                 if ((second & 0xC0) != 0x80) {
316                         stdChar = INVALID_STD_CHAR;
317                         return 1;
318                 }
319                 is.getNext(second);
320                 bytesRead = 2;
321                 if (!is.peekNext(third))
322                         return 0;
323                 if ((third & 0xC0) != 0x80) {
324                         stdChar = INVALID_STD_CHAR;
325                         return 1;
326                 }
327                 is.getNext(third);
328                 bytesRead = 3;
329                 w1 = first;
330                 w2 = second;
331                 w3 = third;
332                 uniCh = ((w1 & 0x000F) << 12) | ((w2 & 0x003F) << 6) | (w3 & 0x003F);
333         }
334         else {
335                 stdChar = INVALID_STD_CHAR;
336                 return 1;
337         }
338
339         // translate to StdVnChar
340         UKDWORD key = uniCh;
341         UKDWORD *pChar = (UKDWORD *)bsearch(&key, m_vnChars, TOTAL_VNCHARS, sizeof(UKDWORD), wideCharCompare);
342         if (pChar)
343                 stdChar = VnStdCharOffset + HIWORD(*pChar);
344         else stdChar = uniCh;
345         return 1;
346 }
347
348 //-------------------------------------------
349 int UnicodeUTF8Charset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
350 {
351         UnicodeChar uChar = (stdChar < VnStdCharOffset)? 
352                                 (UnicodeChar)stdChar : m_toUnicode[stdChar-VnStdCharOffset];
353         int ret;
354         if (uChar < 0x0080) {
355                 outLen = 1;
356                 ret = os.putB((UKBYTE)uChar);
357         } else if (uChar < 0x0800) {
358                 outLen = 2;
359                 os.putB(0xC0 | (UKBYTE)(uChar >> 6));
360                 ret = os.putB(0x80 | (UKBYTE)(uChar & 0x003F));
361         } else {
362                 outLen = 3;
363                 os.putB(0xE0 | (UKBYTE)(uChar >> 12));
364                 os.putB(0x80 | (UKBYTE)((uChar >> 6) & 0x003F));
365                 ret = os.putB(0x80 | (UKBYTE)(uChar & 0x003F));
366         }
367         return ret;
368 }
369
370 ////////////////////////////////////////
371 // Unicode character reference &#D;   //
372 ////////////////////////////////////////
// Numeric value (0-15) of a hexadecimal digit character, accepting both
// letter cases. Any other character yields 0.
int hexDigitValue(unsigned char digit)
{
    int value = 0;
    if ('0' <= digit && digit <= '9')
        value = digit - '0';
    else if ('a' <= digit && digit <= 'f')
        value = digit - 'a' + 10;
    else if ('A' <= digit && digit <= 'F')
        value = digit - 'A' + 10;
    return value;
}
383
384
385 //--------------------------------------
386 int UnicodeRefCharset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
387 {
388         unsigned char ch;
389         UnicodeChar uniCh;
390         bytesRead = 0;
391         if (!is.getNext(ch))
392                 return 0;
393         bytesRead = 1;
394         uniCh = ch;
395         if (ch == '&') {
396                 if (is.peekNext(ch) && ch == '#') {
397                         is.getNext(ch);
398                         bytesRead++;
399                         if (!is.eos()) {
400                                 is.peekNext(ch);
401                                 if (ch != 'x' && ch != 'X') {
402                                         UKWORD code = 0;
403                                         int digits = 0;
404                                         while (is.peekNext(ch) && isdigit(ch) && digits < 5) {
405                                                 is.getNext(ch);
406                                                 bytesRead++;
407                                                 code = code*10 + (ch - '0');
408                                                 digits++;
409                                         }
410                                         if (is.peekNext(ch) && ch == ';') {
411                                                 is.getNext(ch);
412                                                 bytesRead++;
413                                                 uniCh = code;
414                                         }
415                                 }
416                                 else {
417                                         is.getNext(ch);
418                                         bytesRead++;
419                                         UKWORD code = 0;
420                                         int digits = 0;
421                                         while (is.peekNext(ch) && isxdigit(ch) && digits < 4) {
422                                                 is.getNext(ch);
423                                                 bytesRead++;
424                                                 code = (code << 4) + hexDigitValue(ch);
425                                                 digits++;
426                                         }
427                                         if (is.peekNext(ch) && ch == ';') {
428                                                 is.getNext(ch);
429                                                 bytesRead++;
430                                                 uniCh = code;
431                                         }
432                                 } // hex digits
433                         }
434                 }
435         }
436
437         // translate to StdVnChar
438         UKDWORD key = uniCh;
439         UKDWORD *pChar = (UKDWORD *)bsearch(&key, m_vnChars, TOTAL_VNCHARS, sizeof(UKDWORD), wideCharCompare);
440         if (pChar)
441                 stdChar = VnStdCharOffset + HIWORD(*pChar);
442         else stdChar = uniCh;
443         return 1;
444 }
445
446
447 //--------------------------------
448 int UnicodeRefCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
449 {
450         UnicodeChar uChar = (stdChar < VnStdCharOffset)? 
451                                 (UnicodeChar)stdChar : m_toUnicode[stdChar-VnStdCharOffset];
452         int ret;
453         if (uChar < 128) {
454                 outLen = 1;
455                 ret = os.putB((UKBYTE)uChar);
456         }
457         else {
458                 outLen = 2;
459                 os.putB((UKBYTE)'&');
460                 os.putB((UKBYTE)'#');
461
462                 int i, digit, prev, base;
463                 prev = 0;
464                 base = 10000;
465                 for (i=0; i < 5; i++) {
466                         digit = uChar / base;
467                         if (digit || prev) {
468                                 prev = 1;
469                                 outLen++;
470                                 os.putB('0' + (unsigned char)digit);
471                         }
472                         uChar %= base;
473                         base /= 10;
474                 }
475                 ret = os.putB((UKBYTE)';');
476                 outLen++;
477         }
478         return ret;
479 }
480
// Character for a hexadecimal digit value 0-15 ('0'-'9', then 'A'-'F').
// The parameter is parenthesized so expression arguments expand correctly;
// it is evaluated more than once, so it must have no side effects.
#define HEX_DIGIT(x) (((x) < 10)? ('0'+(x)) : ('A'+(x)-10))
482
483 //--------------------------------
484 int UnicodeHexCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
485 {
486         UnicodeChar uChar = (stdChar < VnStdCharOffset)? 
487                                 (UnicodeChar)stdChar : m_toUnicode[stdChar-VnStdCharOffset];
488         int ret;
489         if (uChar < 256) {
490                 outLen = 1;
491                 ret = os.putB((UKBYTE)uChar);
492         }
493         else {
494                 outLen = 3;
495                 os.putB('&');
496                 os.putB('#');
497                 os.putB('x');
498
499                 int i, digit;
500                 int prev = 0;
501                 int shifts = 12;
502
503                 for (i=0; i < 4; i++) {
504                         digit = ((uChar >> shifts) & 0x000F);
505                         if (digit > 0 || prev) {
506                                 prev = 1;
507                                 outLen++;
508                                 os.putB((UKBYTE)HEX_DIGIT(digit));
509                         }
510                         shifts -= 4;
511                 }
512                 ret = os.putB(';');
513                 outLen++;
514         }
515         return ret;
516 }
517
518
519 /////////////////////////////////
520 // Class UnicodeCStringCharset  /
521 /////////////////////////////////
522 void UnicodeCStringCharset::startInput()
523 {
524         m_prevIsHex = 0;
525 }
526
527 //----------------------------------------
528 int UnicodeCStringCharset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
529 {
530         unsigned char ch;
531         UnicodeChar uniCh;
532         bytesRead = 0;
533         if (!is.getNext(ch))
534                 return 0;
535         bytesRead = 1;
536         uniCh = ch;
537         if (ch == '\\') {
538                 if (is.peekNext(ch) && (ch=='x' || ch=='X')) {
539                         is.getNext(ch);
540                         bytesRead++;
541                         UKWORD code = 0;
542                         int digits = 0;
543                         while (is.peekNext(ch) && isxdigit(ch) && digits < 4) {
544                                 is.getNext(ch);
545                                 bytesRead++;
546                                 code = (code << 4) + hexDigitValue(ch);
547                                 digits++;
548                         }
549                         uniCh = code;
550                 }
551         }
552
553         // translate to StdVnChar
554         UKDWORD key = uniCh;
555         UKDWORD *pChar = (UKDWORD *)bsearch(&key, m_vnChars, TOTAL_VNCHARS, sizeof(UKDWORD), wideCharCompare);
556         if (pChar)
557                 stdChar = VnStdCharOffset + HIWORD(*pChar);
558         else stdChar = uniCh;
559         return 1;
560 }
561
562 //------------------------------------
563 int UnicodeCStringCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
564 {
565         UnicodeChar uChar = (stdChar < VnStdCharOffset)? 
566                                 (UnicodeChar)stdChar : m_toUnicode[stdChar-VnStdCharOffset];
567         int ret;
568         if (uChar < 128 && !isxdigit(uChar) && uChar != 'x' && uChar != 'X') {
569                 outLen = 1;
570                 ret = os.putB((UKBYTE)uChar);
571         }
572         else {
573                 outLen = 2;
574                 os.putB('\\');
575                 os.putB('x');
576
577                 int i, digit;
578                 int prev = 0;
579                 int shifts = 12;
580
581                 for (i=0; i < 4; i++) {
582                         digit = ((uChar >> shifts) & 0x000F);
583                         if (digit > 0 || prev) {
584                                 prev = 1;
585                                 outLen++;
586                                 os.putB((UKBYTE)HEX_DIGIT(digit));
587                         }
588                         shifts -= 4;
589                 }
590                 ret = os.isOK();
591                 m_prevIsHex = 1;
592         }
593         return ret;
594 }
595
596 /////////////////////////////////
597 // Double-byte charsets        //
598 /////////////////////////////////
599 DoubleByteCharset::DoubleByteCharset(UKWORD *vnChars)
600 {
601         m_toDoubleChar = vnChars;
602         memset(m_stdMap, 0, 256*sizeof(UKWORD));
603         for (int i=0; i<TOTAL_VNCHARS; i++) {
604                 if (vnChars[i] >> 8) // a 2-byte character
605                         m_stdMap[vnChars[i] >> 8] = 0xFFFF; //INVALID_STD_CHAR;
606                 else if (m_stdMap[vnChars[i]] == 0)
607                         m_stdMap[vnChars[i]] = i+1;
608                 m_vnChars[i] = (i << 16) + vnChars[i]; // high word is used for StdChar index
609         }
610         qsort(m_vnChars, TOTAL_VNCHARS, sizeof(UKDWORD), wideCharCompare);
611 }
612
613 //---------------------------------------------
614 int DoubleByteCharset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
615 {
616         unsigned char ch;
617
618         // read first byte
619         bytesRead = 0;
620         if (!is.getNext(ch))
621                 return 0;
622         bytesRead = 1;
623         stdChar = m_stdMap[ch];
624         if (stdChar == 0)
625                 stdChar = ch;
626         else if (stdChar == 0xFFFF)
627                 stdChar = INVALID_STD_CHAR;
628         else {
629                 stdChar += VnStdCharOffset - 1;
630                 UKBYTE hi;
631                 if (is.peekNext(hi) && hi > 0) {
632                         //test if a double-byte character is encountered
633                         UKDWORD key = MAKEWORD(ch,hi);
634                         UKDWORD *pChar = (UKDWORD *)bsearch(&key, m_vnChars, TOTAL_VNCHARS, sizeof(UKDWORD), wideCharCompare);
635                         if (pChar) {
636                                 stdChar = VnStdCharOffset + HIWORD(*pChar);
637                                 bytesRead = 2;
638                                 is.getNext(hi);
639                         }
640                 }
641         }
642         return 1;
643 }
644
645 //---------------------------------------------
646 int DoubleByteCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
647 {
648         int ret;
649         if (stdChar     >= VnStdCharOffset) {
650                 UKWORD wCh = m_toDoubleChar[stdChar-VnStdCharOffset];
651
652                 if (wCh & 0xFF00) {
653                         outLen = 2;
654                         os.putB((UKBYTE)(wCh & 0x00FF));
655                         ret = os.putB((UKBYTE)(wCh >> 8));
656                 }
657                 else {
658                         unsigned char b = (unsigned char)wCh;
659                         if (m_stdMap[b] == 0xFFFF)
660                                 b = PadChar;
661                         outLen = 1;
662                         ret = os.putB(b);
663                 }
664 /*
665                 outLen = 1;
666                 ret = os.putB((UKBYTE)(wCh & 0x00FF));
667                 if (wCh & 0xFF00) {
668                         outLen = 2;
669                         ret = os.putB((UKBYTE)(wCh >> 8));
670                 }
671 */
672         }
673         else {
674                 if (stdChar > 255 || m_stdMap[stdChar]) {
675                         outLen = 1;
676                         ret = os.putB((UKBYTE)PadChar);
677                 }
678                 else {
679                         outLen = 1;
680                         ret = os.putB((UKBYTE)stdChar);
681                 }
682         }
683         return ret;
684 }
685
686 /////////////////////////////////////////////
687 // Class: VIQRCharset                      //
688 /////////////////////////////////////////////
689
// The five VIQR tone-mark characters.
unsigned char VIQRTones[] = { '\'', '`', '?', '~', '.' };

// Text patterns used for VIQR escaping (URL, e-mail and similar prefixes).
const char *VIQREscapes[] = {
    "://", "/", "@",
    "mailto:", "email:", "news:",
    "www", "ftp"
};

// Number of entries in VIQREscapes.
const int VIQREscCount = sizeof(VIQREscapes) / sizeof(VIQREscapes[0]);
704
705 VIQRCharset::VIQRCharset(UKDWORD *vnChars)
706 {
707         memset(m_stdMap, 0, 256*sizeof(UKWORD));
708         int i;
709         UKDWORD dw;
710         m_vnChars = vnChars;
711         for (i=0; i<TOTAL_VNCHARS; i++) {
712                 dw = m_vnChars[i];
713                 if (!(dw & 0xffffff00)) { //single byte
714                         //ch = (unsigned char)(dw & 0xff);
715                         m_stdMap[dw] = i+256;
716                 }
717         }
718
719         // set offset from base characters according to tone marks
720         m_stdMap[(unsigned char)'\''] = 2;
721         m_stdMap[(unsigned char)'`'] = 4;
722         m_stdMap[(unsigned char)'?'] = 6;
723         m_stdMap[(unsigned char)'~'] = 8;
724         m_stdMap[(unsigned char)'.'] = 10;
725         m_stdMap[(unsigned char)'^'] = 12;
726
727         m_stdMap[(unsigned char)'('] = 24;
728         m_stdMap[(unsigned char)'+'] = 26;
729         m_stdMap[(unsigned char)'*'] = 26;
730 }
731
732 //---------------------------------------------------
733 void VIQRCharset::startInput()
734 {
735         m_suspicious = 0;
736         m_atWordBeginning = 1;
737         m_gotTone = 0;
738         m_escAll = 0;
739         if (VnCharsetLibObj.m_options.viqrEsc)
740                 VnCharsetLibObj.m_VIQREscPatterns.reset();
741 }
742
743 //---------------------------------------------------
744 int VIQRCharset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
745 {
746         unsigned char ch1;
747         bytesRead = 0;
748
749         if (!is.getNext(ch1))
750                 return 0;
751         bytesRead = 1;
752         stdChar = m_stdMap[ch1];
753
754         if (VnCharsetLibObj.m_options.viqrEsc) {
755                 if (VnCharsetLibObj.m_VIQREscPatterns.foundAtNextChar(ch1)!=-1) {
756                         m_escAll = 1;
757                 }
758         }
759
760         if (m_escAll && (ch1==' ' || ch1=='\t' || ch1=='\r' || ch1=='\n'))
761                 m_escAll = 0;
762         
763         if (ch1 == '\\') {
764                 // ecape character , try to read next
765                 if (!is.getNext(ch1)) {
766                         bytesRead++;
767                         stdChar = m_stdMap[ch1];
768                 }
769         }
770
771         if (stdChar < 256) {
772                 stdChar = ch1;
773         }
774         else if (!m_escAll && !is.eos()) {
775                 // try to read the next byte
776                 unsigned char ch2;
777                 is.peekNext(ch2);
778                 unsigned char upper = toupper(ch1);
779         if ((!VnCharsetLibObj.m_options.smartViqr || m_atWordBeginning) &&
780              upper == 'D' && (ch2 == 'd' || ch2 == 'D')) 
781         {
782                         is.getNext(ch2);
783                         bytesRead++;
784                         stdChar += 2; // dd is 2 positions after d.
785                 }
786                 else {
787                         StdVnChar index = m_stdMap[ch2];
788
789                         int cond;
790                         if (m_suspicious) {
791                                 cond = IS_VOWEL(ch1) &&
792                              ( index == 2 || index == 4 || index == 8 || //not accepting ? . in suspicious mode
793                                    (index == 12 &&  (upper == 'A' || upper == 'E' || upper == 'O')) ||
794                                    (m_stdMap[ch2] == 24 && upper== 'A') ||
795                                    (m_stdMap[ch2] == 26 && (upper == 'O' || upper == 'U')) );
796                                 if (cond)
797                                         m_suspicious = 0;
798                         }
799                         else
800                                 cond = IS_VOWEL(ch1) &&
801                                   ((index <= 10  && index > 0 && (!m_gotTone || (index!=6 && index!=10)) ) ||
802                                    (index == 12 &&  (upper == 'A' || upper == 'E' || upper == 'O')) ||
803                                    (m_stdMap[ch2] == 24 && upper== 'A') ||
804                                    (m_stdMap[ch2] == 26 && (upper == 'O' || upper == 'U')) );
805
806                         if (cond) {
807                                 if (index > 0)
808                                         m_gotTone = 1; //we have a tone/breve/hook in the current word
809
810                                 // ok, take this byte
811                                 is.getNext(ch2);
812                                 bytesRead++;
813                                 int offset = m_stdMap[ch2];
814                                 if (offset == 26) offset = 24;
815                                 if (offset == 24 && (ch1 == 'u' || ch1 == 'U'))
816                                         offset = 12;
817                                 stdChar += offset;
818                                 // check next byte
819                                 if (is.peekNext(ch2)) {
820                                         if (index > 10 && m_stdMap[ch2] > 0 && m_stdMap[ch2] <= 10) {
821                                                 // ok, take one more byte
822                                                 is.getNext(ch2);
823                                                 bytesRead++;
824                                                 stdChar += m_stdMap[ch2];
825                                         }
826                                 }
827                         }
828                 }
829         }
830         m_atWordBeginning = (stdChar < 256);
831         if (stdChar < 256) {
832                 m_gotTone = 0; //reset this flag because we are at the beginning of a new word
833         }
834
835         // adjust stdChar
836         if (stdChar >= 256)
837                 stdChar += VnStdCharOffset - 256;
838         return 1;
839 }
840
841 //---------------------------------------------------
842 void VIQRCharset::startOutput()
843 {
844         m_escapeBowl = 0;
845         m_escapeRoof = 0;
846         m_escapeHook = 0;
847         m_escapeTone = 0;
848         m_noOutEsc = 0;
849         VnCharsetLibObj.m_VIQROutEscPatterns.reset();
850 }
851
852 //---------------------------------------------------
853 int VIQRCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
854 {
855         int ret;
856         UKBYTE b;
857         if (stdChar >= VnStdCharOffset) {
858                 outLen = 1;
859                 UKDWORD dw = m_vnChars[stdChar-VnStdCharOffset];
860
861                 unsigned char first = (unsigned char)dw;
862                 unsigned char firstUpper = toupper(first);
863
864                 b = (UKBYTE)dw;
865                 ret = os.putB(b);
866                 if (VnCharsetLibObj.m_VIQROutEscPatterns.foundAtNextChar(b) != -1)
867                   m_noOutEsc = 1;
868
869                 if (m_noOutEsc && (b==' ' || b=='\t' || b=='\r' || b=='\n'))
870                   m_noOutEsc = 0;
871
872                 if (dw & 0x0000FF00) {
873                         // second byte is present
874                         unsigned char second = (UKBYTE)(dw >> 8);
875                         outLen++;
876                         ret = os.putB(second);
877
878                         if (dw & 0x00FF0000) {
879                                 //third byte is present
880                                 outLen++;
881                                 ret = os.putB((UKBYTE)(dw >> 16));
882                                 m_escapeTone = 0;
883                         }
884                         else {
885                                 UKWORD index = m_stdMap[second];
886                                 m_escapeTone = (index == 12 || index == 24 || index == 26);
887                         }
888
889                         VnCharsetLibObj.m_VIQROutEscPatterns.reset();
890
891                         m_escapeBowl = 0;
892                         m_escapeHook = 0;
893                         m_escapeRoof = 0;
894                 }
895                 else {
896                         m_escapeTone = IS_VOWEL(first);
897                         m_escapeBowl = (firstUpper == 'A');
898                         m_escapeHook = (firstUpper == 'U' || firstUpper == 'O');
899                         m_escapeRoof = (firstUpper == 'A' || firstUpper == 'E' || firstUpper == 'O');
900                 }
901         }
902         else {
903                 if (stdChar > 255) {
904                         outLen = 1;
905                         ret = os.putB((UKBYTE)PadChar);
906                         if (VnCharsetLibObj.m_VIQROutEscPatterns.foundAtNextChar((UKBYTE)PadChar) != -1)
907                           m_noOutEsc = 1;
908                 }
909                 else {
910                         outLen = 1;
911                         UKWORD index = m_stdMap[stdChar];
912                         if (!VnCharsetLibObj.m_options.viqrMixed && !m_noOutEsc &&
913                                    (stdChar=='\\' || 
914                                         (index > 0 && index <= 10 && m_escapeTone) ||
915                                         (index == 12 && m_escapeRoof) ||
916                                         (index == 24 && m_escapeBowl) ||
917                                         (index == 26 && m_escapeHook))) {
918                                 //(m_stdMap[stdChar] > 0 && m_stdMap[stdChar] <= 26)) {
919                                 // tone mark, needs an escape character
920                                 outLen++;
921                                 ret = os.putB('\\');
922                                 if (VnCharsetLibObj.m_VIQROutEscPatterns.foundAtNextChar('\\') != -1)
923                                   m_noOutEsc = 1;
924                         }
925                         b = (UKBYTE)stdChar;
926                         ret = os.putB(b);
927                         if (VnCharsetLibObj.m_VIQROutEscPatterns.foundAtNextChar(b) != -1)
928                           m_noOutEsc = 1;
929                         if (m_noOutEsc && (b==' ' || b=='\t' || b=='\r' || b=='\n'))
930                           m_noOutEsc = 0;
931                 }
932                 // reset escape marks
933                 m_escapeBowl = 0;
934                 m_escapeRoof = 0;
935                 m_escapeHook = 0;
936                 m_escapeTone = 0;
937         }
938         return ret;
939 }
940
941 /////////////////////////////////////////////
942 // Class: UTF8VIQRCharset                  //
943 /////////////////////////////////////////////
944
945 //-----------------------------------------
946 UTF8VIQRCharset::UTF8VIQRCharset(UnicodeUTF8Charset *pUtf, VIQRCharset *pViqr)
947 {
948   m_pUtf = pUtf;
949   m_pViqr = pViqr;
950 }
951
952 //-----------------------------------------
953 void UTF8VIQRCharset::startInput()
954 {
955   m_pUtf->startInput();
956   m_pViqr->startInput();
957 }
958
959 //-----------------------------------------
960 void UTF8VIQRCharset::startOutput()
961 {
962   m_pUtf->startOutput();
963   m_pViqr->startOutput();
964 }
965
966 //-----------------------------------------
967 int UTF8VIQRCharset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
968 {
969         UKBYTE ch;
970
971         if (!is.peekNext(ch))
972                 return 0;
973
974         if (ch > 0xBF && ch < 0xFE) {
975                 m_pViqr->startInput(); // just to reset the VIQR object state
976                 m_pViqr->m_suspicious = 1;
977                 return m_pUtf->nextInput(is, stdChar, bytesRead);
978         }
979
980         return m_pViqr->nextInput(is, stdChar, bytesRead);
981 }
982
983 //-----------------------------------------
984 int UTF8VIQRCharset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
985 {
986   return m_pViqr->putChar(os, stdChar, outLen);
987 }
988
989
990 //-----------------------------------------
991 CVnCharsetLib::CVnCharsetLib()
992 {
993         unsigned char ch;
994         for (ch = 'a'; ch < 'z'; ch++)
995                 LoVowel[ch-'a'] = 0;
996         LoVowel['a'-'a'] = 1;
997         LoVowel['e'-'a'] = 1;
998         LoVowel['i'-'a'] = 1;
999         LoVowel['o'-'a'] = 1;
1000         LoVowel['u'-'a'] = 1;
1001         LoVowel['y'-'a'] = 1;
1002
1003         for (ch = 'A'; ch < 'Z'; ch++)
1004                 HiVowel[ch-'A'] = 0;
1005         HiVowel['A'-'A'] = 1;
1006         HiVowel['E'-'A'] = 1;
1007         HiVowel['I'-'A'] = 1;
1008         HiVowel['O'-'A'] = 1;
1009         HiVowel['U'-'A'] = 1;
1010         HiVowel['Y'-'A'] = 1;
1011
1012         m_pUniCharset = NULL;
1013         m_pUniCompCharset = NULL;
1014         m_pUniUTF8 = NULL;
1015         m_pUniRef = NULL;
1016         m_pUniHex = NULL;
1017         m_pVIQRCharObj = NULL;
1018         m_pUVIQRCharObj = NULL;
1019         m_pWinCP1258 = NULL;
1020         m_pVnIntCharset = NULL;
1021
1022         int i;
1023         for (i = 0; i < CONV_TOTAL_SINGLE_CHARSETS; i++)
1024                 m_sgCharsets[i] = NULL;
1025
1026         for (i = 0; i < CONV_TOTAL_DOUBLE_CHARSETS; i++)
1027                 m_dbCharsets[i] = NULL;
1028
1029         VnConvResetOptions(&m_options);
1030         m_VIQREscPatterns.init((char**)VIQREscapes, VIQREscCount);
1031         m_VIQROutEscPatterns.init((char**)VIQREscapes, VIQREscCount);
1032 }
1033
1034
1035 //-----------------------------------------
1036 CVnCharsetLib::~CVnCharsetLib()
1037 {
1038         if (m_pUniCharset)
1039                 delete m_pUniCharset;
1040         if (m_pUniUTF8)
1041                 delete m_pUniUTF8;
1042         if (m_pUniRef)
1043                 delete m_pUniRef;
1044         if (m_pUniHex)
1045                 delete m_pUniHex;
1046         if (m_pVIQRCharObj)
1047                 delete m_pVIQRCharObj;
1048         if (m_pUVIQRCharObj)
1049                 delete m_pUVIQRCharObj;
1050         if (m_pWinCP1258)
1051                 delete m_pWinCP1258;
1052         if (m_pUniCString)
1053                 delete m_pUniCString;
1054         if (m_pVnIntCharset)
1055                 delete m_pVnIntCharset;
1056
1057         int i;
1058         for (i = 0; i < CONV_TOTAL_SINGLE_CHARSETS; i++)
1059                 if (m_sgCharsets[i]) delete m_sgCharsets[i];
1060
1061         for (i = 0; i < CONV_TOTAL_DOUBLE_CHARSETS; i++)
1062                 if (m_dbCharsets[i]) delete m_dbCharsets[i];
1063
1064 }
1065
1066 //-----------------------------------------
1067 VnCharset * CVnCharsetLib::getVnCharset(int charsetIdx)
1068 {
1069         switch (charsetIdx) {
1070
1071         case CONV_CHARSET_UNICODE:
1072                 if (m_pUniCharset == NULL)
1073                         m_pUniCharset = new UnicodeCharset(UnicodeTable);
1074                 return m_pUniCharset;
1075         case CONV_CHARSET_UNIDECOMPOSED:
1076                 if (m_pUniCompCharset == NULL)
1077                         m_pUniCompCharset = new UnicodeCompCharset(UnicodeTable, UnicodeComposite);
1078                 return m_pUniCompCharset;
1079         case CONV_CHARSET_UNIUTF8:
1080   case CONV_CHARSET_XUTF8:
1081                 if (m_pUniUTF8 == NULL)
1082                         m_pUniUTF8 = new UnicodeUTF8Charset(UnicodeTable);
1083                 return m_pUniUTF8;
1084         
1085         case CONV_CHARSET_UNIREF:
1086                 if (m_pUniRef == NULL)
1087                         m_pUniRef = new UnicodeRefCharset(UnicodeTable);
1088                 return m_pUniRef;
1089
1090         case CONV_CHARSET_UNIREF_HEX:
1091                 if (m_pUniHex == NULL)
1092                         m_pUniHex = new UnicodeHexCharset(UnicodeTable);
1093                 return m_pUniHex;
1094
1095         case CONV_CHARSET_UNI_CSTRING:
1096                 if (m_pUniCString == NULL)
1097                         m_pUniCString = new UnicodeCStringCharset(UnicodeTable);
1098                 return m_pUniCString;
1099
1100         case CONV_CHARSET_WINCP1258:
1101                 if (m_pWinCP1258 == NULL)
1102                         m_pWinCP1258 = new WinCP1258Charset(WinCP1258, WinCP1258Pre);
1103                 return m_pWinCP1258;
1104
1105         case CONV_CHARSET_VIQR:
1106                 if (m_pVIQRCharObj == NULL)
1107                         m_pVIQRCharObj = new VIQRCharset(VIQRTable);
1108                 return m_pVIQRCharObj;
1109
1110         case CONV_CHARSET_VNSTANDARD:
1111                 if (m_pVnIntCharset == NULL)
1112                         m_pVnIntCharset = new VnInternalCharset();
1113                 return m_pVnIntCharset;
1114
1115         case CONV_CHARSET_UTF8VIQR:
1116           if (m_pUVIQRCharObj == NULL) {
1117             if (m_pVIQRCharObj == NULL)
1118               m_pVIQRCharObj = new VIQRCharset(VIQRTable);
1119
1120             if (m_pUniUTF8 == NULL)
1121               m_pUniUTF8 = new UnicodeUTF8Charset(UnicodeTable);
1122             m_pUVIQRCharObj = new UTF8VIQRCharset(m_pUniUTF8, m_pVIQRCharObj);
1123           }
1124           return m_pUVIQRCharObj;
1125
1126         default:
1127                 if (IS_SINGLE_BYTE_CHARSET(charsetIdx)) {
1128                         int i = charsetIdx - CONV_CHARSET_TCVN3;
1129                         if (m_sgCharsets[i] == NULL)
1130                                 m_sgCharsets[i] = new SingleByteCharset(SingleByteTables[i]);
1131                         return m_sgCharsets[i];
1132                 }
1133                 else if (IS_DOUBLE_BYTE_CHARSET(charsetIdx)) {
1134                         int i = charsetIdx - CONV_CHARSET_VNIWIN;
1135                         if (m_dbCharsets[i] == NULL)
1136                                 m_dbCharsets[i] = new DoubleByteCharset(DoubleByteTables[i]);
1137                         return m_dbCharsets[i];
1138                 }
1139         }
1140         return NULL;
1141 }
1142
1143
1144 //-------------------------------------------------
1145 DllExport void VnConvSetOptions(VnConvOptions *pOptions)
1146 {
1147         VnCharsetLibObj.m_options = *pOptions;
1148 }
1149
1150 //-------------------------------------------------
1151 DllExport void VnConvGetOptions(VnConvOptions *pOptions)
1152 {
1153         *pOptions = VnCharsetLibObj.m_options;
1154 }
1155
1156 //-------------------------------------------------
1157 DllExport void VnConvResetOptions(VnConvOptions *pOptions)
1158 {
1159         pOptions->viqrEsc = 1;
1160         pOptions->viqrMixed = 0;
1161         pOptions->toUpper = 0;
1162         pOptions->toLower = 0;
1163         pOptions->removeTone = 0;
1164     pOptions->smartViqr = 1;
1165 }
1166
1167
1168 /////////////////////////////////////////////
1169 // Class WinCP1258Charset
1170 /////////////////////////////////////////////
1171 WinCP1258Charset::WinCP1258Charset(UKWORD *compositeChars, UKWORD *precomposedChars)
1172 {
1173   int i,k;
1174         m_toDoubleChar = compositeChars;
1175         memset(m_stdMap, 0, 256*sizeof(UKWORD));
1176
1177         // encode composite chars
1178         for (i=0; i<TOTAL_VNCHARS; i++) {
1179                 if (compositeChars[i] >> 8) // a 2-byte character
1180                         m_stdMap[compositeChars[i] >> 8] = 0xFFFF; //INVALID_STD_CHAR;
1181                 else if (m_stdMap[compositeChars[i]] == 0)
1182                         m_stdMap[compositeChars[i]] = i+1;
1183
1184                 m_vnChars[i] = (i << 16) + compositeChars[i]; // high word is used for StdChar index
1185         }
1186
1187         m_totalChars = TOTAL_VNCHARS;
1188
1189         //add precomposed chars to the table
1190         for (k=0, i=TOTAL_VNCHARS; k<TOTAL_VNCHARS; k++)
1191                 if (precomposedChars[k] != compositeChars[k]) {
1192                         if (precomposedChars[k] >> 8) // a 2-byte character
1193                                 m_stdMap[precomposedChars[k] >> 8] = 0xFFFF; //INVALID_STD_CHAR;
1194                         else if (m_stdMap[precomposedChars[k]] == 0)
1195                                 m_stdMap[precomposedChars[k]] = k+1;
1196
1197                         m_vnChars[i] = (k << 16) + precomposedChars[k];
1198                         m_totalChars++;
1199                         i++;
1200                 }
1201
1202         qsort(m_vnChars, m_totalChars, sizeof(UKDWORD), wideCharCompare);
1203 }
1204
1205
1206 //---------------------------------------------------------------------
// This function is basically the same as that of DoubleByteCharset,
// except that m_totalChars is used instead of the constant TOTAL_VNCHARS
1209 //---------------------------------------------------------------------
1210 int WinCP1258Charset::nextInput(ByteInStream & is, StdVnChar & stdChar, int & bytesRead)
1211 {
1212         unsigned char ch;
1213
1214         // read first byte
1215         bytesRead = 0;
1216         if (!is.getNext(ch))
1217                 return 0;
1218         bytesRead = 1;
1219         stdChar = m_stdMap[ch];
1220         if (stdChar == 0)
1221                 stdChar = ch;
1222         else if (stdChar == 0xFFFF)
1223                 stdChar = INVALID_STD_CHAR;
1224         else {
1225                 stdChar += VnStdCharOffset - 1;
1226                 UKBYTE hi;
1227                 if (is.peekNext(hi) && hi > 0) {
1228                         //test if a double-byte character is encountered
1229                         UKDWORD key = MAKEWORD(ch,hi);
1230                         UKDWORD *pChar = (UKDWORD *)bsearch(&key, m_vnChars, m_totalChars, sizeof(UKDWORD), wideCharCompare);
1231                         if (pChar) {
1232                                 stdChar = VnStdCharOffset + HIWORD(*pChar);
1233                                 bytesRead = 2;
1234                                 is.getNext(hi);
1235                         }
1236                 }
1237         }
1238         return 1;
1239 }
1240
1241 //---------------------------------------------------------------------
// This function is exactly the same as that of DoubleByteCharset
1243 //---------------------------------------------------------------------
1244 int WinCP1258Charset::putChar(ByteOutStream & os, StdVnChar stdChar, int & outLen)
1245 {
1246         int ret;
1247         if (stdChar     >= VnStdCharOffset) {
1248                 UKWORD wCh = m_toDoubleChar[stdChar-VnStdCharOffset];
1249
1250                 if (wCh & 0xFF00) {
1251                         outLen = 2;
1252                         os.putB((UKBYTE)(wCh & 0x00FF));
1253                         ret = os.putB((UKBYTE)(wCh >> 8));
1254                 }
1255                 else {
1256                         unsigned char b = (unsigned char)wCh;
1257                         if (m_stdMap[b] == 0xFFFF)
1258                                 b = PadChar;
1259                         outLen = 1;
1260                         ret = os.putB(b);
1261                 }
1262         }
1263         else {
1264                 if (stdChar > 255 || m_stdMap[stdChar]) {
1265                         outLen = 1;
1266                         ret = os.putB((UKBYTE)PadChar);
1267                 }
1268                 else {
1269                         outLen = 1;
1270                         ret = os.putB((UKBYTE)stdChar);
1271                 }
1272         }
1273         return ret;
1274 }
1275
// Parity tests on an integer expression.  The argument is parenthesized so
// that operators binding looser than '&' (|, ^, ?:, ...) inside it are
// evaluated before the bit test.
#define IS_ODD(x) (((x) & 1) != 0)
#define IS_EVEN(x) (((x) & 1) == 0)
1278
1279 StdVnChar StdVnToUpper(StdVnChar ch)
1280 {
1281         if (ch >= VnStdCharOffset && 
1282                 ch<(VnStdCharOffset + TOTAL_ALPHA_VNCHARS) && 
1283                 IS_ODD(ch))
1284                 ch -= 1;
1285         return ch;
1286 }
1287
1288 //----------------------------------------
1289 StdVnChar StdVnToLower(StdVnChar ch)
1290 {
1291         if (ch >= VnStdCharOffset && 
1292                 ch<(VnStdCharOffset + TOTAL_ALPHA_VNCHARS) && 
1293                 IS_EVEN(ch))
1294                 ch += 1;
1295         return ch;
1296 }
1297
1298 //----------------------------------------
1299 StdVnChar StdVnGetRoot(StdVnChar ch)
1300 {
1301         if (ch >= VnStdCharOffset && ch<VnStdCharOffset+TOTAL_VNCHARS)
1302                 ch = VnStdCharOffset + StdVnRootChar[ch-VnStdCharOffset];
1303         return ch;
1304 }