1 // This file is part of The New Aspell
2 // Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
3 // version 2.0 or 2.1. You should have received a copy of the LGPL
4 // license along with this library if you did not you can find
5 // it at http://www.gnu.org/.
11 #include "asc_ctype.hpp"
12 #include "convert.hpp"
13 #include "fstream.hpp"
14 #include "getdata.hpp"
17 #include "stack_ptr.hpp"
18 #include "cache-t.hpp"
19 #include "file_util.hpp"
20 #include "file_data_util.hpp"
21 #include "vararray.hpp"
23 #include "iostream.hpp"
29 typedef unsigned char byte;
30 typedef unsigned char Uni8;
31 typedef unsigned short Uni16;
32 typedef unsigned int Uni32;
35 //////////////////////////////////////////////////////////////////////
36 //////////////////////////////////////////////////////////////////////
40 //////////////////////////////////////////////////////////////////////
41 //////////////////////////////////////////////////////////////////////
43 //////////////////////////////////////////////////////////////////////
51 static const Uni32 npos = (Uni32)(-1);
54 Uni32 operator[] (char key) const {return data[(unsigned char)key];} // cast to unsigned char so a negative char value cannot produce a negative index
55 bool have(char key) const {return data[(unsigned char)key] != npos;} // a slot holding npos is reported as absent
56 bool insert(char key, Uni32 value);
59 void ToUniLookup::reset()
61 for (int i = 0; i != 256; ++i)
65 bool ToUniLookup::insert(char key, Uni32 value)
67 if (data[(unsigned char)key] != npos)
69 data[(unsigned char)key] = value;
73 //////////////////////////////////////////////////////////////////////
78 // Assumes that the maximum number of items in the table is 256
79 // Also assumes (unsigned char)i == i % 256
81 // Based on the iso-8859-* character sets it is very fast, almost all
82 // lookups involving no more than 2 comparisons.
83 // No lookups involved more than 3 comparisons.
84 // Also, no division (or modulus) is done whatsoever.
95 static const Uni32 npos = (Uni32)(-1);
96 UniItem * overflow_end;
100 UniItem overflow[256]; // you can never be too careful;
105 inline char operator() (Uni32 key, char unknown = '?') const;
106 bool insert(Uni32 key, char value);
109 void FromUniLookup::reset()
111 for (unsigned i = 0; i != 256*4; ++i)
113 overflow_end = overflow;
116 inline char FromUniLookup::operator() (Uni32 k, char unknown) const
118 const UniItem * i = data + (unsigned char)k * 4;
120 if (i->key == k) return i->value;
122 if (i->key == k) return i->value;
124 if (i->key == k) return i->value;
126 if (i->key == k) return i->value;
128 if (i->key == npos) return unknown;
130 for(i = overflow; i != overflow_end; ++i)
131 if (i->key == k) return i->value;
136 bool FromUniLookup::insert(Uni32 k, char v)
138 UniItem * i = data + (unsigned char)k * 4;
140 while (i != e && i->key != npos) {
146 for(i = overflow; i != overflow_end; ++i)
147 if (i->key == k) return false;
154 //////////////////////////////////////////////////////////////////////
165 char operator[] (char key) const {return data[(unsigned char)key];} // cast to unsigned char so a negative char value cannot produce a negative index
166 bool insert(char key, char value);
169 void CharLookup::reset() {
170 for (int i = 0; i != 256; ++i)
174 bool CharLookup::insert(char key, char value)
176 if (data[(unsigned char)key] != -1)
178 data[(unsigned char)key] = value;
182 //////////////////////////////////////////////////////////////////////
190 static const unsigned struct_size;
196 T data[1]; // hack for data[]
200 const unsigned NormTable<T>::struct_size = sizeof(NormTable<T>) - 1;
202 template <class T, class From>
205 const typename T::To * to;
207 NormLookupRet(const typename T::To * t, From * l)
211 template <class T, class From>
212 static inline NormLookupRet<T,From> norm_lookup(const NormTable<T> * d,
213 From * s, From * stop,
214 const typename T::To * def,
219 const T * i = d->data + (static_cast<typename T::From>(*s) & d->mask);
221 if (i->from == static_cast<typename T::From>(*s)) {
223 // really tail recursion
224 if (i->to[1] != T::to_non_char) {def = i->to; prev = s;}
225 d = (const NormTable<T> *)(i->sub_table);
229 return NormLookupRet<T,From>(i->to, s);
233 if (i >= d->end) break;
237 return NormLookupRet<T,From>(def, prev);
241 void free_norm_table(NormTable<T> * d)
243 for (T * cur = d->data; cur != d->end; ++cur) {
245 free_norm_table<T>(static_cast<NormTable<T> *>(cur->sub_table));
250 struct FromUniNormEntry
256 static const From from_non_char = (From)(-1);
257 static const To to_non_char = 0x10;
258 static const unsigned max_to = 4;
262 __attribute__ ((aligned (16)))
266 struct ToUniNormEntry
272 static const From from_non_char = 0x10;
273 static const To to_non_char = 0x10;
274 static const unsigned max_to = 3;
278 __attribute__ ((aligned (16)))
282 //////////////////////////////////////////////////////////////////////
287 PosibErr<void> read_in_char_data (const Config & config,
290 FromUniLookup & from)
295 String dir1,dir2,file_name;
296 fill_data_dir(&config, dir1, dir2);
297 find_file(file_name,dir1,dir2,encoding,".cset");
300 PosibErrBase err = data.open(file_name, "r");
303 snprintf(mesg, 300, _("This could also mean that the file \"%s\" could not be opened for reading or does not exist."),
305 return make_err(unknown_encoding, encoding, mesg);
312 p = get_nb_line(data, line);
314 for (chr = 0; chr != 256; ++chr) {
315 p = get_nb_line(data, line);
316 if (strtoul(p, 0, 16) != chr)
317 return make_err(bad_file_format, file_name);
318 uni = strtoul(p + 3, 0, 16);
320 from.insert(uni, chr);
326 //////////////////////////////////////////////////////////////////////
337 Tally(int s, int * d) : size(s), mask(s - 1), max(0), data(d) {
338 memset(data, 0, sizeof(int)*size);
340 void add(Uni32 chr) {
341 Uni32 p = chr & mask;
343 if (data[p] > max) max = data[p];
348 static PosibErr< NormTable<T> * > create_norm_table(IStream & in, String & buf)
350 const char * p = get_nb_line(in, buf);
353 int size = strtoul(p, (char **)&p, 10);
354 VARARRAY(T, d, size);
355 memset(d, 0, sizeof(T) * size);
356 int sz = 1 << (unsigned)floor(log(size <= 1 ? 1.0 : size - 1)/log(2.0));
357 VARARRAY(int, tally0_d, sz); Tally tally0(sz, tally0_d);
358 VARARRAY(int, tally1_d, sz*2); Tally tally1(sz*2, tally1_d);
359 VARARRAY(int, tally2_d, sz*4); Tally tally2(sz*4, tally2_d);
361 while (p = get_nb_line(in, buf), *p != '.') {
362 Uni32 f = strtoul(p, (char **)&p, 16);
363 cur->from = static_cast<typename T::From>(f);
364 assert(f == cur->from);
377 Uni32 t = strtoul(p, (char **)&p, 16);
379 assert(i < d->max_to);
380 cur->to[i] = static_cast<typename T::To>(t);
381 assert(t == static_cast<Uni32>(cur->to[i]));
385 cur->to[1] = T::to_non_char;
388 if (*p == '/') cur->sub_table = create_norm_table<T>(in,buf);
391 assert(cur - d == size);
392 Tally * which = &tally0;
393 if (which->max > tally1.max) which = &tally1;
394 if (which->max > tally2.max) which = &tally2;
395 NormTable<T> * final = (NormTable<T> *)calloc(1, NormTable<T>::struct_size +
396 sizeof(T) * which->size * which->max);
397 // calloc() already returns zero-filled storage, so the memset() that used to follow was redundant.
398 final->mask = which->size - 1;
399 final->height = which->size;
400 final->width = which->max;
401 final->end = final->data + which->size * which->max;
403 for (cur = d; cur != d + size; ++cur) {
404 T * dest = final->data + (cur->from & final->mask);
405 while (dest->from != 0) dest += final->height;
407 if (dest->from == 0) dest->from = T::from_non_char;
409 for (T * dest = final->data; dest < final->end; dest += final->height) {
410 if (dest->from == 0 || (dest->from == T::from_non_char && dest->to[0] == 0)) {
411 dest->from = T::from_non_char;
412 dest->to[0] = T::to_non_char;
418 PosibErr<NormTables *> NormTables::get_new(const String & encoding,
419 const Config * config)
421 String dir1,dir2,file_name;
422 fill_data_dir(config, dir1, dir2);
423 find_file(file_name,dir1,dir2,encoding,".cmap");
426 PosibErrBase err = in.open(file_name, "r");
429 snprintf(mesg, 300, _("This could also mean that the file \"%s\" could not be opened for reading or does not exist."),
431 return make_err(unknown_encoding, encoding, mesg); // FIXME
434 NormTables * d = new NormTables;
439 assert (l == "INTERNAL");
443 d->internal = create_norm_table<FromUniNormEntry>(in, l);
446 assert (l == "STRICT");
447 char * p = get_nb_line(in, l);
450 d->strict_d = create_norm_table<FromUniNormEntry>(in, l);
451 d->strict = d->strict_d;
455 assert(strcmp(p, "INTERNAL") == 0);
456 d->strict = d->internal;
458 while (get_nb_line(in, l)) {
460 d->to_uni.push_back(ToUniTable());
461 ToUniTable & e = d->to_uni.back();
462 e.name.resize(l.size());
463 for (unsigned i = 0; i != l.size(); ++i)
464 e.name[i] = asc_tolower(l[i]);
465 char * p = get_nb_line(in, l);
468 e.ptr = e.data = create_norm_table<ToUniNormEntry>(in,l);
472 for (char * q = p; *q; ++q) *q = asc_tolower(*q);
473 Vector<ToUniTable>::iterator i = d->to_uni.begin();
474 while (i->name != p && i != d->to_uni.end()) ++i;
475 assert(i != d->to_uni.end());
483 NormTables::~NormTables()
485 free_norm_table<FromUniNormEntry>(internal);
487 free_norm_table<FromUniNormEntry>(strict_d);
488 for (unsigned i = 0; i != to_uni.size(); ++i) {
490 free_norm_table<ToUniNormEntry>(to_uni[i].data);
494 //////////////////////////////////////////////////////////////////////
495 //////////////////////////////////////////////////////////////////////
499 //////////////////////////////////////////////////////////////////////
500 //////////////////////////////////////////////////////////////////////
503 bool operator== (const Convert & rhs, const Convert & lhs)
505 return strcmp(rhs.in_code(), lhs.in_code()) == 0
506 && strcmp(rhs.out_code(), lhs.out_code()) == 0;
509 //////////////////////////////////////////////////////////////////////
511 // Trivial Conversion
514 template <typename Chr>
515 struct DecodeDirect : public Decode
517 void decode(const char * in0, int size, FilterCharVector & out) const {
518 const Chr * in = reinterpret_cast<const Chr *>(in0);
523 const Chr * stop = reinterpret_cast<const Chr *>(in0 +size);
524 for (;in != stop; ++in)
528 PosibErr<void> decode_ec(const char * in0, int size,
529 FilterCharVector & out, ParmStr) const {
530 DecodeDirect::decode(in0, size, out);
535 template <typename Chr>
536 struct EncodeDirect : public Encode
538 void encode(const FilterChar * in, const FilterChar * stop,
539 CharVector & out) const {
540 for (; in != stop; ++in) {
542 if (c != in->chr) c = '?';
543 out.append(&c, sizeof(Chr));
546 PosibErr<void> encode_ec(const FilterChar * in, const FilterChar * stop,
547 CharVector & out, ParmStr orig) const {
548 for (; in != stop; ++in) {
552 snprintf(m, 70, _("The Unicode code point U+%04X is unsupported."), in->chr);
553 return make_err(invalid_string, orig, m);
555 out.append(&c, sizeof(Chr));
559 bool encode(FilterChar * &, FilterChar * &, FilterCharVector &) const {
564 template <typename Chr>
565 struct ConvDirect : public DirectConv
567 void convert(const char * in0, int size, CharVector & out) const {
569 const Chr * in = reinterpret_cast<const Chr *>(in0);
570 for (;*in != 0; ++in)
571 out.append(in, sizeof(Chr));
573 out.append(in0, size);
576 PosibErr<void> convert_ec(const char * in0, int size,
577 CharVector & out, ParmStr) const {
578 ConvDirect::convert(in0, size, out);
583 //////////////////////////////////////////////////////////////////////
588 struct DecodeLookup : public Decode
591 PosibErr<void> init(ParmStr code, const Config & c) {
592 FromUniLookup unused;
593 return read_in_char_data(c, code, lookup, unused);
595 void decode(const char * in, int size, FilterCharVector & out) const {
598 out.append(lookup[*in]);
600 const char * stop = in + size;
601 for (;in != stop; ++in)
602 out.append(lookup[*in]);
605 PosibErr<void> decode_ec(const char * in, int size,
606 FilterCharVector & out, ParmStr) const {
607 DecodeLookup::decode(in, size, out);
612 struct DecodeNormLookup : public Decode
614 typedef ToUniNormEntry E;
616 DecodeNormLookup(NormTable<E> * d) : data(d) {}
617 // must be null terminated
618 // FIXME: Why must it be null terminated?
619 void decode(const char * in, int size, FilterCharVector & out) const {
620 const char * stop = in + size; // will work even if size == -1
623 if (size == -1) break;
627 NormLookupRet<E,const char> ret = norm_lookup<E>(data, in, stop, 0, in);
628 for (unsigned i = 0; ret.to[i] && i < E::max_to; ++i)
629 out.append(ret.to[i]);
634 PosibErr<void> decode_ec(const char * in, int size,
635 FilterCharVector & out, ParmStr) const {
636 DecodeNormLookup::decode(in, size, out);
641 struct EncodeLookup : public Encode
643 FromUniLookup lookup;
644 PosibErr<void> init(ParmStr code, const Config & c)
646 return read_in_char_data(c, code, unused, lookup);}
647 void encode(const FilterChar * in, const FilterChar * stop,
648 CharVector & out) const {
649 for (; in != stop; ++in) {
650 out.append(lookup(*in));
653 PosibErr<void> encode_ec(const FilterChar * in, const FilterChar * stop,
654 CharVector & out, ParmStr orig) const {
655 for (; in != stop; ++in) {
656 char c = lookup(*in, '\0');
657 if (c == '\0' && in->chr != 0) {
659 snprintf(m, 70, _("The Unicode code point U+%04X is unsupported."), in->chr);
660 return make_err(invalid_string, orig, m);
666 bool encode(FilterChar * & in0, FilterChar * & stop,
667 FilterCharVector & out) const {
668 FilterChar * in = in0;
669 for (; in != stop; ++in)
675 struct EncodeNormLookup : public Encode
677 typedef FromUniNormEntry E;
679 EncodeNormLookup(NormTable<E> * d) : data(d) {}
680 // *stop must equal 0
681 void encode(const FilterChar * in, const FilterChar * stop,
682 CharVector & out) const {
688 NormLookupRet<E,const FilterChar> ret = norm_lookup<E>(data, in, stop, (const byte *)"?", in);
689 for (unsigned i = 0; i < E::max_to && ret.to[i]; ++i)
690 out.append(ret.to[i]);
695 PosibErr<void> encode_ec(const FilterChar * in, const FilterChar * stop,
696 CharVector & out, ParmStr orig) const {
702 NormLookupRet<E,const FilterChar> ret = norm_lookup<E>(data, in, stop, 0, in);
705 snprintf(m, 70, _("The Unicode code point U+%04X is unsupported."), in->chr);
706 return make_err(invalid_string, orig, m);
708 for (unsigned i = 0; i < E::max_to && ret.to[i]; ++i)
709 out.append(ret.to[i]);
715 bool encode(FilterChar * & in, FilterChar * & stop,
716 FilterCharVector & buf) const {
720 buf.append(FilterChar(0));
723 NormLookupRet<E,FilterChar> ret = norm_lookup<E>(data, in, stop, (const byte *)"?", in);
724 const FilterChar * end = ret.last + 1;
726 for (; in != end; ++in) width += in->width;
727 buf.append(FilterChar(ret.to[0], width));
728 for (unsigned i = 1; i < E::max_to && ret.to[i]; ++i) {
729 buf.append(FilterChar(ret.to[i],0));
740 //////////////////////////////////////////////////////////////////////
745 #define get_check_next \
746 if (in == stop) goto error; \
748 if ((c & 0xC0) != 0x80) goto error; \
754 static inline FilterChar from_utf8 (const char * & in, const char * stop,
755 Uni32 err_char = '?')
757 Uni32 u = (Uni32)(-1);
758 FilterChar::Width w = 1;
760 // the first char is guaranteed not to be off the end
764 while (in != stop && (c & 0xC0) == 0x80) {c = *in; ++in; ++w;}
765 if ((c & 0x80) == 0x00) { // 1-byte wide
767 } else if ((c & 0xE0) == 0xC0) { // 2-byte wide
770 } else if ((c & 0xF0) == 0xE0) { // 3-byte wide
774 } else if ((c & 0xF8) == 0xF0) { // 4-byte wide
783 return FilterChar(u, w);
785 return FilterChar(err_char, w);
788 static inline void to_utf8 (FilterChar in, CharVector & out)
790 FilterChar::Chr c = in;
795 else if (c < 0x800) {
796 out.append(0xC0 | (c>>6));
797 out.append(0x80 | (c & 0x3F));
799 else if (c < 0x10000) {
800 out.append(0xE0 | (c>>12));
801 out.append(0x80 | (c>>6 & 0x3F));
802 out.append(0x80 | (c & 0x3F));
804 else if (c < 0x200000) {
805 out.append(0xF0 | (c>>18));
806 out.append(0x80 | (c>>12 & 0x3F));
807 out.append(0x80 | (c>>6 & 0x3F));
808 out.append(0x80 | (c & 0x3F));
812 struct DecodeUtf8 : public Decode
815 void decode(const char * in, int size, FilterCharVector & out) const {
816 if (size == 0) return; // empty input: nothing to decode
817 const char * stop = in + size; // this is OK even if size == -1
818 while (in != stop && *in) { // test the bound first so the byte at stop is never read
819 out.append(from_utf8(in, stop));
822 PosibErr<void> decode_ec(const char * in, int size,
823 FilterCharVector & out, ParmStr orig) const {
824 const char * begin = in;
825 const char * stop = in + size; // this is OK even if size == -1
826 while (in != stop && *in) { // test the bound first: with size == 0 the buffer must not be dereferenced
827 FilterChar c = from_utf8(in, stop, (Uni32)-1);
828 if (c == (Uni32)-1) {
830 snprintf(m, 70, _("Invalid UTF-8 sequence at position %ld."), (long)(in - begin));
831 return make_err(invalid_string, orig, m);
839 struct EncodeUtf8 : public Encode
841 FromUniLookup lookup;
842 void encode(const FilterChar * in, const FilterChar * stop,
843 CharVector & out) const {
844 for (; in != stop; ++in) {
848 PosibErr<void> encode_ec(const FilterChar * in, const FilterChar * stop,
849 CharVector & out, ParmStr) const {
850 for (; in != stop; ++in) {
857 //////////////////////////////////////////////////////////////////////
862 static GlobalCache<Decode> decode_cache("decode");
863 static GlobalCache<Encode> encode_cache("encode");
864 static GlobalCache<NormTables> norm_tables_cache("norm_tables");
866 //////////////////////////////////////////////////////////////////////
868 // new_aspell_convert
871 void Convert::generic_convert(const char * in, int size, CharVector & out)
874 decode_->decode(in, size, buf_);
875 FilterChar * start = buf_.pbegin();
876 FilterChar * stop = buf_.pend();
878 filter.process(start, stop);
879 encode_->encode(start, stop, out);
882 const char * fix_encoding_str(ParmStr enc, String & buf)
885 buf.reserve(enc.size() + 1);
886 for (size_t i = 0; i != enc.size(); ++i)
887 buf.push_back(asc_tolower(enc[i]));
889 if (strncmp(buf.c_str(), "iso8859", 7) == 0)
890 buf.insert(buf.begin() + 3, '-'); // For backwards compatibility
892 if (buf == "ascii" || buf == "ansi_x3.4-1968")
894 else if (buf == "machine unsigned 16" || buf == "utf-16")
896 else if (buf == "machine unsigned 32" || buf == "utf-32")
902 bool ascii_encoding(const Config & c, ParmStr enc0)
904 if (enc0.empty()) return true;
905 if (enc0 == "ANSI_X3.4-1968"
906 || enc0 == "ASCII" || enc0 == "ascii") return true;
908 const char * enc = fix_encoding_str(enc0, buf);
909 if (strcmp(enc, "utf-8") == 0
910 || strcmp(enc, "ucs-2") == 0
911 || strcmp(enc, "ucs-4") == 0) return false;
912 String dir1,dir2,file_name;
913 fill_data_dir(&c, dir1, dir2);
914 file_name << dir1 << enc << ".cset";
915 if (file_exists(file_name)) return false;
916 if (dir1 == dir2) return true;
918 file_name << dir2 << enc << ".cset";
919 return !file_exists(file_name);
922 PosibErr<Convert *> internal_new_convert(const Config & c,
929 in = fix_encoding_str(in, in_s);
932 out = fix_encoding_str(out, out_s);
934 if (if_needed && in == out) return 0;
936 StackPtr<Convert> conv(new Convert);
939 RET_ON_ERR(conv->init(c, in, out)); break;
941 RET_ON_ERR(conv->init_norm_from(c, in, out)); break;
943 RET_ON_ERR(conv->init_norm_to(c, in, out)); break;
945 return conv.release();
948 PosibErr<Decode *> Decode::get_new(const String & key, const Config * c)
950 StackPtr<Decode> ptr;
951 if (key == "iso-8859-1")
952 ptr.reset(new DecodeDirect<Uni8>);
953 else if (key == "ucs-2")
954 ptr.reset(new DecodeDirect<Uni16>);
955 else if (key == "ucs-4")
956 ptr.reset(new DecodeDirect<Uni32>);
957 else if (key == "utf-8")
958 ptr.reset(new DecodeUtf8);
960 ptr.reset(new DecodeLookup);
961 RET_ON_ERR(ptr->init(key, *c));
963 return ptr.release();
966 PosibErr<Encode *> Encode::get_new(const String & key, const Config * c)
968 StackPtr<Encode> ptr;
969 if (key == "iso-8859-1")
970 ptr.reset(new EncodeDirect<Uni8>);
971 else if (key == "ucs-2")
972 ptr.reset(new EncodeDirect<Uni16>);
973 else if (key == "ucs-4")
974 ptr.reset(new EncodeDirect<Uni32>);
975 else if (key == "utf-8")
976 ptr.reset(new EncodeUtf8);
978 ptr.reset(new EncodeLookup);
979 RET_ON_ERR(ptr->init(key, *c));
981 return ptr.release();
984 Convert::~Convert() {}
986 PosibErr<void> Convert::init(const Config & c, ParmStr in, ParmStr out)
988 RET_ON_ERR(setup(decode_c, &decode_cache, &c, in));
989 decode_ = decode_c.get();
990 RET_ON_ERR(setup(encode_c, &encode_cache, &c, out));
991 encode_ = encode_c.get();
996 conv_ = new ConvDirect<Uni16>;
997 } else if (in == "ucs-4") {
998 conv_ = new ConvDirect<Uni32>;
1000 conv_ = new ConvDirect<char>;
1005 RET_ON_ERR(conv_->init(decode_, encode_, c));
1011 PosibErr<void> Convert::init_norm_from(const Config & c, ParmStr in, ParmStr out)
1013 if (!c.retrieve_bool("normalize") && !c.retrieve_bool("norm-required"))
1014 return init(c,in,out);
1016 RET_ON_ERR(setup(norm_tables_, &norm_tables_cache, &c, out));
1018 RET_ON_ERR(setup(decode_c, &decode_cache, &c, in));
1019 decode_ = decode_c.get();
1021 if (c.retrieve_bool("norm-strict")) {
1022 encode_s = new EncodeNormLookup(norm_tables_->strict);
1025 encode_->key += ":strict";
1027 encode_s = new EncodeNormLookup(norm_tables_->internal);
1030 encode_->key += ":internal";
1037 PosibErr<void> Convert::init_norm_to(const Config & c, ParmStr in, ParmStr out)
1039 String norm_form = c.retrieve("norm-form");
1040 if ((!c.retrieve_bool("normalize") || norm_form == "none")
1041 && !c.retrieve_bool("norm-required"))
1042 return init(c,in,out);
1043 if (norm_form == "none" && c.retrieve_bool("norm-required"))
1046 RET_ON_ERR(setup(norm_tables_, &norm_tables_cache, &c, in));
1048 RET_ON_ERR(setup(encode_c, &encode_cache, &c, out));
1049 encode_ = encode_c.get();
1051 NormTables::ToUni::const_iterator i = norm_tables_->to_uni.begin();
1052 for (; i != norm_tables_->to_uni.end() && i->name != norm_form; ++i); // empty body: linear search for the table named norm_form
1053 assert(i != norm_tables_->to_uni.end()); // NOTE(review): assert is the only not-found handling here; with NDEBUG an unmatched norm_form would reach the i->ptr use below at end() - confirm the value is validated earlier
1055 decode_s = new DecodeNormLookup(i->ptr);
1058 decode_->key += ':';
1059 decode_->key += i->name;
1066 PosibErr<void> MBLen::setup(const Config &, ParmStr enc0)
1069 const char * enc = fix_encoding_str(enc0,buf);
1070 if (strcmp(enc, "utf-8") == 0) encoding = UTF8;
1071 else if (strcmp(enc, "ucs-2") == 0) encoding = UCS2;
1072 else if (strcmp(enc, "ucs-4") == 0) encoding = UCS4;
1073 else encoding = Other;
1077 unsigned MBLen::operator()(const char * str, const char * stop)
1084 for (; str != stop; ++str) {
1085 if ((*str & 0x80) == 0 || (*str & 0xC0) == 0xC0) ++size;
1089 return (stop - str)/2;
1091 return (stop - str)/4;