+2000-07-23 Bruno Haible <haible@clisp.cons.org>
+
+ * wctype/wchar-lookup.h: New file.
+ * wctype/iswctype.c: Include "wchar-lookup.h".
+ (__iswctype): Support alternate locale format with 3-level tables.
+ * wctype/iswctype_l.c (__iswctype_l): Likewise.
+ * wctype/towctrans.c (__towctrans): Likewise.
+ * wctype/towctrans_l.c (__towctrans_l): Likewise.
+ * wctype/wcfuncs.c: Include "wchar-lookup.h".
+ (__ctype32_wctype, __ctype32_wctrans): Declare external.
+ (__iswalnum, __iswalpha, __iswcntrl, __iswdigit, __iswlower,
+ __iswgraph, __iswprint, __iswpunct, __iswspace, __iswupper,
+ __iswxdigit, towlower, towupper): Support alternate locale format
+ with 3-level tables.
+ * wctype/wcextra.c (iswblank): Likewise.
+ * wctype/wcfuncs_l.c: Include "wchar-lookup.h".
+ (__iswalnum_l, __iswalpha_l, __iswcntrl_l, __iswdigit_l, __iswlower_l,
+ __iswgraph_l, __iswprint_l, __iswpunct_l, __iswspace_l, __iswupper_l,
+ __iswxdigit_l, __towlower_l, __towupper_l): Support alternate locale
+ format with 3-level tables.
+ * wctype/wcextra_l.c (__iswblank_l): Likewise.
+ * wctype/wctype.c (__wctype): Likewise. In the alternate locale
+ format, return a 3-level table pointer.
+ * wctype/wctype_l.c (__wctype_l): Likewise.
+ * wctype/wctrans.c (wctrans): Likewise.
+ * wctype/wctype.h (__ISwupper, __ISwlower, __ISwalpha, __ISwdigit,
+ __ISwxdigit, __ISwspace, __ISwprint, __ISwgraph, __ISwblank,
+ __ISwcntrl, __ISwpunct, __ISwalnum): New enum values.
+ (iswctype): Remove macro definition.
+ * wcsmbs/wcwidth.h: Include "wchar-lookup.h".
+ (internal_wcwidth): Support alternate locale format with 3-level
+ tables.
+ * locale/langinfo.h (_NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET):
+ New nl_items.
+ * locale/categories.def (_NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET):
+ Define them as being type "word".
+ * locale/C-ctype.c (_nl_C_LC_CTYPE): Add initializers for them.
+ * ctype/ctype-info.c (__ctype32_wctype, __ctype32_wctrans,
+ __ctype32_width): New exported variables.
+ * locale/lc-ctype.c (_nl_postload_ctype): Initialize them in the
+ alternate locale format. Don't initialize __ctype_names and
+ __ctype_width in the alternate locale format.
+ * locale/programs/localedef.h (oldstyle_tables): New declaration.
+ * locale/programs/localedef.c (oldstyle_tables): New variable.
+ (OPT_OLDSTYLE): New macro.
+ (options): Add --old-style option.
+ (parse_opt): Handle --old-style option.
+ * locale/programs/ld-ctype.c (locale_ctype_t): Add class_offset,
+ map_offset, class_3level, map_3level, width_3level members.
+ (ctype_output): Support for alternate locale format: Computation of
+ nelems changes. _NL_CTYPE_TOUPPER32, _NL_CTYPE_TOLOWER32 and
+ _NL_CTYPE_CLASS32 only 256 characters. _NL_CTYPE_NAMES empty.
+ New fields _NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET. Field
+ _NL_CTYPE_WIDTH now contains the three-level table. Extra elems
+ now contain both class and map tables.
+ (struct wctype_table): New type.
+ (wctype_table_init, wctype_table_add, wctype_table_finalize): New
+ functions.
+ (struct wcwidth_table): New type.
+ (wcwidth_table_init, wcwidth_table_add, wcwidth_table_finalize): New
+ functions.
+ (struct wctrans_table): New type.
+ (wctrans_table_init, wctrans_table_add, wctrans_table_finalize): New
+ functions.
+ (allocate_arrays): Support for alternate locale format: Set
+ plane_size and plane_cnt to 0. Restrict ctype->ctype32_b to the first
+ 256 characters. Compute ctype->class_3level. Restrict ctype->map32[idx]
+ to the first 256 characters. Compute ctype->map_3level. Set
+ ctype->class_offset and ctype->map_offset. Compute ctype->width_3level
+ instead of ctype->width.
+
2000-07-24 Ulrich Drepper <drepper@redhat.com>
* libio/iogetwline.c (_IO_getwline_info): Use wide character
const __uint32_t *__ctype32_toupper = b (__uint32_t, toupper, 128);
const __uint32_t *__ctype_names = b (__uint32_t, names, 0);
const unsigned char *__ctype_width = b (unsigned char, width, 0);
+const char *__ctype32_wctype[12];
+const char *__ctype32_wctrans[2];
+const char *__ctype32_width;
{ string: "ANSI_X3.4-1968" },
{ string: (const char *) &_nl_C_LC_CTYPE_toupper[128] },
{ string: (const char *) &_nl_C_LC_CTYPE_tolower[128] },
+ { word: 0 },
+ { word: 0 },
{ word: 1 },
{ string: "0" },
{ string: "1" },
DEFINE_ELEMENT (_NL_CTYPE_CODESET_NAME, "charmap", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TOUPPER32, "ctype-toupper32", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TOLOWER32, "ctype-tolower32", std, string)
+ DEFINE_ELEMENT (_NL_CTYPE_CLASS_OFFSET, "ctype-class-offset", std, word)
+ DEFINE_ELEMENT (_NL_CTYPE_MAP_OFFSET, "ctype-map-offset", std, word)
DEFINE_ELEMENT (_NL_CTYPE_INDIGITS_MB_LEN, "ctype-indigits_mb-len", std, word)
DEFINE_ELEMENT (_NL_CTYPE_INDIGITS0_MB, "ctype-indigits0_mb", std, string)
DEFINE_ELEMENT (_NL_CTYPE_INDIGITS1_MB, "ctype-indigits1_mb", std, string)
#define CODESET CODESET
_NL_CTYPE_TOUPPER32,
_NL_CTYPE_TOLOWER32,
+ _NL_CTYPE_CLASS_OFFSET,
+ _NL_CTYPE_MAP_OFFSET,
_NL_CTYPE_INDIGITS_MB_LEN,
_NL_CTYPE_INDIGITS0_MB,
_NL_CTYPE_INDIGITS1_MB,
/* Define current locale data for LC_CTYPE category.
- Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
extern const unsigned char *__ctype_width;
extern const uint32_t *__ctype32_toupper;
extern const uint32_t *__ctype32_tolower;
+ extern const char *__ctype32_wctype[12];
+ extern const char *__ctype32_wctrans[2];
+ extern const char *__ctype32_width;
__ctype_b = current (uint16_t, CLASS, 128);
__ctype_toupper = current (uint32_t, TOUPPER, 128);
__ctype32_b = current (uint32_t, CLASS32, 0);
__ctype32_toupper = current (uint32_t, TOUPPER32, 0);
__ctype32_tolower = current (uint32_t, TOLOWER32, 0);
- __ctype_names = current (uint32_t, NAMES, 0);
- __ctype_width = current (unsigned char, WIDTH, 0);
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
+ {
+ /* Old locale format. */
+ __ctype_names = current (uint32_t, NAMES, 0);
+ __ctype_width = current (unsigned char, WIDTH, 0);
+ }
+ else
+ {
+ /* New locale format. */
+ size_t offset, cnt;
+
+ offset = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_CLASS_OFFSET);
+ for (cnt = 0; cnt < 12; cnt++)
+ __ctype32_wctype[cnt] =
+ _nl_current_LC_CTYPE->values[offset + cnt].string;
+
+ offset = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_OFFSET);
+ for (cnt = 0; cnt < 2; cnt++)
+ __ctype32_wctrans[cnt] =
+ _nl_current_LC_CTYPE->values[offset + cnt].string;
+
+ __ctype32_width = current (char, WIDTH, 0);
+ }
}
size_t class_collection_max;
size_t class_collection_act;
uint32_t class_done;
+ uint32_t class_offset;
struct charseq **mbdigits;
size_t mbdigits_act;
size_t map_collection_nr;
size_t last_map_idx;
int tomap_done[MAX_NR_CHARMAP];
+ uint32_t map_offset;
/* Transliteration information. */
const char *translit_copy_locale;
uint32_t *names;
uint32_t **map;
uint32_t **map32;
+ struct iovec *class_3level;
+ struct iovec *map_3level;
uint32_t *class_name_ptr;
uint32_t *map_name_ptr;
unsigned char *width;
+ struct iovec width_3level;
uint32_t mb_cur_max;
const char *codeset_name;
uint32_t *translit_from_idx;
static const char nulbytes[4] = { 0, 0, 0, 0 };
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
- + (ctype->map_collection_nr - 2));
+ + (oldstyle_tables
+ ? (ctype->map_collection_nr - 2)
+ : (ctype->nr_charclass + ctype->map_collection_nr)));
struct iovec iov[2 + nelems + ctype->nr_charclass
+ ctype->map_collection_nr + 2];
struct locale_file data;
CTYPE_DATA (_NL_CTYPE_TOUPPER32,
ctype->map32[0],
- (ctype->plane_size * ctype->plane_cnt)
+ (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
* sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TOLOWER32,
ctype->map32[1],
- (ctype->plane_size * ctype->plane_cnt)
+ (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
* sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_CLASS32,
ctype->ctype32_b,
- (ctype->plane_size * ctype->plane_cnt
- * sizeof (char_class32_t)));
+ (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
+ * sizeof (char_class32_t));
CTYPE_DATA (_NL_CTYPE_NAMES,
- ctype->names, (ctype->plane_size * ctype->plane_cnt
- * sizeof (uint32_t)));
+ ctype->names,
+ (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 0)
+ * sizeof (uint32_t));
+
+ CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
+ &ctype->class_offset, sizeof (uint32_t));
+
+ CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
+ &ctype->map_offset, sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
&ctype->translit_idx_size, sizeof (uint32_t));
break;
CTYPE_DATA (_NL_CTYPE_WIDTH,
- ctype->width,
- (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul);
+ (oldstyle_tables
+ ? ctype->width
+ : ctype->width_3level.iov_base),
+ (oldstyle_tables
+ ? (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul
+ : ctype->width_3level.iov_len));
CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
&ctype->mb_cur_max, sizeof (uint32_t));
else
{
/* Handle extra maps. */
- size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
+ if (oldstyle_tables)
+ {
+ size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
- iov[2 + elem + offset].iov_base = ctype->map32[nr];
- iov[2 + elem + offset].iov_len = ((ctype->plane_size
- * ctype->plane_cnt)
- * sizeof (uint32_t));
+ iov[2 + elem + offset].iov_base = ctype->map32[nr];
+ iov[2 + elem + offset].iov_len = ((ctype->plane_size
+ * ctype->plane_cnt)
+ * sizeof (uint32_t));
- idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
+ idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
+ }
+ else
+ {
+ size_t nr = elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
+ if (nr < ctype->nr_charclass)
+ {
+ iov[2 + elem + offset] = ctype->class_3level[nr];
+ }
+ else
+ {
+ nr -= ctype->nr_charclass;
+ assert (nr < ctype->map_collection_nr);
+ iov[2 + elem + offset] = ctype->map_3level[nr];
+ }
+ idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
+ }
}
}
}
+/* Construction of sparse 3-level tables.
+ See wchar-lookup.h for their structure and the meaning of p and q.
+
+ struct wctype_table is the builder for one set of wide characters,
+ stored as one bit per character.  Members are collected with
+ wctype_table_add; wctype_table_finalize then packs everything into
+ the single memory block `result'. */
+
+struct wctype_table
+{
+ /* Parameters.  A character is split, from the top down, into a
+ level-1 index, q bits of level-2 index, p bits of level-3 index
+ and 5 bits that select one bit of a 32-bit word.  Both values
+ have to be set by the caller before wctype_table_init. */
+ unsigned int p;
+ unsigned int q;
+ /* Working representation.  For each level, *_alloc is the allocated
+ capacity and *_size the part in use: counted in entries for
+ level1, in blocks of (1 << q) entries for level2 and in blocks of
+ (1 << p) words for level3.  In level1 and level2 the value
+ ~((uint32_t) 0) marks an entry that has no block below it yet. */
+ size_t level1_alloc;
+ size_t level1_size;
+ uint32_t *level1;
+ size_t level2_alloc;
+ size_t level2_size;
+ uint32_t *level2;
+ size_t level3_alloc;
+ size_t level3_size;
+ uint32_t *level3;
+ /* Compressed representation.  Written by wctype_table_finalize:
+ the size in bytes and the xmalloc'ed block itself. */
+ size_t result_size;
+ char *result;
+};
+
+/* Initialize. Assumes t->p and t->q have already been set.
+ Marks all three levels as empty and unallocated.  The level
+ pointers and the result fields are not touched here; the level
+ pointers get their first value in wctype_table_add. */
+static inline void
+wctype_table_init (struct wctype_table *t)
+{
+ t->level1_alloc = t->level1_size = 0;
+ t->level2_alloc = t->level2_size = 0;
+ t->level3_alloc = t->level3_size = 0;
+}
+
+/* Add one entry: record the wide character WC as a member of the set
+ that T describes.
+
+ WC is decomposed into index1 (the bits above q + p + 5), index2
+ (the next q bits), index3 (the next p bits) and index4 (the low
+ 5 bits).  The function then
+ - grows level1 until it covers index1, filling new slots with the
+ "unused" marker ~((uint32_t) 0);
+ - if the level1 slot is unused, appends a fresh level2 block of
+ (1 << q) unused entries and stores its block number in level1;
+ - if the level2 slot is unused, appends a fresh level3 block of
+ (1 << p) zeroed words and stores its block number in level2;
+ - sets bit index4 in the selected level3 word.
+
+ level1 capacity at least doubles when it has to grow; level2 and
+ level3 capacities grow to 2 * alloc + 1 blocks.  The results of
+ xmalloc and xrealloc are not checked here (presumably those
+ helpers abort on failure -- they are defined outside this hunk). */
+static void
+wctype_table_add (struct wctype_table *t, uint32_t wc)
+{
+ uint32_t index1 = wc >> (t->q + t->p + 5);
+ uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
+ uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
+ uint32_t index4 = wc & 0x1f;
+ size_t i, i1, i2;
+
+ if (index1 >= t->level1_size)
+ {
+ if (index1 >= t->level1_alloc)
+ {
+ size_t alloc = 2 * t->level1_alloc;
+ if (alloc <= index1)
+ alloc = index1 + 1;
+ t->level1 = (t->level1_alloc > 0
+ ? (uint32_t *) xrealloc ((char *) t->level1,
+ alloc * sizeof (uint32_t))
+ : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
+ t->level1_alloc = alloc;
+ }
+ while (index1 >= t->level1_size)
+ t->level1[t->level1_size++] = ~((uint32_t) 0);
+ }
+
+ if (t->level1[index1] == ~((uint32_t) 0))
+ {
+ if (t->level2_size == t->level2_alloc)
+ {
+ size_t alloc = 2 * t->level2_alloc + 1;
+ t->level2 = (t->level2_alloc > 0
+ ? (uint32_t *) xrealloc ((char *) t->level2,
+ (alloc << t->q) * sizeof (uint32_t))
+ : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
+ t->level2_alloc = alloc;
+ }
+ i1 = t->level2_size << t->q;
+ i2 = (t->level2_size + 1) << t->q;
+ for (i = i1; i < i2; i++)
+ t->level2[i] = ~((uint32_t) 0);
+ t->level1[index1] = t->level2_size++;
+ }
+
+ index2 += t->level1[index1] << t->q;
+
+ if (t->level2[index2] == ~((uint32_t) 0))
+ {
+ if (t->level3_size == t->level3_alloc)
+ {
+ size_t alloc = 2 * t->level3_alloc + 1;
+ t->level3 = (t->level3_alloc > 0
+ ? (uint32_t *) xrealloc ((char *) t->level3,
+ (alloc << t->p) * sizeof (uint32_t))
+ : (uint32_t *) xmalloc ((alloc << t->p) * sizeof (uint32_t)));
+ t->level3_alloc = alloc;
+ }
+ i1 = t->level3_size << t->p;
+ i2 = (t->level3_size + 1) << t->p;
+ for (i = i1; i < i2; i++)
+ t->level3[i] = 0;
+ t->level2[index2] = t->level3_size++;
+ }
+
+ index3 += t->level2[index2] << t->p;
+
+ t->level3[index3] |= (uint32_t)1 << index4;
+}
+
+/* Finalize and shrink.
+ Merges identical level3 blocks, then identical level2 blocks, and
+ writes the table into a newly xmalloc'ed block t->result of
+ t->result_size bytes.  Layout of the result, in uint32_t units:
+ [0] shift that yields the level1 index (q + p + 5)
+ [1] number of level1 entries
+ [2] shift that yields the level2 index (p + 5)
+ [3] mask for the level2 index ((1 << q) - 1)
+ [4] mask for the level3 index ((1 << p) - 1)
+ followed by the level1 array, the level2 blocks and the level3
+ blocks.  Level1 and level2 entries are stored as byte offsets from
+ the start of the result, with 0 standing for "no block below".
+
+ The working arrays are freed at the end but the level pointers and
+ *_alloc counters are not reset, so T must be re-initialized before
+ it is used for adding again.
+
+ NOTE(review): reorder3 and reorder2 are variable-length arrays sized
+ by the block counts; if no entry was ever added they have length
+ zero, which ISO C does not permit -- confirm that this relies on
+ the GCC extension or that empty tables never reach this point. */
+static void
+wctype_table_finalize (struct wctype_table *t)
+{
+ size_t i, j, k;
+ uint32_t reorder3[t->level3_size];
+ uint32_t reorder2[t->level2_size];
+ uint32_t level1_offset, level2_offset, level3_offset;
+
+ /* Uniquify level3 blocks.  k counts the distinct blocks kept so
+ far; reorder3[j] receives the new block number of old block j.
+ After the loop level2 is rewritten through reorder3. */
+ k = 0;
+ for (j = 0; j < t->level3_size; j++)
+ {
+ for (i = 0; i < k; i++)
+ if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
+ (1 << t->p) * sizeof (uint32_t)) == 0)
+ break;
+ /* Relocate block j to block i.  When no kept block matched,
+ i == k and block j is moved down into slot k. */
+ reorder3[j] = i;
+ if (i == k)
+ {
+ if (i != j)
+ memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
+ (1 << t->p) * sizeof (uint32_t));
+ k++;
+ }
+ }
+ t->level3_size = k;
+
+ for (i = 0; i < (t->level2_size << t->q); i++)
+ if (t->level2[i] != ~((uint32_t) 0))
+ t->level2[i] = reorder3[t->level2[i]];
+
+ /* Uniquify level2 blocks.  Same scheme as above, one level up:
+ reorder2[j] receives the new block number of old level2 block j
+ and level1 is rewritten through it afterwards. */
+ k = 0;
+ for (j = 0; j < t->level2_size; j++)
+ {
+ for (i = 0; i < k; i++)
+ if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
+ (1 << t->q) * sizeof (uint32_t)) == 0)
+ break;
+ /* Relocate block j to block i.  When no kept block matched,
+ i == k and block j is moved down into slot k. */
+ reorder2[j] = i;
+ if (i == k)
+ {
+ if (i != j)
+ memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
+ (1 << t->q) * sizeof (uint32_t));
+ k++;
+ }
+ }
+ t->level2_size = k;
+
+ for (i = 0; i < t->level1_size; i++)
+ if (t->level1[i] != ~((uint32_t) 0))
+ t->level1[i] = reorder2[t->level1[i]];
+
+ /* Create and fill the resulting compressed representation.
+ The size is the 5-word header plus the three levels; the
+ level*_offset values are byte offsets of each level within the
+ result. */
+ t->result_size =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t)
+ + (t->level2_size << t->q) * sizeof (uint32_t)
+ + (t->level3_size << t->p) * sizeof (uint32_t);
+ t->result = (char *) xmalloc (t->result_size);
+
+ level1_offset =
+ 5 * sizeof (uint32_t);
+ level2_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t);
+ level3_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t)
+ + (t->level2_size << t->q) * sizeof (uint32_t);
+
+ ((uint32_t *) t->result)[0] = t->q + t->p + 5;
+ ((uint32_t *) t->result)[1] = t->level1_size;
+ ((uint32_t *) t->result)[2] = t->p + 5;
+ ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
+ ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
+
+ for (i = 0; i < t->level1_size; i++)
+ ((uint32_t *) (t->result + level1_offset))[i] =
+ (t->level1[i] == ~((uint32_t) 0)
+ ? 0
+ : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
+
+ for (i = 0; i < (t->level2_size << t->q); i++)
+ ((uint32_t *) (t->result + level2_offset))[i] =
+ (t->level2[i] == ~((uint32_t) 0)
+ ? 0
+ : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
+
+ for (i = 0; i < (t->level3_size << t->p); i++)
+ ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
+
+ if (t->level1_alloc > 0)
+ free (t->level1);
+ if (t->level2_alloc > 0)
+ free (t->level2);
+ if (t->level3_alloc > 0)
+ free (t->level3);
+}
+
+struct wcwidth_table
+{
+ /* Builder for a sparse 3-level table that stores one uint8_t width
+ per wide character.
+ Parameters.  A character is split, from the top down, into a
+ level-1 index, q bits of level-2 index and p bits of level-3
+ index.  Both values have to be set by the caller before
+ wcwidth_table_init. */
+ unsigned int p;
+ unsigned int q;
+ /* Working representation.  For each level, *_alloc is the allocated
+ capacity and *_size the part in use: counted in entries for
+ level1, in blocks of (1 << q) entries for level2 and in blocks of
+ (1 << p) bytes for level3.  In level1 and level2 the value
+ ~((uint32_t) 0) marks an entry that has no block below it yet;
+ untouched level3 bytes hold 0xff. */
+ size_t level1_alloc;
+ size_t level1_size;
+ uint32_t *level1;
+ size_t level2_alloc;
+ size_t level2_size;
+ uint32_t *level2;
+ size_t level3_alloc;
+ size_t level3_size;
+ uint8_t *level3;
+ /* Compressed representation.  Written by wcwidth_table_finalize:
+ the size in bytes and the xmalloc'ed block itself. */
+ size_t result_size;
+ char *result;
+};
+
+/* Initialize. Assumes t->p and t->q have already been set.
+ Marks all three levels as empty and unallocated.  The level
+ pointers and the result fields are not touched here; the level
+ pointers get their first value in wcwidth_table_add. */
+static inline void
+wcwidth_table_init (struct wcwidth_table *t)
+{
+ t->level1_alloc = t->level1_size = 0;
+ t->level2_alloc = t->level2_size = 0;
+ t->level3_alloc = t->level3_size = 0;
+}
+
+/* Add one entry: record WIDTH as the width of the wide character WC.
+
+ A WIDTH of 0xff is not stored at all: 0xff is the value every
+ fresh level3 block is filled with, so the call returns before any
+ allocation happens.
+
+ WC is decomposed into index1 (the bits above q + p), index2 (the
+ next q bits) and index3 (the low p bits).  The function then
+ - grows level1 until it covers index1, filling new slots with the
+ "unused" marker ~((uint32_t) 0);
+ - if the level1 slot is unused, appends a fresh level2 block of
+ (1 << q) unused entries and stores its block number in level1;
+ - if the level2 slot is unused, appends a fresh level3 block of
+ (1 << p) bytes set to 0xff and stores its block number in level2;
+ - overwrites the selected level3 byte with WIDTH.
+
+ The results of xmalloc and xrealloc are not checked here
+ (presumably those helpers abort on failure -- they are defined
+ outside this hunk). */
+static void
+wcwidth_table_add (struct wcwidth_table *t, uint32_t wc, uint8_t width)
+{
+ uint32_t index1 = wc >> (t->q + t->p);
+ uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
+ uint32_t index3 = wc & ((1 << t->p) - 1);
+ size_t i, i1, i2;
+
+ if (width == 0xff)
+ return;
+
+ if (index1 >= t->level1_size)
+ {
+ if (index1 >= t->level1_alloc)
+ {
+ size_t alloc = 2 * t->level1_alloc;
+ if (alloc <= index1)
+ alloc = index1 + 1;
+ t->level1 = (t->level1_alloc > 0
+ ? (uint32_t *) xrealloc ((char *) t->level1,
+ alloc * sizeof (uint32_t))
+ : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
+ t->level1_alloc = alloc;
+ }
+ while (index1 >= t->level1_size)
+ t->level1[t->level1_size++] = ~((uint32_t) 0);
+ }
+
+ if (t->level1[index1] == ~((uint32_t) 0))
+ {
+ if (t->level2_size == t->level2_alloc)
+ {
+ size_t alloc = 2 * t->level2_alloc + 1;
+ t->level2 = (t->level2_alloc > 0
+ ? (uint32_t *) xrealloc ((char *) t->level2,
+ (alloc << t->q) * sizeof (uint32_t))
+ : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
+ t->level2_alloc = alloc;
+ }
+ i1 = t->level2_size << t->q;
+ i2 = (t->level2_size + 1) << t->q;
+ for (i = i1; i < i2; i++)
+ t->level2[i] = ~((uint32_t) 0);
+ t->level1[index1] = t->level2_size++;
+ }
+
+ index2 += t->level1[index1] << t->q;
+
+ if (t->level2[index2] == ~((uint32_t) 0))
+ {
+ if (t->level3_size == t->level3_alloc)
+ {
+ size_t alloc = 2 * t->level3_alloc + 1;
+ t->level3 = (t->level3_alloc > 0
+ ? (uint8_t *) xrealloc ((char *) t->level3,
+ (alloc << t->p) * sizeof (uint8_t))
+ : (uint8_t *) xmalloc ((alloc << t->p) * sizeof (uint8_t)));
+ t->level3_alloc = alloc;
+ }
+ i1 = t->level3_size << t->p;
+ i2 = (t->level3_size + 1) << t->p;
+ for (i = i1; i < i2; i++)
+ t->level3[i] = 0xff;
+ t->level2[index2] = t->level3_size++;
+ }
+
+ index3 += t->level2[index2] << t->p;
+
+ t->level3[index3] = width;
+}
+
+/* Finalize and shrink.
+ Merges identical level3 blocks, then identical level2 blocks, and
+ writes the table into a newly xmalloc'ed block t->result.  Its
+ size t->result_size is the used length rounded up to a multiple
+ of 4; the padding bytes are zeroed.  Layout of the result:
+ word [0] shift that yields the level1 index (q + p)
+ word [1] number of level1 entries
+ word [2] shift that yields the level2 index (p)
+ word [3] mask for the level2 index ((1 << q) - 1)
+ word [4] mask for the level3 index ((1 << p) - 1)
+ followed by the level1 array (uint32_t), the level2 blocks
+ (uint32_t) and the level3 blocks (uint8_t).  Level1 and level2
+ entries are stored as byte offsets from the start of the result,
+ with 0 standing for "no block below".
+
+ The working arrays are freed at the end but the level pointers and
+ *_alloc counters are not reset, so T must be re-initialized before
+ it is used for adding again.
+
+ NOTE(review): reorder3 and reorder2 are variable-length arrays sized
+ by the block counts; if no entry was ever added they have length
+ zero, which ISO C does not permit -- confirm that this relies on
+ the GCC extension or that empty tables never reach this point. */
+static void
+wcwidth_table_finalize (struct wcwidth_table *t)
+{
+ size_t i, j, k;
+ uint32_t reorder3[t->level3_size];
+ uint32_t reorder2[t->level2_size];
+ uint32_t level1_offset, level2_offset, level3_offset, last_offset;
+
+ /* Uniquify level3 blocks.  k counts the distinct blocks kept so
+ far; reorder3[j] receives the new block number of old block j.
+ After the loop level2 is rewritten through reorder3. */
+ k = 0;
+ for (j = 0; j < t->level3_size; j++)
+ {
+ for (i = 0; i < k; i++)
+ if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
+ (1 << t->p) * sizeof (uint8_t)) == 0)
+ break;
+ /* Relocate block j to block i.  When no kept block matched,
+ i == k and block j is moved down into slot k. */
+ reorder3[j] = i;
+ if (i == k)
+ {
+ if (i != j)
+ memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
+ (1 << t->p) * sizeof (uint8_t));
+ k++;
+ }
+ }
+ t->level3_size = k;
+
+ for (i = 0; i < (t->level2_size << t->q); i++)
+ if (t->level2[i] != ~((uint32_t) 0))
+ t->level2[i] = reorder3[t->level2[i]];
+
+ /* Uniquify level2 blocks.  Same scheme as above, one level up:
+ reorder2[j] receives the new block number of old level2 block j
+ and level1 is rewritten through it afterwards. */
+ k = 0;
+ for (j = 0; j < t->level2_size; j++)
+ {
+ for (i = 0; i < k; i++)
+ if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
+ (1 << t->q) * sizeof (uint32_t)) == 0)
+ break;
+ /* Relocate block j to block i.  When no kept block matched,
+ i == k and block j is moved down into slot k. */
+ reorder2[j] = i;
+ if (i == k)
+ {
+ if (i != j)
+ memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
+ (1 << t->q) * sizeof (uint32_t));
+ k++;
+ }
+ }
+ t->level2_size = k;
+
+ for (i = 0; i < t->level1_size; i++)
+ if (t->level1[i] != ~((uint32_t) 0))
+ t->level1[i] = reorder2[t->level1[i]];
+
+ /* Create and fill the resulting compressed representation.
+ last_offset is the number of bytes actually used; result_size
+ rounds that up to the next multiple of 4. */
+ last_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t)
+ + (t->level2_size << t->q) * sizeof (uint32_t)
+ + (t->level3_size << t->p) * sizeof (uint8_t);
+ t->result_size = (last_offset + 3) & ~3ul;
+ t->result = (char *) xmalloc (t->result_size);
+
+ level1_offset =
+ 5 * sizeof (uint32_t);
+ level2_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t);
+ level3_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t)
+ + (t->level2_size << t->q) * sizeof (uint32_t);
+
+ ((uint32_t *) t->result)[0] = t->q + t->p;
+ ((uint32_t *) t->result)[1] = t->level1_size;
+ ((uint32_t *) t->result)[2] = t->p;
+ ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
+ ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
+
+ for (i = 0; i < t->level1_size; i++)
+ ((uint32_t *) (t->result + level1_offset))[i] =
+ (t->level1[i] == ~((uint32_t) 0)
+ ? 0
+ : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
+
+ for (i = 0; i < (t->level2_size << t->q); i++)
+ ((uint32_t *) (t->result + level2_offset))[i] =
+ (t->level2[i] == ~((uint32_t) 0)
+ ? 0
+ : (t->level2[i] << t->p) * sizeof (uint8_t) + level3_offset);
+
+ for (i = 0; i < (t->level3_size << t->p); i++)
+ ((uint8_t *) (t->result + level3_offset))[i] = t->level3[i];
+
+ if (last_offset < t->result_size)
+ memset (t->result + last_offset, 0, t->result_size - last_offset);
+
+ if (t->level1_alloc > 0)
+ free (t->level1);
+ if (t->level2_alloc > 0)
+ free (t->level2);
+ if (t->level3_alloc > 0)
+ free (t->level3);
+}
+
+struct wctrans_table
+{
+ /* Builder for a sparse 3-level table that stores, per wide
+ character, the signed difference between its mapped value and
+ the character itself.
+ Parameters.  A character is split, from the top down, into a
+ level-1 index, q bits of level-2 index and p bits of level-3
+ index.  Both values have to be set by the caller before
+ wctrans_table_init. */
+ unsigned int p;
+ unsigned int q;
+ /* Working representation.  For each level, *_alloc is the allocated
+ capacity and *_size the part in use: counted in entries for
+ level1, in blocks of (1 << q) entries for level2 and in blocks of
+ (1 << p) int32_t values for level3.  In level1 and level2 the
+ value ~((uint32_t) 0) marks an entry that has no block below it
+ yet; untouched level3 values are 0. */
+ size_t level1_alloc;
+ size_t level1_size;
+ uint32_t *level1;
+ size_t level2_alloc;
+ size_t level2_size;
+ uint32_t *level2;
+ size_t level3_alloc;
+ size_t level3_size;
+ int32_t *level3;
+ /* Compressed representation.  Written by wctrans_table_finalize:
+ the size in bytes and the xmalloc'ed block itself. */
+ size_t result_size;
+ char *result;
+};
+
+/* Initialize. Assumes t->p and t->q have already been set.
+ Marks all three levels as empty and unallocated.  The level
+ pointers and the result fields are not touched here; the level
+ pointers get their first value in wctrans_table_add. */
+static inline void
+wctrans_table_init (struct wctrans_table *t)
+{
+ t->level1_alloc = t->level1_size = 0;
+ t->level2_alloc = t->level2_size = 0;
+ t->level3_alloc = t->level3_size = 0;
+}
+
+/* Add one entry: record that the wide character WC maps to MAPPED_WC.
+
+ What is stored is the signed difference MAPPED_WC - WC.  A
+ difference of 0 (identity mapping) is not stored at all: 0 is the
+ value every fresh level3 block is filled with, so the call returns
+ before any allocation happens.
+
+ WC is decomposed into index1 (the bits above q + p), index2 (the
+ next q bits) and index3 (the low p bits).  The function then
+ - grows level1 until it covers index1, filling new slots with the
+ "unused" marker ~((uint32_t) 0);
+ - if the level1 slot is unused, appends a fresh level2 block of
+ (1 << q) unused entries and stores its block number in level1;
+ - if the level2 slot is unused, appends a fresh level3 block of
+ (1 << p) zeroed values and stores its block number in level2;
+ - overwrites the selected level3 value with the difference.
+
+ The results of xmalloc and xrealloc are not checked here
+ (presumably those helpers abort on failure -- they are defined
+ outside this hunk). */
+static void
+wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
+{
+ uint32_t index1 = wc >> (t->q + t->p);
+ uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
+ uint32_t index3 = wc & ((1 << t->p) - 1);
+ int32_t value = (int32_t) mapped_wc - (int32_t) wc;
+ size_t i, i1, i2;
+
+ if (value == 0)
+ return;
+
+ if (index1 >= t->level1_size)
+ {
+ if (index1 >= t->level1_alloc)
+ {
+ size_t alloc = 2 * t->level1_alloc;
+ if (alloc <= index1)
+ alloc = index1 + 1;
+ t->level1 = (t->level1_alloc > 0
+ ? (uint32_t *) xrealloc ((char *) t->level1,
+ alloc * sizeof (uint32_t))
+ : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
+ t->level1_alloc = alloc;
+ }
+ while (index1 >= t->level1_size)
+ t->level1[t->level1_size++] = ~((uint32_t) 0);
+ }
+
+ if (t->level1[index1] == ~((uint32_t) 0))
+ {
+ if (t->level2_size == t->level2_alloc)
+ {
+ size_t alloc = 2 * t->level2_alloc + 1;
+ t->level2 = (t->level2_alloc > 0
+ ? (uint32_t *) xrealloc ((char *) t->level2,
+ (alloc << t->q) * sizeof (uint32_t))
+ : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
+ t->level2_alloc = alloc;
+ }
+ i1 = t->level2_size << t->q;
+ i2 = (t->level2_size + 1) << t->q;
+ for (i = i1; i < i2; i++)
+ t->level2[i] = ~((uint32_t) 0);
+ t->level1[index1] = t->level2_size++;
+ }
+
+ index2 += t->level1[index1] << t->q;
+
+ if (t->level2[index2] == ~((uint32_t) 0))
+ {
+ if (t->level3_size == t->level3_alloc)
+ {
+ size_t alloc = 2 * t->level3_alloc + 1;
+ t->level3 = (t->level3_alloc > 0
+ ? (int32_t *) xrealloc ((char *) t->level3,
+ (alloc << t->p) * sizeof (int32_t))
+ : (int32_t *) xmalloc ((alloc << t->p) * sizeof (int32_t)));
+ t->level3_alloc = alloc;
+ }
+ i1 = t->level3_size << t->p;
+ i2 = (t->level3_size + 1) << t->p;
+ for (i = i1; i < i2; i++)
+ t->level3[i] = 0;
+ t->level2[index2] = t->level3_size++;
+ }
+
+ index3 += t->level2[index2] << t->p;
+
+ t->level3[index3] = value;
+}
+
+/* Finalize and shrink.
+ Merges identical level3 blocks, then identical level2 blocks, and
+ writes the table into a newly xmalloc'ed block t->result of
+ t->result_size bytes.  Layout of the result, in 32-bit units:
+ [0] shift that yields the level1 index (q + p)
+ [1] number of level1 entries
+ [2] shift that yields the level2 index (p)
+ [3] mask for the level2 index ((1 << q) - 1)
+ [4] mask for the level3 index ((1 << p) - 1)
+ followed by the level1 array, the level2 blocks and the level3
+ blocks of int32_t differences.  Level1 and level2 entries are
+ stored as byte offsets from the start of the result, with 0
+ standing for "no block below".
+
+ The working arrays are freed at the end but the level pointers and
+ *_alloc counters are not reset, so T must be re-initialized before
+ it is used for adding again.
+
+ NOTE(review): reorder3 and reorder2 are variable-length arrays sized
+ by the block counts; if no entry was ever added they have length
+ zero, which ISO C does not permit -- confirm that this relies on
+ the GCC extension or that empty tables never reach this point. */
+static void
+wctrans_table_finalize (struct wctrans_table *t)
+{
+ size_t i, j, k;
+ uint32_t reorder3[t->level3_size];
+ uint32_t reorder2[t->level2_size];
+ uint32_t level1_offset, level2_offset, level3_offset;
+
+ /* Uniquify level3 blocks.  k counts the distinct blocks kept so
+ far; reorder3[j] receives the new block number of old block j.
+ After the loop level2 is rewritten through reorder3. */
+ k = 0;
+ for (j = 0; j < t->level3_size; j++)
+ {
+ for (i = 0; i < k; i++)
+ if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
+ (1 << t->p) * sizeof (int32_t)) == 0)
+ break;
+ /* Relocate block j to block i.  When no kept block matched,
+ i == k and block j is moved down into slot k. */
+ reorder3[j] = i;
+ if (i == k)
+ {
+ if (i != j)
+ memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
+ (1 << t->p) * sizeof (int32_t));
+ k++;
+ }
+ }
+ t->level3_size = k;
+
+ for (i = 0; i < (t->level2_size << t->q); i++)
+ if (t->level2[i] != ~((uint32_t) 0))
+ t->level2[i] = reorder3[t->level2[i]];
+
+ /* Uniquify level2 blocks.  Same scheme as above, one level up:
+ reorder2[j] receives the new block number of old level2 block j
+ and level1 is rewritten through it afterwards. */
+ k = 0;
+ for (j = 0; j < t->level2_size; j++)
+ {
+ for (i = 0; i < k; i++)
+ if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
+ (1 << t->q) * sizeof (uint32_t)) == 0)
+ break;
+ /* Relocate block j to block i.  When no kept block matched,
+ i == k and block j is moved down into slot k. */
+ reorder2[j] = i;
+ if (i == k)
+ {
+ if (i != j)
+ memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
+ (1 << t->q) * sizeof (uint32_t));
+ k++;
+ }
+ }
+ t->level2_size = k;
+
+ for (i = 0; i < t->level1_size; i++)
+ if (t->level1[i] != ~((uint32_t) 0))
+ t->level1[i] = reorder2[t->level1[i]];
+
+ /* Create and fill the resulting compressed representation.
+ The size is the 5-word header plus the three levels; the
+ level*_offset values are byte offsets of each level within the
+ result. */
+ t->result_size =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t)
+ + (t->level2_size << t->q) * sizeof (uint32_t)
+ + (t->level3_size << t->p) * sizeof (int32_t);
+ t->result = (char *) xmalloc (t->result_size);
+
+ level1_offset =
+ 5 * sizeof (uint32_t);
+ level2_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t);
+ level3_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t)
+ + (t->level2_size << t->q) * sizeof (uint32_t);
+
+ ((uint32_t *) t->result)[0] = t->q + t->p;
+ ((uint32_t *) t->result)[1] = t->level1_size;
+ ((uint32_t *) t->result)[2] = t->p;
+ ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
+ ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
+
+ for (i = 0; i < t->level1_size; i++)
+ ((uint32_t *) (t->result + level1_offset))[i] =
+ (t->level1[i] == ~((uint32_t) 0)
+ ? 0
+ : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
+
+ for (i = 0; i < (t->level2_size << t->q); i++)
+ ((uint32_t *) (t->result + level2_offset))[i] =
+ (t->level2[i] == ~((uint32_t) 0)
+ ? 0
+ : (t->level2[i] << t->p) * sizeof (int32_t) + level3_offset);
+
+ for (i = 0; i < (t->level3_size << t->p); i++)
+ ((int32_t *) (t->result + level3_offset))[i] = t->level3[i];
+
+ if (t->level1_alloc > 0)
+ free (t->level1);
+ if (t->level2_alloc > 0)
+ free (t->level2);
+ if (t->level3_alloc > 0)
+ free (t->level3);
+}
+
+
static void
allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
struct repertoire_t *repertoire)
size_t min_total = UINT_MAX;
size_t act_size = 256;
- if (!be_quiet && ctype->charnames_act > 512)
- fputs (_("\
+ if (oldstyle_tables)
+ {
+ if (!be_quiet && ctype->charnames_act > 512)
+ fputs (_("\
Computing table size for character classes might take a while..."),
- stderr);
+ stderr);
- /* While we want to have a small total size we are willing to use a
- little bit larger table if this reduces the number of layers.
- Therefore we add a little penalty to the number of planes.
- Maybe this constant has to be adjusted a bit. */
+ /* While we want to have a small total size we are willing to use a
+ little bit larger table if this reduces the number of layers.
+ Therefore we add a little penalty to the number of planes.
+ Maybe this constant has to be adjusted a bit. */
#define PENALTY 128
- do
- {
- size_t cnt[act_size];
- size_t act_planes = 1;
-
- memset (cnt, '\0', sizeof cnt);
+ do
+ {
+ size_t cnt[act_size];
+ size_t act_planes = 1;
- for (idx = 0; idx < 256; ++idx)
- cnt[idx] = 1;
+ memset (cnt, '\0', sizeof cnt);
- for (idx = 0; idx < ctype->charnames_act; ++idx)
- if (ctype->charnames[idx] >= 256)
- {
- size_t nr = ctype->charnames[idx] % act_size;
+ for (idx = 0; idx < 256; ++idx)
+ cnt[idx] = 1;
- if (++cnt[nr] > act_planes)
+ for (idx = 0; idx < ctype->charnames_act; ++idx)
+ if (ctype->charnames[idx] >= 256)
{
- act_planes = cnt[nr];
- if ((act_size + PENALTY) * act_planes >= min_total)
- break;
+ size_t nr = ctype->charnames[idx] % act_size;
+
+ if (++cnt[nr] > act_planes)
+ {
+ act_planes = cnt[nr];
+ if ((act_size + PENALTY) * act_planes >= min_total)
+ break;
+ }
}
- }
- if ((act_size + PENALTY) * act_planes < min_total)
- {
- min_total = (act_size + PENALTY) * act_planes;
- ctype->plane_size = act_size;
- ctype->plane_cnt = act_planes;
- }
+ if ((act_size + PENALTY) * act_planes < min_total)
+ {
+ min_total = (act_size + PENALTY) * act_planes;
+ ctype->plane_size = act_size;
+ ctype->plane_cnt = act_planes;
+ }
- ++act_size;
- }
- while (act_size < min_total);
+ ++act_size;
+ }
+ while (act_size < min_total);
- if (!be_quiet && ctype->charnames_act > 512)
- fputs (_(" done\n"), stderr);
+ if (!be_quiet && ctype->charnames_act > 512)
+ fputs (_(" done\n"), stderr);
- ctype->names = (uint32_t *) xcalloc (ctype->plane_size
- * ctype->plane_cnt,
- sizeof (uint32_t));
+ ctype->names = (uint32_t *) xcalloc (ctype->plane_size
+ * ctype->plane_cnt,
+ sizeof (uint32_t));
- for (idx = 1; idx < 256; ++idx)
- ctype->names[idx] = idx;
+ for (idx = 1; idx < 256; ++idx)
+ ctype->names[idx] = idx;
- /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
- ctype->names[0] = 1;
+ /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
+ ctype->names[0] = 1;
- for (idx = 256; idx < ctype->charnames_act; ++idx)
- {
- size_t nr = (ctype->charnames[idx] % ctype->plane_size);
- size_t depth = 0;
+ for (idx = 256; idx < ctype->charnames_act; ++idx)
+ {
+ size_t nr = (ctype->charnames[idx] % ctype->plane_size);
+ size_t depth = 0;
- while (ctype->names[nr + depth * ctype->plane_size])
- ++depth;
- assert (depth < ctype->plane_cnt);
+ while (ctype->names[nr + depth * ctype->plane_size])
+ ++depth;
+ assert (depth < ctype->plane_cnt);
- ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
+ ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
- /* Now for faster access remember the index in the NAMES_B array. */
- ctype->charnames[idx] = nr + depth * ctype->plane_size;
+ /* Now for faster access remember the index in the NAMES_B array. */
+ ctype->charnames[idx] = nr + depth * ctype->plane_size;
+ }
+ ctype->names[0] = 0;
+ }
+ else
+ {
+ ctype->plane_size = 0;
+ ctype->plane_cnt = 0;
+ ctype->names = NULL;
}
- ctype->names[0] = 0;
-
/* You wonder about this amount of memory? This is only because some
users do not manage to address the array with unsigned values or
128 up to 255 below the entry for \0. */
ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
sizeof (char_class_t));
- ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
- * ctype->plane_cnt,
- sizeof (char_class32_t));
+ ctype->ctype32_b = (char_class32_t *)
+ xcalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256),
+ sizeof (char_class32_t));
+ if (!oldstyle_tables)
+ ctype->class_3level = (struct iovec *)
+ xmalloc (ctype->nr_charclass * sizeof (struct iovec));
/* This is the array accessed using the multibyte string elements. */
for (idx = 0; idx < 256; ++idx)
for (idx = 0; idx < 127; ++idx)
ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
- /* The 32 bit array contains all characters. */
- for (idx = 0; idx < ctype->class_collection_act; ++idx)
- ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
+ if (oldstyle_tables)
+ {
+ /* The 32 bit array contains all characters. */
+ for (idx = 0; idx < ctype->class_collection_act; ++idx)
+ ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
+ }
+ else
+ {
+ /* The 32 bit array contains all characters < 0x100. */
+ for (idx = 0; idx < ctype->class_collection_act; ++idx)
+ if (ctype->charnames[idx] < 0x100)
+ ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
+ }
+
+ if (!oldstyle_tables)
+ {
+ size_t nr;
+
+ for (nr = 0; nr < ctype->nr_charclass; nr++)
+ {
+ struct wctype_table t;
+
+ t.p = 4; /* or: 5 */
+ t.q = 7; /* or: 6 */
+ wctype_table_init (&t);
+
+ for (idx = 0; idx < ctype->class_collection_act; ++idx)
+ if (ctype->class_collection[idx] & _ISwbit (nr))
+ wctype_table_add (&t, ctype->charnames[idx]);
+
+ wctype_table_finalize (&t);
+
+ if (verbose)
+ fprintf (stderr, _("%s: table for class \"%s\": %lu bytes\n"),
+ "LC_CTYPE", ctype->classnames[nr],
+ (unsigned long int) t.result_size);
+
+ ctype->class_3level[nr].iov_base = t.result;
+ ctype->class_3level[nr].iov_len = t.result_size;
+ }
+ }
/* Room for table of mappings. */
ctype->map = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
ctype->map32 = (uint32_t **) xmalloc (ctype->map_collection_nr
- * sizeof (uint32_t *));
+ * sizeof (uint32_t *));
+ if (!oldstyle_tables)
+ ctype->map_3level = (struct iovec *)
+ xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
/* Fill in all mappings. */
for (idx = 0; idx < 2; ++idx)
unsigned int idx2;
/* Allocate table. */
- ctype->map32[idx] = (uint32_t *) xmalloc (ctype->plane_size
- * ctype->plane_cnt
- * sizeof (uint32_t));
+ ctype->map32[idx] = (uint32_t *)
+ xmalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
+ * sizeof (uint32_t));
/* Copy default value (identity mapping). */
- memcpy (ctype->map32[idx], ctype->names,
- ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
+ if (oldstyle_tables)
+ memcpy (ctype->map32[idx], ctype->names,
+ ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
+ else
+ for (idx2 = 0; idx2 < 256; ++idx2)
+ ctype->map32[idx][idx2] = idx2;
/* Copy values from collection. */
for (idx2 = 0; idx2 < 256; ++idx2)
if (ctype->map_collection[idx][idx2] != 0)
ctype->map32[idx][idx2] = ctype->map_collection[idx][idx2];
- while (idx2 < ctype->map_collection_act[idx])
+ if (oldstyle_tables)
+ while (idx2 < ctype->map_collection_act[idx])
+ {
+ if (ctype->map_collection[idx][idx2] != 0)
+ ctype->map32[idx][ctype->charnames[idx2]] =
+ ctype->map_collection[idx][idx2];
+ ++idx2;
+ }
+ }
+
+ if (!oldstyle_tables)
+ {
+ size_t nr;
+
+ for (nr = 0; nr < ctype->map_collection_nr; nr++)
{
- if (ctype->map_collection[idx][idx2] != 0)
- ctype->map32[idx][ctype->charnames[idx2]] =
- ctype->map_collection[idx][idx2];
- ++idx2;
+ struct wctrans_table t;
+
+ t.p = 7;
+ t.q = 9;
+ wctrans_table_init (&t);
+
+ for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
+ if (ctype->map_collection[nr][idx] != 0)
+ wctrans_table_add (&t, ctype->charnames[idx],
+ ctype->map_collection[nr][idx]);
+
+ wctrans_table_finalize (&t);
+
+ if (verbose)
+ fprintf (stderr, _("%s: table for map \"%s\": %lu bytes\n"),
+ "LC_CTYPE", ctype->mapnames[nr],
+ (unsigned long int) t.result_size);
+
+ ctype->map_3level[nr].iov_base = t.result;
+ ctype->map_3level[nr].iov_len = t.result_size;
}
}
ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
* sizeof (uint32_t));
- /* Array for width information. Because the expected width are very
- small we use only one single byte. This save space and we need
- not provide the information twice with both endianesses. */
- width_table_size = (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul;
- ctype->width = (unsigned char *) xmalloc (width_table_size);
+ if (oldstyle_tables)
+ {
+ ctype->class_offset = 0; /* not really used */
+ ctype->map_offset = 0; /* not really used */
+ }
+ else
+ {
+ ctype->class_offset = _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
+ ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
+ }
- /* Initialize with -1. */
- memset (ctype->width, '\xff', width_table_size);
- if (charmap->width_rules != NULL)
+ /* Array for width information. Because the expected width are very
+ small we use only one single byte. This saves space. */
+ if (oldstyle_tables)
{
- size_t cnt;
+ width_table_size = (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul;
+ ctype->width = (unsigned char *) xmalloc (width_table_size);
- for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
+ /* Initialize with -1. */
+ memset (ctype->width, '\xff', width_table_size);
+ if (charmap->width_rules != NULL)
{
- unsigned char bytes[charmap->mb_cur_max];
- int nbytes = charmap->width_rules[cnt].from->nbytes;
+ size_t cnt;
- /* We have the range of character for which the width is
- specified described using byte sequences of the multibyte
- charset. We have to convert this to UCS4 now. And we
- cannot simply convert the beginning and the end of the
- sequence, we have to iterate over the byte sequence and
- convert it for every single character. */
- memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
-
- while (nbytes < charmap->width_rules[cnt].to->nbytes
- || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
- nbytes) <= 0)
+ for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
{
- /* Find the UCS value for `bytes'. */
- int inner;
- uint32_t wch;
- struct charseq *seq =
- charmap_find_symbol (charmap, bytes, nbytes);
-
- if (seq == NULL)
- wch = ILLEGAL_CHAR_VALUE;
- else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
- wch = seq->ucs4;
- else
- wch = repertoire_find_value (ctype->repertoire, seq->name,
- strlen (seq->name));
-
- if (wch != ILLEGAL_CHAR_VALUE)
+ unsigned char bytes[charmap->mb_cur_max];
+ int nbytes = charmap->width_rules[cnt].from->nbytes;
+
+ /* We have the range of character for which the width is
+ specified described using byte sequences of the multibyte
+ charset. We have to convert this to UCS4 now. And we
+ cannot simply convert the beginning and the end of the
+ sequence, we have to iterate over the byte sequence and
+ convert it for every single character. */
+ memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
+
+ while (nbytes < charmap->width_rules[cnt].to->nbytes
+ || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
+ nbytes) <= 0)
{
- /* Store the value. */
- size_t nr = wch % ctype->plane_size;
- size_t depth = 0;
+ /* Find the UCS value for `bytes'. */
+ int inner;
+ uint32_t wch;
+ struct charseq *seq =
+ charmap_find_symbol (charmap, bytes, nbytes);
+
+ if (seq == NULL)
+ wch = ILLEGAL_CHAR_VALUE;
+ else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
+ wch = seq->ucs4;
+ else
+ wch = repertoire_find_value (ctype->repertoire, seq->name,
+ strlen (seq->name));
- while (ctype->names[nr + depth * ctype->plane_size] != wch)
+ if (wch != ILLEGAL_CHAR_VALUE)
{
- ++depth;
- assert (depth < ctype->plane_cnt);
+ /* Store the value. */
+ size_t nr = wch % ctype->plane_size;
+ size_t depth = 0;
+
+ while (ctype->names[nr + depth * ctype->plane_size] != wch)
+ {
+ ++depth;
+ assert (depth < ctype->plane_cnt);
+ }
+
+ ctype->width[nr + depth * ctype->plane_size]
+ = charmap->width_rules[cnt].width;
}
- ctype->width[nr + depth * ctype->plane_size]
- = charmap->width_rules[cnt].width;
+ /* "Increment" the bytes sequence. */
+ inner = nbytes - 1;
+ while (inner >= 0 && bytes[inner] == 0xff)
+ --inner;
+
+ if (inner < 0)
+ {
+ /* We have to extend the byte sequence. */
+ if (nbytes >= charmap->width_rules[cnt].to->nbytes)
+ break;
+
+ bytes[0] = 1;
+ memset (&bytes[1], 0, nbytes);
+ ++nbytes;
+ }
+ else
+ {
+ ++bytes[inner];
+ while (++inner < nbytes)
+ bytes[inner] = 0;
+ }
}
+ }
+ }
- /* "Increment" the bytes sequence. */
- inner = nbytes - 1;
- while (inner >= 0 && bytes[inner] == 0xff)
- --inner;
+ /* Now set all the other characters of the character set to the
+ default width. */
+ curs = NULL;
+ while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
+ {
+ struct charseq *data = (struct charseq *) vdata;
+ size_t nr;
+ size_t depth;
- if (inner < 0)
- {
- /* We have to extend the byte sequence. */
- if (nbytes >= charmap->width_rules[cnt].to->nbytes)
- break;
+ if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ data->ucs4 = repertoire_find_value (ctype->repertoire,
+ data->name, len);
- bytes[0] = 1;
- memset (&bytes[1], 0, nbytes);
- ++nbytes;
- }
- else
+ if (data->ucs4 != ILLEGAL_CHAR_VALUE)
+ {
+ nr = data->ucs4 % ctype->plane_size;
+ depth = 0;
+
+ while (ctype->names[nr + depth * ctype->plane_size] != data->ucs4)
{
- ++bytes[inner];
- while (++inner < nbytes)
- bytes[inner] = 0;
+ ++depth;
+ assert (depth < ctype->plane_cnt);
}
+
+ if (ctype->width[nr + depth * ctype->plane_size]
+ == (unsigned char) '\xff')
+ ctype->width[nr + depth * ctype->plane_size] =
+ charmap->width_default;
}
}
}
-
- /* Now set all the other characters of the character set to the
- default width. */
- curs = NULL;
- while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
+ else
{
- struct charseq *data = (struct charseq *) vdata;
- size_t nr;
- size_t depth;
+ struct wcwidth_table t;
- if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
- data->ucs4 = repertoire_find_value (ctype->repertoire,
- data->name, len);
+ t.p = 7;
+ t.q = 9;
+ wcwidth_table_init (&t);
- if (data->ucs4 != ILLEGAL_CHAR_VALUE)
+ /* First set all the characters of the character set to the default width. */
+ curs = NULL;
+ while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
+ {
+ struct charseq *data = (struct charseq *) vdata;
+
+ if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ data->ucs4 = repertoire_find_value (ctype->repertoire,
+ data->name, len);
+
+ if (data->ucs4 != ILLEGAL_CHAR_VALUE)
+ wcwidth_table_add (&t, data->ucs4, charmap->width_default);
+ }
+
+ /* Now add the explicitly specified widths. */
+ if (charmap->width_rules != NULL)
{
- nr = data->ucs4 % ctype->plane_size;
- depth = 0;
+ size_t cnt;
- while (ctype->names[nr + depth * ctype->plane_size] != data->ucs4)
+ for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
{
- ++depth;
- assert (depth < ctype->plane_cnt);
- }
+ unsigned char bytes[charmap->mb_cur_max];
+ int nbytes = charmap->width_rules[cnt].from->nbytes;
+
+ /* We have the range of character for which the width is
+ specified described using byte sequences of the multibyte
+ charset. We have to convert this to UCS4 now. And we
+ cannot simply convert the beginning and the end of the
+ sequence, we have to iterate over the byte sequence and
+ convert it for every single character. */
+ memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
+
+ while (nbytes < charmap->width_rules[cnt].to->nbytes
+ || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
+ nbytes) <= 0)
+ {
+ /* Find the UCS value for `bytes'. */
+ int inner;
+ uint32_t wch;
+ struct charseq *seq =
+ charmap_find_symbol (charmap, bytes, nbytes);
+
+ if (seq == NULL)
+ wch = ILLEGAL_CHAR_VALUE;
+ else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
+ wch = seq->ucs4;
+ else
+ wch = repertoire_find_value (ctype->repertoire, seq->name,
+ strlen (seq->name));
+
+ if (wch != ILLEGAL_CHAR_VALUE)
+ /* Store the value. */
+ wcwidth_table_add (&t, wch, charmap->width_rules[cnt].width);
+
+ /* "Increment" the bytes sequence. */
+ inner = nbytes - 1;
+ while (inner >= 0 && bytes[inner] == 0xff)
+ --inner;
- if (ctype->width[nr + depth * ctype->plane_size]
- == (unsigned char) '\xff')
- ctype->width[nr + depth * ctype->plane_size] =
- charmap->width_default;
+ if (inner < 0)
+ {
+ /* We have to extend the byte sequence. */
+ if (nbytes >= charmap->width_rules[cnt].to->nbytes)
+ break;
+
+ bytes[0] = 1;
+ memset (&bytes[1], 0, nbytes);
+ ++nbytes;
+ }
+ else
+ {
+ ++bytes[inner];
+ while (++inner < nbytes)
+ bytes[inner] = 0;
+ }
+ }
+ }
}
+
+ wcwidth_table_finalize (&t);
+
+ if (verbose)
+ fprintf (stderr, _("%s: table for width: %lu bytes\n"),
+ "LC_CTYPE", (unsigned long int) t.result_size);
+
+ ctype->width_3level.iov_base = t.result;
+ ctype->width_3level.iov_len = t.result_size;
}
/* Set MB_CUR_MAX. */
/* If not zero suppress warnings and information messages. */
int be_quiet;
+/* If not zero, produce old-style hash table instead of 3-level access tables. */
+int oldstyle_tables;
+
/* If not zero force output even if warning were issued. */
static int force_output;
#define OPT_POSIX 1
#define OPT_QUIET 2
+#define OPT_OLDSTYLE 3
/* Definitions of arguments for argp functions. */
static const struct argp_option options[] =
{ NULL, 0, NULL, 0, N_("Output control:") },
{ "force", 'c', NULL, 0,
N_("Create output even if warning messages were issued") },
+ { "old-style", OPT_OLDSTYLE, NULL, 0, N_("Create old-style tables") },
{ "posix", OPT_POSIX, NULL, 0, N_("Be strictly POSIX conform") },
{ "quiet", OPT_QUIET, NULL, 0,
N_("Suppress warnings and information messages") },
case OPT_POSIX:
posix_conformance = 1;
break;
+ case OPT_OLDSTYLE:
+ oldstyle_tables = 1;
+ break;
case 'c':
force_output = 1;
break;
/* Global variables of the localedef program. */
extern int verbose;
extern int be_quiet;
+extern int oldstyle_tables;
extern const char *repertoire_global;
2000-07-24 Ulrich Drepper <drepper@redhat.com>
+ * tst-wctype.input: Add more input text.
+ * tst-wctype.c: Test more classes.
+
* Makefile: Add rules to build, run, and distribute tst-langinfo.
* tst-langinfo.c: New file.
* tst-langinfo.sh: New file.
+/* Test program for iswctype() function in ja_JP locale.
+ Copyright (C) 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
#include <error.h>
#include <locale.h>
#include <stdio.h>
}
}
+ wct = wctype ("jkata");
+ if (wct == 0)
+ error (EXIT_FAILURE, 0, "jkata: no such character class");
+
+ if (fgetws (buf, sizeof (buf) / sizeof (buf[0]), stdin) != NULL)
+ {
+ int n;
+
+ wprintf (L"buf[] = \"%ls\"\n", buf);
+
+ result = 0;
+
+ for (n = 0; buf[n] != L'\0'; ++n)
+ {
+ wprintf (L"jkata(U%04lx = %lc) = %d\n", (long) buf[n], buf[n],
+ iswctype (buf[n], wct));
+ result |= ((buf[n] < 0xff && iswctype (buf[n], wct))
+ || (buf[n] > 0xff && !iswctype (buf[n], wct)));
+ }
+ }
+
+ wct = wctype ("jdigit");
+ if (wct == 0)
+ error (EXIT_FAILURE, 0, "jdigit: no such character class");
+
+ if (fgetws (buf, sizeof (buf) / sizeof (buf[0]), stdin) != NULL)
+ {
+ int n;
+
+ wprintf (L"buf[] = \"%ls\"\n", buf);
+
+ result = 0;
+
+ for (n = 0; buf[n] != L'\0'; ++n)
+ {
+ wprintf (L"jdigit(U%04lx = %lc) = %d\n", (long) buf[n], buf[n],
+ iswctype (buf[n], wct));
+ result |= ((buf[n] < 0xff && iswctype (buf[n], wct))
+ || (buf[n] > 0xff && !iswctype (buf[n], wct)));
+ }
+ }
+
+ wct = wctype ("jspace");
+ if (wct == 0)
+ error (EXIT_FAILURE, 0, "jspace: no such character class");
+
+ if (fgetws (buf, sizeof (buf) / sizeof (buf[0]), stdin) != NULL)
+ {
+ int n;
+
+ wprintf (L"buf[] = \"%ls\"\n", buf);
+
+ result = 0;
+
+ for (n = 0; buf[n] != L'\0'; ++n)
+ {
+ wprintf (L"jspace(U%04lx = %lc) = %d\n", (long) buf[n], buf[n],
+ iswctype (buf[n], wct));
+ result |= ((buf[n] < 0xff && iswctype (buf[n], wct))
+ || (buf[n] > 0xff && !iswctype (buf[n], wct)));
+ }
+ }
+
+ wct = wctype ("jkanji");
+ if (wct == 0)
+ error (EXIT_FAILURE, 0, "jkanji: no such character class");
+
+ if (fgetws (buf, sizeof (buf) / sizeof (buf[0]), stdin) != NULL)
+ {
+ int n;
+
+ wprintf (L"buf[] = \"%ls\"\n", buf);
+
+ result = 0;
+
+ for (n = 0; buf[n] != L'\0'; ++n)
+ {
+ wprintf (L"jkanji(U%04lx = %lc) = %d\n", (long) buf[n], buf[n],
+ iswctype (buf[n], wct));
+ result |= ((buf[n] < 0xff && iswctype (buf[n], wct))
+ || (buf[n] > 0xff && !iswctype (buf[n], wct)));
+ }
+ }
+
return result;
}
¤¡¤¢¤£¤¤¤¥¤¦¤§¤¨¤©¤ª¤«¤¬¤Â¤®¤¯¤°¤±¤²¤³¤´¤µ¤¶¤·¤¸¤¹¤ºabcdefghjklmnoprrstuvwxyz
+¥¡¥¢¥£¥¤¥¥¥¦¥§¥¨¥©¥ª¥«¥¬¥Â¥®¥¯¥°¥±¥²¥³¥´¥µ¥¶¥·¥¸¥¹¥ºabcdefghjklmnoprrstuvwxyz
+£°£±£²£³£´£µ£¶£·£¸£¹0123456789
+¡¡ a
+°¡°¢°£°¤°¥°¦°§°¨°©°ª°«°¬°Â°®°¯°°°±°²°³°´°µ°¶°·°¸°¹°ºabcdefghjklmnoprrstuvwxyz
#include <wchar.h>
#include <wctype.h>
#include "../wctype/cname-lookup.h"
+#include "../wctype/wchar-lookup.h"
-/* Array containing width information. */
+/* Tables containing character property information. */
+extern const char *__ctype32_wctype[12];
+
+/* Tables containing width information. */
extern unsigned char *__ctype_width;
+extern const char *__ctype32_width;
/* If the program is compiled without optimization the following declaration
is not visible in the header. */
extern unsigned int *__ctype32_b;
static __inline int
-internal_wcwidth (wint_t ch)
+internal_wcwidth (wint_t wc)
{
- size_t idx;
- unsigned char res;
-
- if (ch == L'\0')
+ if (wc == L'\0')
return 0;
- idx = cname_lookup (ch);
- if (idx == ~((size_t) 0) || (__ctype32_b[idx] & _ISwprint) == 0)
- return -1;
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
+ {
+ /* Old locale format (hash size is nonzero): find the hash table
+ index of WC. A character that is not found or that lacks the
+ print bit yields -1; otherwise the width byte at that index is
+ used, with 0xff also meaning -1. */
+ size_t idx;
+ unsigned char res;
+
+ idx = cname_lookup (wc);
+ if (idx == ~((size_t) 0) || (__ctype32_b[idx] & _ISwprint) == 0)
+ return -1;
+
+ res = __ctype_width[idx];
+ return res == (unsigned char) '\xff' ? -1 : (int) res;
+ }
+ else
+ {
+ /* New locale format: WC must be a member of the print class table,
+ otherwise the result is -1. The width then comes from the width
+ table, where the byte 0xff is again mapped to -1. */
+ unsigned char res;
+
+ if (wctype_table_lookup (__ctype32_wctype[__ISwprint], wc) == 0)
+ return -1;
- res = __ctype_width[idx];
- return res == (unsigned char) '\xff' ? -1 : (int) res;
+ res = wcwidth_table_lookup (__ctype32_width, wc);
+ return res == (unsigned char) '\xff' ? -1 : (int) res;
+ }
}
-/* Copyright (C) 1996 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
#include <wctype.h>
#include "cname-lookup.h"
+#include "wchar-lookup.h"
extern unsigned int *__ctype32_b;
int
__iswctype (wint_t wc, wctype_t desc)
{
- size_t idx;
-
- idx = cname_lookup (wc);
- if (idx == ~((size_t) 0))
- return 0;
-
- return __ctype32_b[idx] & desc;
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
+ {
+ /* Old locale format (hash size is nonzero): DESC is used as a bit
+ mask which is tested against the class word found for WC. A
+ character that is not in the hash table belongs to no class. */
+ size_t idx;
+
+ idx = cname_lookup (wc);
+ if (idx == ~((size_t) 0))
+ return 0;
+
+ return __ctype32_b[idx] & desc;
+ }
+ else
+ {
+ /* If the user passes in an invalid DESC value (the one returned from
+ `wctype' in case of an error) simply return 0. */
+ if (desc == (wctype_t) 0)
+ return 0;
+
+ /* New locale format: DESC is treated as the address of a table
+ which is handed to wctype_table_lookup together with WC. */
+ return wctype_table_lookup ((const char *) desc, wc);
+ }
}
weak_alias (__iswctype, iswctype)
#define USE_IN_EXTENDED_LOCALE_MODEL 1
#include "cname-lookup.h"
+#include "wchar-lookup.h"
int
__iswctype_l (wint_t wc, wctype_t desc, __locale_t locale)
{
- const uint32_t *class32_b;
- size_t idx;
-
- idx = cname_lookup (wc, locale);
- if (idx == ~((size_t) 0))
- return 0;
-
- class32_b = (uint32_t *)
- locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS32)].string;
-
- return class32_b[idx] & desc;
+ if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
+ {
+ /* Old locale format (hash size of LOCALE is nonzero): DESC is used
+ as a bit mask which is tested against the class word of WC taken
+ from LOCALE's _NL_CTYPE_CLASS32 array. A character that is not
+ in the hash table belongs to no class. */
+ const uint32_t *class32_b;
+ size_t idx;
+
+ idx = cname_lookup (wc, locale);
+ if (idx == ~((size_t) 0))
+ return 0;
+
+ class32_b = (uint32_t *)
+ locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS32)].string;
+
+ return class32_b[idx] & desc;
+ }
+ else
+ {
+ /* If the user passes in an invalid DESC value (the one returned from
+ `__wctype_l' in case of an error) simply return 0. */
+ if (desc == (wctype_t) 0)
+ return 0;
+
+ /* New locale format: DESC is treated as the address of a table
+ which is handed to wctype_table_lookup together with WC; LOCALE
+ is not consulted any further in this branch. */
+ return wctype_table_lookup ((const char *) desc, wc);
+ }
}
/* Define the lookup function. */
#include "cname-lookup.h"
+#include "wchar-lookup.h"
wint_t
__towctrans (wint_t wc, wctrans_t desc)
{
- size_t idx;
-
/* If the user passes in an invalid DESC valid (the one returned from
`wctrans' in case of an error) simply return the value. */
if (desc == (wctrans_t) 0)
return wc;
- idx = cname_lookup (wc);
- if (idx == ~((size_t) 0))
- /* Character is not known. Default action is to simply return it. */
- return wc;
-
- return (wint_t) desc[idx];
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
+ {
+ /* Old locale format (hash size is nonzero): DESC is indexed with
+ the hash table index of WC to obtain the mapped character. */
+ size_t idx;
+
+ idx = cname_lookup (wc);
+ if (idx == ~((size_t) 0))
+ /* Character is not known. Default action is to simply return it. */
+ return wc;
+
+ return (wint_t) desc[idx];
+ }
+ else
+ {
+ /* New locale format: DESC is treated as the address of a table
+ which is handed to wctrans_table_lookup together with WC. */
+ return wctrans_table_lookup ((const char *) desc, wc);
+ }
}
weak_alias (__towctrans, towctrans)
/* Map wide character using given mapping and locale.
- Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
/* Define the lookup function. */
#define USE_IN_EXTENDED_LOCALE_MODEL 1
#include "cname-lookup.h"
+#include "wchar-lookup.h"
wint_t
__towctrans_l (wint_t wc, wctrans_t desc, __locale_t locale)
{
- size_t idx;
-
- idx = cname_lookup (wc, locale);
- if (idx == ~((size_t) 0))
- /* Character is not known. Default action is to simply return it. */
+ /* If the user passes in an invalid DESC value (the one returned from
+ `__wctrans_l' in case of an error) simply return the value. */
+ if (desc == (wctrans_t) 0)
return wc;
- return (wint_t) desc[idx];
+ if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
+ {
+ /* Old locale format (hash size of LOCALE is nonzero): DESC is
+ indexed with the hash table index of WC in LOCALE to obtain the
+ mapped character. */
+ size_t idx;
+
+ idx = cname_lookup (wc, locale);
+ if (idx == ~((size_t) 0))
+ /* Character is not known. Default action is to simply return it. */
+ return wc;
+
+ return (wint_t) desc[idx];
+ }
+ else
+ {
+ /* New locale format: DESC is treated as the address of a table
+ which is handed to wctrans_table_lookup together with WC. */
+ return wctrans_table_lookup ((const char *) desc, wc);
+ }
}
/* Additional non standardized wide character classification functions.
- Copyright (C) 1997, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
#include <wctype.h>
#include "cname-lookup.h"
+#include "wchar-lookup.h"
/* If the program is compiled without optimization the following declaration
is not visible in the header. */
extern unsigned int *__ctype32_b;
+/* This is not exported. */
+extern const char *__ctype32_wctype[12];
+
int
(iswblank) (wint_t wc)
{
- size_t idx;
-
- idx = cname_lookup (wc);
- if (idx == ~((size_t) 0))
- return 0;
-
- return __ctype32_b[idx] & _ISwblank;
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
+ {
+ /* Old locale format (hash size is nonzero): test the blank bit in
+ the class word found for WC. A character that is not in the
+ hash table is not blank. */
+ size_t idx;
+
+ idx = cname_lookup (wc);
+ if (idx == ~((size_t) 0))
+ return 0;
+
+ return __ctype32_b[idx] & _ISwblank;
+ }
+ else
+ {
+ /* New locale format: look WC up in the table stored in the
+ __ISwblank slot of __ctype32_wctype. */
+ return wctype_table_lookup (__ctype32_wctype[__ISwblank], wc);
+ }
}
/* Additional non standardized wide character classification functions.
- Copyright (C) 1997, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
#define USE_IN_EXTENDED_LOCALE_MODEL 1
#include "cname-lookup.h"
+#include "wchar-lookup.h"
int
(__iswblank_l) (wint_t wc, __locale_t locale)
{
- const unsigned int *class32_b;
- size_t idx;
-
- idx = cname_lookup (wc, locale);
- if (idx == ~((size_t) 0))
- return 0;
-
- class32_b = (uint32_t *)
- locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS32)].string;
-
- return class32_b[idx] & _ISwblank;
+ if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
+ {
+ /* Old locale format (hash size of LOCALE is nonzero): test the
+ blank bit in the class word of WC taken from LOCALE's
+ _NL_CTYPE_CLASS32 array. A character that is not in the hash
+ table is not blank. */
+ const uint32_t *class32_b;
+ size_t idx;
+
+ idx = cname_lookup (wc, locale);
+ if (idx == ~((size_t) 0))
+ return 0;
+
+ class32_b = (uint32_t *)
+ locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS32)].string;
+
+ return class32_b[idx] & _ISwbit (__ISwblank);
+ }
+ else
+ {
+ /* New locale format: the table for the blank class is the locale
+ value at index _NL_CTYPE_CLASS_OFFSET word + __ISwblank. */
+ size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS_OFFSET)].word + __ISwblank;
+ const char *desc = locale->__locales[LC_CTYPE]->values[i].string;
+ return wctype_table_lookup (desc, wc);
+ }
}
#include <ctype.h> /* For __ctype_tolower and __ctype_toupper. */
#include "cname-lookup.h"
+#include "wchar-lookup.h"
/* If the program is compiled without optimization the following declaration
is not visible in the header. */
/* These are not exported. */
extern const uint32_t *__ctype32_toupper;
extern const uint32_t *__ctype32_tolower;
+extern const char *__ctype32_wctype[12];
+extern const char *__ctype32_wctrans[2];
/* Provide real-function versions of all the wctype macros. */
int \
__##name (wint_t wc) \
{ \
- size_t idx; \
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0) \
+ { \
+ /* Old locale format. */ \
+ size_t idx; \
\
- idx = cname_lookup (wc); \
- if (idx == ~((size_t) 0)) \
- return 0; \
+ idx = cname_lookup (wc); \
+ if (idx == ~((size_t) 0)) \
+ return 0; \
\
- return __ctype32_b[idx] & type; \
+ return __ctype32_b[idx] & _ISwbit (type); \
+ } \
+ else \
+ { \
+ /* New locale format. */ \
+ return wctype_table_lookup (__ctype32_wctype[type], wc); \
+ } \
} \
weak_alias (__##name, name)
#undef iswalnum
-func (iswalnum, _ISwalnum)
+func (iswalnum, __ISwalnum)
#undef iswalpha
-func (iswalpha, _ISwalpha)
+func (iswalpha, __ISwalpha)
#undef iswcntrl
-func (iswcntrl, _ISwcntrl)
+func (iswcntrl, __ISwcntrl)
#undef iswdigit
-func (iswdigit, _ISwdigit)
+func (iswdigit, __ISwdigit)
#undef iswlower
-func (iswlower, _ISwlower)
+func (iswlower, __ISwlower)
#undef iswgraph
-func (iswgraph, _ISwgraph)
+func (iswgraph, __ISwgraph)
#undef iswprint
-func (iswprint, _ISwprint)
+func (iswprint, __ISwprint)
#undef iswpunct
-func (iswpunct, _ISwpunct)
+func (iswpunct, __ISwpunct)
#undef iswspace
-func (iswspace, _ISwspace)
+func (iswspace, __ISwspace)
#undef iswupper
-func (iswupper, _ISwupper)
+func (iswupper, __ISwupper)
#undef iswxdigit
-func (iswxdigit, _ISwxdigit)
+func (iswxdigit, __ISwxdigit)
wint_t
(towlower) (wc)
wint_t wc;
{
- size_t idx;
-
- idx = cname_lookup (wc);
- if (idx == ~((size_t) 0))
- /* Character is not known. Default action is to simply return it. */
- return wc;
-
- return (wint_t) __ctype32_tolower[idx];
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
+ {
+ /* Old locale format (hash size is nonzero): index
+ __ctype32_tolower with the hash table index of WC. */
+ size_t idx;
+
+ idx = cname_lookup (wc);
+ if (idx == ~((size_t) 0))
+ /* Character is not known. Default action is to simply return it. */
+ return wc;
+
+ return (wint_t) __ctype32_tolower[idx];
+ }
+ else
+ {
+ /* New locale format: slot 1 of __ctype32_wctrans holds the table
+ used for the lowercase mapping. */
+ return wctrans_table_lookup (__ctype32_wctrans[1], wc);
+ }
}
wint_t
(towupper) (wc)
wint_t wc;
{
- size_t idx;
-
- idx = cname_lookup (wc);
- if (idx == ~((size_t) 0))
- /* Character is not known. Default action is to simply return it. */
- return wc;
-
- return (wint_t) __ctype32_toupper[idx];
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
+ {
+ /* Old locale format (hash size is nonzero): index
+ __ctype32_toupper with the hash table index of WC. */
+ size_t idx;
+
+ idx = cname_lookup (wc);
+ if (idx == ~((size_t) 0))
+ /* Character is not known. Default action is to simply return it. */
+ return wc;
+
+ return (wint_t) __ctype32_toupper[idx];
+ }
+ else
+ {
+ /* New locale format: slot 0 of __ctype32_wctrans holds the table
+ used for the uppercase mapping. */
+ return wctrans_table_lookup (__ctype32_wctrans[0], wc);
+ }
}
#define USE_IN_EXTENDED_LOCALE_MODEL
#include "cname-lookup.h"
+#include "wchar-lookup.h"
/* Provide real-function versions of all the wctype macros. */
#define func(name, type) \
int name (wint_t wc, __locale_t locale) \
- { return __iswctype_l (wc, type, locale); }
-
-func (__iswalnum_l, _ISwalnum)
-func (__iswalpha_l, _ISwalpha)
-func (__iswcntrl_l, _ISwcntrl)
-func (__iswdigit_l, _ISwdigit)
-func (__iswlower_l, _ISwlower)
-func (__iswgraph_l, _ISwgraph)
-func (__iswprint_l, _ISwprint)
-func (__iswpunct_l, _ISwpunct)
-func (__iswspace_l, _ISwspace)
-func (__iswupper_l, _ISwupper)
-func (__iswxdigit_l, _ISwxdigit)
+ { \
+ if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0) \
+ { \
+ /* Old locale format: hash lookup of WC, then test bit TYPE of its class word. */ \
+ const uint32_t *class32_b; \
+ size_t idx; \
+ \
+ idx = cname_lookup (wc, locale); \
+ if (idx == ~((size_t) 0)) \
+ return 0; \
+ \
+ class32_b = (uint32_t *) \
+ locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS32)].string; \
+ \
+ return class32_b[idx] & _ISwbit (type); \
+ } \
+ else \
+ { \
+ /* New locale format: class table is the locale value at CLASS_OFFSET word + TYPE. */ \
+ size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS_OFFSET)].word + type; \
+ const char *desc = locale->__locales[LC_CTYPE]->values[i].string; \
+ return wctype_table_lookup (desc, wc); \
+ } \
+ }
+
+func (__iswalnum_l, __ISwalnum)
+func (__iswalpha_l, __ISwalpha)
+func (__iswcntrl_l, __ISwcntrl)
+func (__iswdigit_l, __ISwdigit)
+func (__iswlower_l, __ISwlower)
+func (__iswgraph_l, __ISwgraph)
+func (__iswprint_l, __ISwprint)
+func (__iswpunct_l, __ISwpunct)
+func (__iswspace_l, __ISwspace)
+func (__iswupper_l, __ISwupper)
+func (__iswxdigit_l, __ISwxdigit)
wint_t
(__towlower_l) (wint_t wc, __locale_t locale)
{
- const int32_t *class32_tolower;
- size_t idx;
-
- idx = cname_lookup (wc, locale);
- if (idx == ~((size_t) 0))
- return 0;
-
- class32_tolower = (const int32_t *)
- locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_TOLOWER32)].string;
-
- return class32_tolower[idx];
+ /* Map WC to lowercase according to the LC_CTYPE data of LOCALE. A
+ character without a mapping is returned unchanged in both locale
+ formats. */
+ if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
+ {
+ /* Old locale format (hash size of LOCALE is nonzero): index
+ LOCALE's _NL_CTYPE_TOLOWER32 array with the hash table index of
+ WC. */
+ const int32_t *class32_tolower;
+ size_t idx;
+
+ idx = cname_lookup (wc, locale);
+ if (idx == ~((size_t) 0))
+ /* Character is not known. Default action is to simply return it,
+ as towlower and __towctrans_l do; returning 0 here would turn
+ every unknown character into L'\0'. */
+ return wc;
+
+ class32_tolower = (const int32_t *)
+ locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_TOLOWER32)].string;
+
+ return class32_tolower[idx];
+ }
+ else
+ {
+ /* New locale format: the lowercase table is the locale value at
+ index _NL_CTYPE_MAP_OFFSET word + 1. */
+ size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_MAP_OFFSET)].word + 1;
+ const char *desc = locale->__locales[LC_CTYPE]->values[i].string;
+ return wctrans_table_lookup (desc, wc);
+ }
}
wint_t
(__towupper_l) (wint_t wc, __locale_t locale)
{
- const int32_t *class32_toupper;
- size_t idx;
-
- idx = cname_lookup (wc, locale);
- if (idx == ~((size_t) 0))
- return 0;
-
- class32_toupper = (const int32_t *)
- locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_TOUPPER32)].string;
-
- return class32_toupper[idx];
+ /* Map WC to uppercase according to the LC_CTYPE data of LOCALE. A
+ character without a mapping is returned unchanged in both locale
+ formats. */
+ if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
+ {
+ /* Old locale format (hash size of LOCALE is nonzero): index
+ LOCALE's _NL_CTYPE_TOUPPER32 array with the hash table index of
+ WC. */
+ const int32_t *class32_toupper;
+ size_t idx;
+
+ idx = cname_lookup (wc, locale);
+ if (idx == ~((size_t) 0))
+ /* Character is not known. Default action is to simply return it,
+ as towupper and __towctrans_l do; returning 0 here would turn
+ every unknown character into L'\0'. */
+ return wc;
+
+ class32_toupper = (const int32_t *)
+ locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_TOUPPER32)].string;
+
+ return class32_toupper[idx];
+ }
+ else
+ {
+ /* New locale format: the uppercase table is the locale value at
+ index _NL_CTYPE_MAP_OFFSET word + 0. */
+ size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_MAP_OFFSET)].word + 0;
+ const char *desc = locale->__locales[LC_CTYPE]->values[i].string;
+ return wctrans_table_lookup (desc, wc);
+ }
}
--- /dev/null
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* Tables indexed by a wide character are compressed through the use
+ of a multi-level lookup. The compression effect comes from blocks
+ that don't need particular data and from blocks that can share their
+ data. */
+
+/* Bit tables are accessed by cutting wc in four blocks of bits:
+   - the high 32-q-p-5 bits,
+   - the next q bits,
+   - the next p bits,
+   - the next 5 bits.
+
+          +------------------+-----+-----+-----+
+     wc = +     32-q-p-5     |  q  |  p  |  5  |
+          +------------------+-----+-----+-----+
+
+   p and q are variable. For 16-bit Unicode it is sufficient to
+   choose p and q such that q+p+5 <= 16.
+
+   The table contains the following uint32_t words:
+   - q+p+5,
+   - s = upper exclusive bound for wc >> (q+p+5),
+   - p+5,
+   - 2^q-1,
+   - 2^p-1,
+   - 1st-level table: s offsets, pointing into the 2nd-level table,
+   - 2nd-level table: k*2^q offsets, pointing into the 3rd-level table,
+   - 3rd-level table: j*2^p words, each containing 32 bits of data.
+
+   wctype_table_lookup returns the bit stored for WC (0 or 1). The
+   offsets found in the 1st-level and 2nd-level tables are added to
+   TABLE as byte offsets. The result is 0 when wc >> (q+p+5) is not
+   below the bound s or when an offset of 0 is found at either level.
+*/
+
+static __inline int
+wctype_table_lookup (const char *table, uint32_t wc)
+{
+ uint32_t shift1 = ((const uint32_t *) table)[0]; /* q+p+5 */
+ uint32_t index1 = wc >> shift1;
+ uint32_t bound = ((const uint32_t *) table)[1]; /* s */
+ if (index1 < bound)
+ {
+ uint32_t lookup1 = ((const uint32_t *) table)[5 + index1]; /* 1st-level entry follows the 5 header words */
+ if (lookup1 != 0)
+ {
+ uint32_t shift2 = ((const uint32_t *) table)[2]; /* p+5 */
+ uint32_t mask2 = ((const uint32_t *) table)[3]; /* 2^q-1 */
+ uint32_t index2 = (wc >> shift2) & mask2;
+ uint32_t lookup2 = ((const uint32_t *)(table + lookup1))[index2]; /* 2nd-level entry */
+ if (lookup2 != 0)
+ {
+ uint32_t mask3 = ((const uint32_t *) table)[4]; /* 2^p-1 */
+ uint32_t index3 = (wc >> 5) & mask3;
+ uint32_t lookup3 = ((const uint32_t *)(table + lookup2))[index3]; /* word holding 32 bits of data */
+
+ return (lookup3 >> (wc & 0x1f)) & 1; /* low 5 bits of wc select the bit */
+ }
+ }
+ }
+ return 0;
+}
+
+/* Byte tables are similar to bit tables, except that the addressing
+   unit is a single byte, and no 5 bits are used as a word index.
+
+   wcwidth_table_lookup reads the same five header words as the bit
+   table lookup (first shift, bound, second shift, second-level mask,
+   third-level mask) and returns the byte stored for WC. The result
+   is 0xff when the first-level index is not below the bound or when
+   an offset of 0 is found in the 1st-level or 2nd-level table. */
+
+static __inline int
+wcwidth_table_lookup (const char *table, uint32_t wc)
+{
+ uint32_t shift1 = ((const uint32_t *) table)[0]; /* shift for the 1st-level index */
+ uint32_t index1 = wc >> shift1;
+ uint32_t bound = ((const uint32_t *) table)[1]; /* exclusive bound for index1 */
+ if (index1 < bound)
+ {
+ uint32_t lookup1 = ((const uint32_t *) table)[5 + index1]; /* 1st-level entry follows the 5 header words */
+ if (lookup1 != 0)
+ {
+ uint32_t shift2 = ((const uint32_t *) table)[2]; /* shift for the 2nd-level index */
+ uint32_t mask2 = ((const uint32_t *) table)[3];
+ uint32_t index2 = (wc >> shift2) & mask2;
+ uint32_t lookup2 = ((const uint32_t *)(table + lookup1))[index2]; /* byte offset of the 3rd-level block */
+ if (lookup2 != 0)
+ {
+ uint32_t mask3 = ((const uint32_t *) table)[4];
+ uint32_t index3 = wc & mask3; /* low bits of wc index the bytes directly */
+ uint8_t lookup3 = ((const uint8_t *)(table + lookup2))[index3];
+
+ return lookup3;
+ }
+ }
+ }
+ return 0xff;
+}
+
+/* Mapping tables are similar to bit tables, except that the
+   addressing unit is a single signed 32-bit word, containing the
+   difference between the desired result and the argument, and no 5
+   bits are used as a word index.
+
+   wctrans_table_lookup reads the same five header words as the bit
+   table lookup and returns WC plus the difference stored for it. WC
+   is returned unchanged when the first-level index is not below the
+   bound or when an offset of 0 is found in the 1st-level or 2nd-level
+   table. */
+
+static __inline uint32_t
+wctrans_table_lookup (const char *table, uint32_t wc)
+{
+ uint32_t shift1 = ((const uint32_t *) table)[0]; /* shift for the 1st-level index */
+ uint32_t index1 = wc >> shift1;
+ uint32_t bound = ((const uint32_t *) table)[1]; /* exclusive bound for index1 */
+ if (index1 < bound)
+ {
+ uint32_t lookup1 = ((const uint32_t *) table)[5 + index1]; /* 1st-level entry follows the 5 header words */
+ if (lookup1 != 0)
+ {
+ uint32_t shift2 = ((const uint32_t *) table)[2]; /* shift for the 2nd-level index */
+ uint32_t mask2 = ((const uint32_t *) table)[3];
+ uint32_t index2 = (wc >> shift2) & mask2;
+ uint32_t lookup2 = ((const uint32_t *)(table + lookup1))[index2]; /* byte offset of the 3rd-level block */
+ if (lookup2 != 0)
+ {
+ uint32_t mask3 = ((const uint32_t *) table)[4];
+ uint32_t index3 = wc & mask3; /* low bits of wc index the words directly */
+ int32_t lookup3 = ((const int32_t *)(table + lookup2))[index3]; /* signed difference */
+
+ return wc + lookup3;
+ }
+ }
+ }
+ return wc;
+}
{
const char *names;
size_t cnt;
- int32_t *result;
names = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_MAP_NAMES);
cnt = 0;
if (names[0] == '\0')
return 0;
- if (cnt == 0)
- return (wctrans_t) __ctype32_toupper;
- else if (cnt == 1)
- return (wctrans_t) __ctype32_tolower;
-
- /* We have to search the table. */
- result = (int32_t *) _NL_CURRENT (LC_CTYPE, _NL_NUM_LC_CTYPE + cnt - 2);
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
+ {
+ /* Old locale format. */
+ if (cnt == 0)
+ return (wctrans_t) __ctype32_toupper;
+ else if (cnt == 1)
+ return (wctrans_t) __ctype32_tolower;
- return (wctrans_t) result;
+ /* We have to search the table. */
+ return (wctrans_t) (const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_NUM_LC_CTYPE + cnt - 2);
+ }
+ else
+ {
+ /* New locale format. */
+ size_t i = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_OFFSET) + cnt;
+ return (wctrans_t) _nl_current_LC_CTYPE->values[i].string;
+ }
}
-/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
__wctype (const char *property)
{
const char *names;
- wctype_t result;
+ unsigned int result; /* Class number rather than a bit mask: shifted into a mask or added to _NL_CTYPE_CLASS_OFFSET below. */
size_t proplen = strlen (property);
names = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS_NAMES);
- for (result = 1; result != 0; result <<= 1)
+ for (result = 0; ; result++)
{
size_t nameslen = strlen (names);
return 0;
}
+ if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
+ {
+ /* Old locale format (nonzero _NL_CTYPE_HASH_SIZE): the descriptor is
+    a one-bit mask, byte-swapped on little-endian hosts so that its
+    bytes are laid out in big-endian order. */
#if __BYTE_ORDER == __BIG_ENDIAN
- return result;
+ return 1 << result;
#else
# define SWAPU32(w) \
(((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
-
- return SWAPU32 (result);
+ return 1 << (result ^ 0x18); /* = SWAPU32 (1 << result); */
#endif
+ }
+ else
+ {
+ /* New locale format: the descriptor is the string pointer stored in
+    the LC_CTYPE value at index _NL_CTYPE_CLASS_OFFSET + result, cast
+    to wctype_t. */
+ size_t i = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_CLASS_OFFSET) + result;
+ return (wctype_t) _nl_current_LC_CTYPE->values[i].string;
+ }
}
weak_alias (__wctype, wctype)
enum
{
- _ISwupper = _ISwbit (0), /* UPPERCASE. */
- _ISwlower = _ISwbit (1), /* lowercase. */
- _ISwalpha = _ISwbit (2), /* Alphabetic. */
- _ISwdigit = _ISwbit (3), /* Numeric. */
- _ISwxdigit = _ISwbit (4), /* Hexadecimal numeric. */
- _ISwspace = _ISwbit (5), /* Whitespace. */
- _ISwprint = _ISwbit (6), /* Printing. */
- _ISwgraph = _ISwbit (7), /* Graphical. */
- _ISwblank = _ISwbit (8), /* Blank (usually SPC and TAB). */
- _ISwcntrl = _ISwbit (9), /* Control character. */
- _ISwpunct = _ISwbit (10), /* Punctuation. */
- _ISwalnum = _ISwbit (11) /* Alphanumeric. */
+ __ISwupper = 0, /* UPPERCASE. */
+ __ISwlower = 1, /* lowercase. */
+ __ISwalpha = 2, /* Alphabetic. */
+ __ISwdigit = 3, /* Numeric. */
+ __ISwxdigit = 4, /* Hexadecimal numeric. */
+ __ISwspace = 5, /* Whitespace. */
+ __ISwprint = 6, /* Printing. */
+ __ISwgraph = 7, /* Graphical. */
+ __ISwblank = 8, /* Blank (usually SPC and TAB). */
+ __ISwcntrl = 9, /* Control character. */
+ __ISwpunct = 10, /* Punctuation. */
+ __ISwalnum = 11, /* Alphanumeric. */
+ /* The __ISw* constants above are plain class numbers; each _ISw* constant below passes the matching number through _ISwbit. */
+ _ISwupper = _ISwbit (__ISwupper), /* UPPERCASE. */
+ _ISwlower = _ISwbit (__ISwlower), /* lowercase. */
+ _ISwalpha = _ISwbit (__ISwalpha), /* Alphabetic. */
+ _ISwdigit = _ISwbit (__ISwdigit), /* Numeric. */
+ _ISwxdigit = _ISwbit (__ISwxdigit), /* Hexadecimal numeric. */
+ _ISwspace = _ISwbit (__ISwspace), /* Whitespace. */
+ _ISwprint = _ISwbit (__ISwprint), /* Printing. */
+ _ISwgraph = _ISwbit (__ISwgraph), /* Graphical. */
+ _ISwblank = _ISwbit (__ISwblank), /* Blank (usually SPC and TAB). */
+ _ISwcntrl = _ISwbit (__ISwcntrl), /* Control character. */
+ _ISwpunct = _ISwbit (__ISwpunct), /* Punctuation. */
+ _ISwalnum = _ISwbit (__ISwalnum) /* Alphanumeric. */
};
# endif /* Not _ISwbit */
? (int) (__ctype32_b[(wint_t) (wc)] & _ISwblank) : iswblank (wc)))
# endif
-# define iswctype(wc, desc) \
- (__extension__ \
- (__builtin_constant_p (wc) && (wint_t) (wc) <= L'\xff' \
- ? (int) (__ctype32_b[(wint_t) (wc)] & desc) : iswctype (wc, desc)))
-
#endif /* gcc && optimizing */
/*
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
__wctype_l (const char *property, __locale_t locale)
{
const char *names;
- wctype_t result;
+ unsigned int result; /* Class number rather than a bit mask: shifted into a mask or added to _NL_CTYPE_CLASS_OFFSET below. */
size_t proplen = strlen (property);
names = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES)].string;
- for (result = 1; result != 0; result <<= 1)
+ for (result = 0; ; result++)
{
size_t nameslen = strlen (names);
return 0;
}
+ if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
+ {
+ /* Old locale format (nonzero _NL_CTYPE_HASH_SIZE): the descriptor is
+    a one-bit mask, byte-swapped on little-endian hosts so that its
+    bytes are laid out in big-endian order. */
#if __BYTE_ORDER == __BIG_ENDIAN
- return result;
+ return 1 << result;
#else
# define SWAPU32(w) \
(((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
-
- return SWAPU32 (result);
+ return 1 << (result ^ 0x18); /* = SWAPU32 (1 << result); */
#endif
+ }
+ else
+ {
+ /* New locale format: the descriptor is the string pointer stored in
+    LOCALE's LC_CTYPE value at index _NL_CTYPE_CLASS_OFFSET + result,
+    cast to wctype_t. */
+ size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS_OFFSET)].word + result;
+ return (wctype_t) locale->__locales[LC_CTYPE]->values[i].string;
+ }
}