locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
   2 This file is part of the GNU C Library.
   3 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
   4
   5 The GNU C Library is free software; you can redistribute it and/or
   6 modify it under the terms of the GNU Library General Public License as
   7 published by the Free Software Foundation; either version 2 of the
   8 License, or (at your option) any later version.
   9
  10 The GNU C Library is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 Library General Public License for more details.
  14
  15 You should have received a copy of the GNU Library General Public
  16 License along with the GNU C Library; see the file COPYING.LIB.  If
  17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18 Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <endian.h>
  25 #include <limits.h>
  26 #include <string.h>
  27
  28 #include "locales.h"
  29 #include "localeinfo.h"
  30 #include "langinfo.h"
  31 #include "locfile-token.h"
  32 #include "stringtrans.h"
  33
  34 /* Uncomment the following line in the production version.  */
  35 /* define NDEBUG 1 */
  36 #include <assert.h>
  37
  38
/* Allocation helpers used throughout this file; they are only declared
   here, the definitions live elsewhere in the program.
   NOTE(review): no caller in this file checks the result for NULL, so
   these presumably never return on failure -- confirm against the
   definitions.  */
void *xmalloc (size_t __n);
void *xcalloc (size_t __n, size_t __s);
void *xrealloc (void *__ptr, size_t __n);
  42
  43
/* The bit used for representing a special class.  BITPOS yields the
   distance of the token CLASS from `tok_upper', BIT the mask with
   exactly that bit set.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Expand to an lvalue for the entry of character VALUE in the table
   CTYPE->COLLECTION IDX.  IDX is either empty (for a plain table such as
   `class_collection') or a bracketed subscript selecting one table of an
   array of tables.  The lookup goes through `find_idx', which is handed
   the addresses of the table and of its `_max' and `_act' companions.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)

/* Reverse the order of the four bytes of the 32 bit value W.  W is
   evaluated several times, so it must not have side effects.  */
#define SWAPU32(w) \
  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))

/* Exchange the two low bytes of W.  W is evaluated twice.  */
#define SWAPU16(w) \
  ((((w)  >> 8) & 0xff) | (((w) & 0xff) << 8))
  57
  58
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.

   Each class type comes with a matching `_TRANS' macro which selects
   the byte swapping macro for values of that width.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
  66
  67
  68 /* The real definition of the struct for the LC_CTYPE locale.  */
  69 struct locale_ctype_t
  70 {
  71   unsigned int *charnames;
  72   size_t charnames_max;
  73   size_t charnames_act;
  74
  75   /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes.  */
  76 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
  77   int nr_charclass;
  78   const char *classnames[MAX_NR_CHARCLASS];
  79   unsigned long int current_class_mask;
  80   unsigned int last_class_char;
  81   u_int32_t *class_collection;
  82   size_t class_collection_max;
  83   size_t class_collection_act;
  84   unsigned long int class_done;
  85
  86   /* If the following number ever turns out to be too small simply
  87      increase it.  But I doubt it will.  --drepper@gnu */
  88 #define MAX_NR_CHARMAP 16
  89   const char *mapnames[MAX_NR_CHARMAP];
  90   u_int32_t *map_collection[MAX_NR_CHARMAP];
  91   u_int32_t map_collection_max[MAX_NR_CHARMAP];
  92   u_int32_t map_collection_act[MAX_NR_CHARMAP];
  93   size_t map_collection_nr;
  94   size_t last_map_idx;
  95   unsigned int from_map_char;
  96   int toupper_done;
  97   int tolower_done;
  98
  99   /* The arrays for the binary representation.  */
 100   u_int32_t plane_size;
 101   u_int32_t plane_cnt;
 102   char_class_t *ctype_b;
 103   char_class32_t *ctype32_b;
 104   u_int32_t *names_el;
 105   u_int32_t *names_eb;
 106   u_int32_t **map_eb;
 107   u_int32_t **map_el;
 108   u_int32_t *class_name_ptr;
 109   u_int32_t *map_name_ptr;
 110   unsigned char *width;
 111   u_int32_t mb_cur_max;
 112   const char *codeset_name;
 113 };
 114
 115
/* Prototypes for local functions.  */

/* Register a new character class NAME; the `P' variants work directly on
   the LC_CTYPE data instead of the whole locale description.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype, const char *name);
/* Register a new character map NAME and allocate its table.  */
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name, struct charset_t *charset);
/* Return the address of the entry for character IDX in *TABLE, growing
   the name array and the table as necessary.  */
static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
                            size_t *max, size_t *act, unsigned int idx);
/* Supply default values for classes the locale did not define.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);
/* Build the arrays of the binary representation written by
   `ctype_output'.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
 128
 129
 130 void
 131 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 132                struct charset_t *charset)
 133 {
 134   unsigned int cnt;
 135   struct locale_ctype_t *ctype;
 136
 137   /* It is important that we always use UCS1 encoding for strings now.  */
 138   encoding_method = ENC_UCS1;
 139
 140   /* Allocate the needed room.  */
 141   locale->categories[LC_CTYPE].ctype = ctype =
 142     (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
 143
 144   /* We have no names seen yet.  */
 145   ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
 146   ctype->charnames =
 147     (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
 148   for (cnt = 0; cnt < 256; ++cnt)
 149     ctype->charnames[cnt] = cnt;
 150   ctype->charnames_act = 256;
 151
 152   /* Fill character class information.  */
 153   ctype->nr_charclass = 0;
 154   ctype->current_class_mask = 0;
 155   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 156   /* The order of the following instructions determines the bit
 157      positions!  */
 158   ctype_class_newP (lr, ctype, "upper");
 159   ctype_class_newP (lr, ctype, "lower");
 160   ctype_class_newP (lr, ctype, "alpha");
 161   ctype_class_newP (lr, ctype, "digit");
 162   ctype_class_newP (lr, ctype, "xdigit");
 163   ctype_class_newP (lr, ctype, "space");
 164   ctype_class_newP (lr, ctype, "print");
 165   ctype_class_newP (lr, ctype, "graph");
 166   ctype_class_newP (lr, ctype, "blank");
 167   ctype_class_newP (lr, ctype, "cntrl");
 168   ctype_class_newP (lr, ctype, "punct");
 169   ctype_class_newP (lr, ctype, "alnum");
 170
 171   ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
 172   ctype->class_collection
 173     = (u_int32_t *) xmalloc (sizeof (unsigned long int)
 174                              * ctype->class_collection_max);
 175   memset (ctype->class_collection, '\0',
 176           sizeof (unsigned long int) * ctype->class_collection_max);
 177   ctype->class_collection_act = 256;
 178
 179   /* Fill character map information.  */
 180   ctype->map_collection_nr = 0;
 181   ctype->last_map_idx = MAX_NR_CHARMAP;
 182   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 183   ctype_map_newP (lr, ctype, "toupper", charset);
 184   ctype_map_newP (lr, ctype, "tolower", charset);
 185
 186   /* Fill first 256 entries in `toupper' and `tolower' arrays.  */
 187   for (cnt = 0; cnt < 256; ++cnt)
 188     {
 189       ctype->map_collection[0][cnt] = cnt;
 190       ctype->map_collection[1][cnt] = cnt;
 191     }
 192 }
 193
 194
 195 void
 196 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
 197 {
 198   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 199 #define NCLASS 12
 200   static const struct
 201   {
 202     const char *name;
 203     const char allow[NCLASS];
 204   }
 205   valid_table[NCLASS] =
 206   {
 207     /* The order is important.  See token.h for more information.
 208        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 209     { "upper",  "--MX-XDDXXX-" },
 210     { "lower",  "--MX-XDDXXX-" },
 211     { "alpha",  "---X-XDDXXX-" },
 212     { "digit",  "XXX--XDDXXX-" },
 213     { "xdigit", "-----XDDXXX-" },
 214     { "space",  "XXXXX------X" },
 215     { "print",  "---------X--" },
 216     { "graph",  "---------X--" },
 217     { "blank",  "XXXXXM-----X" },
 218     { "cntrl",  "XXXXX-XX--XX" },
 219     { "punct",  "XXXXX-DD-X-X" },
 220     { "alnum",  "-----XDDXXX-" }
 221   };
 222   size_t cnt;
 223   int cls1, cls2;
 224   unsigned int space_value;
 225   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 226
 227   /* Set default value for classes not specified.  */
 228   set_class_defaults (ctype, charset);
 229
 230   /* Check according to table.  */
 231   for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
 232     {
 233       unsigned long int tmp;
 234
 235       tmp = ctype->class_collection[cnt];
 236       if (tmp == 0)
 237         continue;
 238
 239       for (cls1 = 0; cls1 < NCLASS; ++cls1)
 240         if ((tmp & (1 << cls1)) != 0)
 241           for (cls2 = 0; cls2 < NCLASS; ++cls2)
 242             if (valid_table[cls1].allow[cls2] != '-')
 243               {
 244                 int eq = (tmp & (1 << cls2)) != 0;
 245                 switch (valid_table[cls1].allow[cls2])
 246                   {
 247                   case 'M':
 248                     if (!eq)
 249                       {
 250                         char buf[17];
 251                         char *cp = buf;
 252                         unsigned int value;
 253
 254                         value = ctype->charnames[cnt];
 255
 256                         if ((value & 0xff000000) != 0)
 257                           cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
 258                         if ((value & 0xffff0000) != 0)
 259                           cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
 260                         if ((value & 0xffffff00) != 0)
 261                           cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
 262                         sprintf (cp, "\\%o", value & 0xff);
 263
 264                         error (0, 0, _("\
 265 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
 266                                cp, valid_table[cls1].name,
 267                                valid_table[cls2].name);
 268                       }
 269                     break;
 270
 271                   case 'X':
 272                     if (eq)
 273                       {
 274                         char buf[17];
 275                         char *cp = buf;
 276                         unsigned int value;
 277
 278                         value = ctype->charnames[cnt];
 279
 280                         if ((value & 0xff000000) != 0)
 281                           cp += sprintf (cp, "\\%o", value >> 24);
 282                         if ((value & 0xffff0000) != 0)
 283                           cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
 284                         if ((value & 0xffffff00) != 0)
 285                           cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
 286                         sprintf (cp, "\\%o", value & 0xff);
 287
 288                         error (0, 0, _("\
 289 character %s'%s' in class `%s' must not be in class `%s'"),
 290                                value > 256 ? "L" : "", cp,
 291                                valid_table[cls1].name, valid_table[cls2].name);
 292                       }
 293                     break;
 294
 295                   case 'D':
 296                     ctype->class_collection[cnt] |= 1 << cls2;
 297                     break;
 298
 299                   default:
 300                     error (5, 0, _("internal error in %s, line %u"),
 301                            __FUNCTION__, __LINE__);
 302                   }
 303               }
 304     }
 305
 306   /* ... and now test <SP> as a special case.  */
 307   space_value = charset_find_value (charset, "SP", 2);
 308   if (space_value == ILLEGAL_CHAR_VALUE)
 309     error (0, 0, _("character <SP> not defined in character map"));
 310   else if ((cnt = BITPOS (tok_space),
 311             (ELEM (ctype, class_collection, , space_value)
 312              & BIT (tok_space)) == 0)
 313            || (cnt = BITPOS (tok_blank),
 314                (ELEM (ctype, class_collection, , space_value)
 315                 & BIT (tok_blank)) == 0))
 316     error (0, 0, _("<SP> character not in class `%s'"),
 317            valid_table[cnt].name);
 318   else if ((cnt = BITPOS (tok_punct),
 319             (ELEM (ctype, class_collection, , space_value)
 320              & BIT (tok_punct)) != 0)
 321            || (cnt = BITPOS (tok_graph),
 322                (ELEM (ctype, class_collection, , space_value)
 323                 & BIT (tok_graph))
 324                != 0))
 325     error (0, 0, _("<SP> character must not be in class `%s'"),
 326            valid_table[cnt].name);
 327   else
 328     ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
 329
 330   /* Now that the tests are done make sure the name array contains all
 331      characters which are handled in the WIDTH section of the
 332      character set definition file.  */
 333   if (charset->width_rules != NULL)
 334     for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
 335       {
 336         size_t inner;
 337         for (inner = charset->width_rules[cnt].from;
 338              inner <= charset->width_rules[cnt].to; ++inner)
 339           (void) find_idx (ctype, NULL, NULL, NULL, inner);
 340       }
 341 }
 342
 343
 344 void
 345 ctype_output (struct localedef_t *locale, struct charset_t *charset,
 346               const char *output_path)
 347 {
 348   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 349   const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
 350                          + 2 * (ctype->map_collection_nr - 2));
 351   struct iovec iov[2 + nelems + ctype->nr_charclass
 352                   + ctype->map_collection_nr];
 353   struct locale_file data;
 354   u_int32_t idx[nelems];
 355   size_t elem, cnt, offset, total;
 356
 357
 358   if ((locale->binary & (1 << LC_CTYPE)) != 0)
 359     {
 360       iov[0].iov_base = ctype;
 361       iov[0].iov_len = locale->len[LC_CTYPE];
 362
 363       write_locale_data (output_path, "LC_CTYPE", 1, iov);
 364
 365       return;
 366     }
 367
 368
 369   /* Now prepare the output: Find the sizes of the table we can use.  */
 370   allocate_arrays (ctype, charset);
 371
 372   data.magic = LIMAGIC (LC_CTYPE);
 373   data.n = nelems;
 374   iov[0].iov_base = (void *) &data;
 375   iov[0].iov_len = sizeof (data);
 376
 377   iov[1].iov_base = (void *) idx;
 378   iov[1].iov_len = sizeof (idx);
 379
 380   idx[0] = iov[0].iov_len + iov[1].iov_len;
 381   offset = 0;
 382
 383   for (elem = 0; elem < nelems; ++elem)
 384     {
 385       if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
 386         switch (elem)
 387           {
 388 #define CTYPE_DATA(name, base, len)                                           \
 389           case _NL_ITEM_INDEX (name):                                         \
 390             iov[2 + elem + offset].iov_base = (base);                         \
 391             iov[2 + elem + offset].iov_len = (len);                           \
 392             if (elem + 1 < nelems)                                            \
 393               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;     \
 394             break
 395
 396           CTYPE_DATA (_NL_CTYPE_CLASS,
 397                       ctype->ctype_b,
 398                       (256 + 128) * sizeof (char_class_t));
 399
 400           CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
 401                       ctype->map_eb[0],
 402                       (ctype->plane_size * ctype->plane_cnt + 128)
 403                       * sizeof (u_int32_t));
 404           CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
 405                       ctype->map_eb[1],
 406                       (ctype->plane_size * ctype->plane_cnt + 128)
 407                       * sizeof (u_int32_t));
 408
 409           CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
 410                       ctype->map_el[0],
 411                       (ctype->plane_size * ctype->plane_cnt + 128)
 412                       * sizeof (u_int32_t));
 413           CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
 414                       ctype->map_el[1],
 415                       (ctype->plane_size * ctype->plane_cnt + 128)
 416                       * sizeof (u_int32_t));
 417
 418           CTYPE_DATA (_NL_CTYPE_CLASS32,
 419                       ctype->ctype32_b,
 420                       (ctype->plane_size * ctype->plane_cnt
 421                        * sizeof (char_class32_t)));
 422
 423           CTYPE_DATA (_NL_CTYPE_NAMES_EB,
 424                       ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
 425                                         * sizeof (u_int32_t)));
 426           CTYPE_DATA (_NL_CTYPE_NAMES_EL,
 427                       ctype->names_el, (ctype->plane_size * ctype->plane_cnt
 428                                         * sizeof (u_int32_t)));
 429
 430           CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
 431                       &ctype->plane_size, sizeof (u_int32_t));
 432           CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
 433                       &ctype->plane_cnt, sizeof (u_int32_t));
 434
 435           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 436             /* The class name array.  */
 437             total = 0;
 438             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 439               {
 440                 iov[2 + elem + offset].iov_base
 441                   = (void *) ctype->classnames[cnt];
 442                 iov[2 + elem + offset].iov_len
 443                   = strlen (ctype->classnames[cnt]) + 1;
 444                 total += iov[2 + elem + offset].iov_len;
 445               }
 446             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 447             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 448             total += 1 + (4 - ((total + 1) % 4));
 449
 450             if (elem + 1 < nelems)
 451               idx[elem + 1] = idx[elem] + total;
 452             break;
 453
 454           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 455             /* The class name array.  */
 456             total = 0;
 457             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 458               {
 459                 iov[2 + elem + offset].iov_base
 460                   = (void *) ctype->mapnames[cnt];
 461                 iov[2 + elem + offset].iov_len
 462                   = strlen (ctype->mapnames[cnt]) + 1;
 463                 total += iov[2 + elem + offset].iov_len;
 464               }
 465             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 466             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 467             total += 1 + (4 - ((total + 1) % 4));
 468
 469             if (elem + 1 < nelems)
 470               idx[elem + 1] = idx[elem] + total;
 471             break;
 472
 473           CTYPE_DATA (_NL_CTYPE_WIDTH,
 474                       ctype->width, ctype->plane_size * ctype->plane_cnt);
 475
 476           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 477                       &ctype->mb_cur_max, sizeof (u_int32_t));
 478
 479           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
 480             total = strlen (ctype->codeset_name) + 1;
 481             if (total % 4 == 0)
 482               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
 483             else
 484               {
 485                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
 486                 memcpy (iov[2 + elem + offset].iov_base, ctype->codeset_name,
 487                         total);
 488                 total = (total + 3) & ~3;
 489               }
 490             iov[2 + elem + offset].iov_len = total;
 491             if (elem + 1 < nelems)
 492               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 493             break;
 494
 495           default:
 496             assert (! "unknown CTYPE element");
 497           }
 498       else
 499         {
 500           /* Handle extra maps.  */
 501           size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
 502
 503           if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
 504             iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
 505           else
 506             iov[2 + elem + offset].iov_base = ctype->map_el[nr];
 507
 508           iov[2 + elem + offset].iov_len = ((ctype->plane_size
 509                                              * ctype->plane_cnt + 128)
 510                                             * sizeof (u_int32_t));
 511
 512           if (elem + 1 < nelems)
 513             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 514         }
 515     }
 516
 517   assert (2 + elem + offset == (nelems + ctype->nr_charclass
 518                                 + ctype->map_collection_nr + 2));
 519
 520   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
 521 }
 522
 523
 524 /* Character class handling.  */
 525 void
 526 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
 527                  enum token_t tok, struct token *code,
 528                  struct charset_t *charset)
 529 {
 530   ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
 531                     code->val.str.start);
 532 }
 533
 534
 535 int
 536 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
 537                     const char *name)
 538 {
 539   int cnt;
 540
 541   for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
 542     if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
 543         == 0)
 544       return 1;
 545
 546   return 0;
 547 }
 548
 549
 550 void
 551 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
 552                    enum token_t tok, const char *str,
 553                    struct charset_t *charset)
 554 {
 555   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 556   int cnt;
 557
 558   switch (tok)
 559     {
 560     case tok_upper:
 561       str = "upper";
 562       break;
 563     case tok_lower:
 564       str = "lower";
 565       break;
 566     case tok_alpha:
 567       str = "alpha";
 568       break;
 569     case tok_digit:
 570       str = "digit";
 571       break;
 572     case tok_xdigit:
 573       str = "xdigit";
 574       break;
 575     case tok_space:
 576       str = "space";
 577       break;
 578     case tok_print:
 579       str = "print";
 580       break;
 581     case tok_graph:
 582       str = "graph";
 583       break;
 584     case tok_blank:
 585       str = "blank";
 586       break;
 587     case tok_cntrl:
 588       str = "cntrl";
 589       break;
 590     case tok_punct:
 591       str = "punct";
 592       break;
 593     case tok_alnum:
 594       str = "alnum";
 595       break;
 596     case tok_ident:
 597       break;
 598     default:
 599       assert (! "illegal token as class name: should not happen");
 600     }
 601
 602   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 603     if (strcmp (str, ctype->classnames[cnt]) == 0)
 604       break;
 605
 606   if (cnt >= ctype->nr_charclass)
 607     assert (! "unknown class in class definition: should not happen");
 608
 609   ctype->class_done |= BIT (tok);
 610
 611   ctype->current_class_mask = 1 << cnt;
 612   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 613 }
 614
 615
 616 void
 617 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
 618                   struct token *code, struct charset_t *charset)
 619 {
 620   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 621   unsigned int value;
 622
 623   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 624
 625   ctype->last_class_char = value;
 626
 627   if (value == ILLEGAL_CHAR_VALUE)
 628     /* In the LC_CTYPE category it is no error when a character is
 629        not found.  This has to be ignored silently.  */
 630     return;
 631
 632   *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
 633              &ctype->class_collection_act, value)
 634     |= ctype->current_class_mask;
 635 }
 636
 637
 638 void
 639 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
 640                 struct token *code, struct charset_t *charset)
 641 {
 642   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 643   unsigned int value, cnt;
 644
 645   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 646
 647   assert (value >= ctype->last_class_char);
 648
 649   for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
 650     *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
 651                &ctype->class_collection_act, cnt)
 652       |= ctype->current_class_mask;
 653
 654   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 655 }
 656
 657
 658 void
 659 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
 660 {
 661   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 662
 663   /* We have no special actions to perform here.  */
 664   ctype->current_class_mask = 0;
 665   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 666 }
 667
 668
 669 /* Character map handling.  */
 670 void
 671 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
 672                enum token_t tok, struct token *code,
 673                struct charset_t *charset)
 674 {
 675   ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
 676                   code->val.str.start, charset);
 677 }
 678
 679
 680 int
 681 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
 682                    const char *name)
 683 {
 684   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 685   size_t cnt;
 686
 687   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 688     if (strcmp (name, ctype->mapnames[cnt]) == 0)
 689       return 1;
 690
 691   return 0;
 692 }
 693
 694
 695 void
 696 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
 697                  enum token_t tok, const char *name, struct charset_t *charset)
 698 {
 699   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 700   size_t cnt;
 701
 702   switch (tok)
 703     {
 704     case tok_toupper:
 705       ctype->toupper_done = 1;
 706       name = "toupper";
 707       break;
 708     case tok_tolower:
 709       ctype->tolower_done = 1;
 710       name = "tolower";
 711       break;
 712     case tok_ident:
 713       break;
 714     default:
 715       assert (! "unknown token in category `LC_CTYPE' should not happen");
 716     }
 717
 718   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 719     if (strcmp (name, ctype->mapnames[cnt]) == 0)
 720       break;
 721
 722   if (cnt == ctype->map_collection_nr)
 723     assert (! "unknown token in category `LC_CTYPE' should not happen");
 724
 725   ctype->last_map_idx = cnt;
 726   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 727 }
 728
 729
 730 void
 731 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
 732                 struct token *code, struct charset_t *charset)
 733 {
 734   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 735   unsigned int value;
 736
 737   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 738
 739   if (value == ILLEGAL_CHAR_VALUE)
 740     /* In the LC_CTYPE category it is no error when a character is
 741        not found.  This has to be ignored silently.  */
 742     return;
 743
 744   assert (ctype->last_map_idx < ctype->map_collection_nr);
 745
 746   ctype->from_map_char = value;
 747 }
 748
 749
 750 void
 751 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
 752               struct token *code, struct charset_t *charset)
 753 {
 754   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 755   unsigned int value;
 756
 757   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 758
 759   if (ctype->from_map_char == ILLEGAL_CHAR_VALUE
 760       || value == ILLEGAL_CHAR_VALUE)
 761     {
 762       /* In the LC_CTYPE category it is no error when a character is
 763          not found.  This has to be ignored silently.  */
 764       ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 765       return;
 766     }
 767
 768   *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
 769              &ctype->map_collection_max[ctype->last_map_idx],
 770              &ctype->map_collection_act[ctype->last_map_idx],
 771              ctype->from_map_char) = value;
 772
 773   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 774 }
 775
 776
 777 void
 778 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
 779 {
 780   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 781
 782   ctype->last_map_idx = MAX_NR_CHARMAP;
 783   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 784 }
 785
 786
 787 /* Local functions.  */
 788 static void
 789 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
 790                   const char *name)
 791 {
 792   int cnt;
 793
 794   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 795     if (strcmp (ctype->classnames[cnt], name) == 0)
 796       break;
 797
 798   if (cnt < ctype->nr_charclass)
 799     {
 800       lr_error (lr, _("character class `%s' already defined"));
 801       return;
 802     }
 803
 804   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
 805     /* Exit code 2 is prescribed in P1003.2b.  */
 806     error (2, 0, _("\
 807 implementation limit: no more than %d character classes allowed"),
 808            MAX_NR_CHARCLASS);
 809
 810   ctype->classnames[ctype->nr_charclass++] = name;
 811 }
 812
 813
 814 static void
 815 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
 816                 const char *name, struct charset_t *charset)
 817 {
 818   size_t max_chars = 0;
 819   int cnt;
 820
 821   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 822     {
 823       if (strcmp (ctype->mapnames[cnt], name) == 0)
 824         break;
 825
 826       if (max_chars < ctype->map_collection_max[cnt])
 827         max_chars = ctype->map_collection_max[cnt];
 828     }
 829
 830   if (cnt < ctype->map_collection_nr)
 831     {
 832       lr_error (lr, _("character map `%s' already defined"));
 833       return;
 834     }
 835
 836   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
 837     /* Exit code 2 is prescribed in P1003.2b.  */
 838     error (2, 0, _("\
 839 implementation limit: no more than %d character maps allowed"),
 840            MAX_NR_CHARMAP);
 841
 842   ctype->mapnames[cnt] = name;
 843
 844   if (max_chars == 0)
 845     ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
 846   else
 847     ctype->map_collection_max[cnt] = max_chars;
 848
 849   ctype->map_collection[cnt] = (u_int32_t *)
 850     xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
 851   memset (ctype->map_collection[cnt], '\0',
 852           sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
 853   ctype->map_collection_act[cnt] = 256;
 854
 855   ++ctype->map_collection_nr;
 856 }
 857
 858
 859 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
 860    is possible if we only want ot extend the name array.  */
 861 static u_int32_t *
 862 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
 863           size_t *act, unsigned int idx)
 864 {
 865   size_t cnt;
 866
 867   if (idx < 256)
 868     return table == NULL ? NULL : &(*table)[idx];
 869
 870   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
 871     if (ctype->charnames[cnt] == idx)
 872       break;
 873
 874   /* We have to distinguish two cases: the names is found or not.  */
 875   if (cnt == ctype->charnames_act)
 876     {
 877       /* Extend the name array.  */
 878       if (ctype->charnames_act == ctype->charnames_max)
 879         {
 880           ctype->charnames_max *= 2;
 881           ctype->charnames = (unsigned int *)
 882             xrealloc (ctype->charnames,
 883                       sizeof (unsigned int) * ctype->charnames_max);
 884         }
 885       ctype->charnames[ctype->charnames_act++] = idx;
 886     }
 887
 888   if (table == NULL)
 889     /* We have done everything we are asked to do.  */
 890     return NULL;
 891
 892   if (cnt >= *act)
 893     {
 894       if (cnt >= *max)
 895         {
 896           size_t old_max = *max;
 897           do
 898             *max *= 2;
 899           while (*max <= cnt);
 900
 901           *table =
 902             (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
 903           memset (&(*table)[old_max], '\0',
 904                   (*max - old_max) * sizeof (u_int32_t));
 905         }
 906
 907       (*table)[cnt] = 0;
 908       *act = cnt;
 909     }
 910
 911   return &(*table)[cnt];
 912 }
 913
 914
 915 static void
 916 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
 917 {
 918   /* These function defines the default values for the classes and conversions
 919      according to POSIX.2 2.5.2.1.
 920      It may seem that the order of these if-blocks is arbitrary but it is NOT.
 921      Don't move them unless you know what you do!  */
 922
 923   void set_default (int bit, int from, int to)
 924     {
 925       char tmp[2];
 926       int ch;
 927       /* Define string.  */
 928       strcpy (tmp, "?");
 929
 930       for (ch = from; ch <= to; ++ch)
 931         {
 932           unsigned int value;
 933           tmp[0] = ch;
 934
 935           value = charset_find_value (charset, tmp, 1);
 936           if (value == ILLEGAL_CHAR_VALUE)
 937             {
 938               error (0, 0, _("\
 939 character `%s' not defined while needed as default value"),
 940                      tmp);
 941               continue;
 942             }
 943           else
 944             ELEM (ctype, class_collection, , value) |= bit;
 945         }
 946     }
 947
 948   /* Set default values if keyword was not present.  */
 949   if ((ctype->class_done & BIT (tok_upper)) == 0)
 950     /* "If this keyword [lower] is not specified, the lowercase letters
 951         `A' through `Z', ..., shall automatically belong to this class,
 952         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
 953     set_default (BIT (tok_upper), 'A', 'Z');
 954
 955   if ((ctype->class_done & BIT (tok_lower)) == 0)
 956     /* "If this keyword [lower] is not specified, the lowercase letters
 957         `a' through `z', ..., shall automatically belong to this class,
 958         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
 959     set_default (BIT (tok_lower), 'a', 'z');
 960
 961   if ((ctype->class_done & BIT (tok_alpha)) == 0)
 962     {
 963       /* Table 2-6 in P1003.2 says that characters in class `upper' or
 964          class `lower' *must* be in class `alpha'.  */
 965       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
 966       size_t cnt;
 967
 968       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 969         if ((ctype->class_collection[cnt] & mask) != 0)
 970           ctype->class_collection[cnt] |= BIT (tok_alpha);
 971     }
 972
 973   if ((ctype->class_done & BIT (tok_digit)) == 0)
 974     /* "If this keyword [digit] is not specified, the digits `0' through
 975         `9', ..., shall automatically belong to this class, with
 976         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
 977     set_default (BIT (tok_digit), '0', '9');
 978
 979   /* "Only characters specified for the `alpha' and `digit' keyword
 980      shall be specified.  Characters specified for the keyword `alpha'
 981      and `digit' are automatically included in this class.  */
 982   {
 983     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
 984     size_t cnt;
 985
 986     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 987       if ((ctype->class_collection[cnt] & mask) != 0)
 988         ctype->class_collection[cnt] |= BIT (tok_alnum);
 989   }
 990
 991   if ((ctype->class_done & BIT (tok_space)) == 0)
 992     /* "If this keyword [space] is not specified, the characters <space>,
 993         <form-feed>, <newline>, <carriage-return>, <tab>, and
 994         <vertical-tab>, ..., shall automatically belong to this class,
 995         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
 996     {
 997       unsigned int value;
 998
 999       value = charset_find_value (charset, "space", 5);
1000       if (value == ILLEGAL_CHAR_VALUE)
1001         error (0, 0, _("\
1002 character `%s' not defined while needed as default value"),
1003                "<space>");
1004       else
1005         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1006
1007       value = charset_find_value (charset, "form-feed", 9);
1008       if (value == ILLEGAL_CHAR_VALUE)
1009         error (0, 0, _("\
1010 character `%s' not defined while needed as default value"),
1011                "<form-feed>");
1012       else
1013         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1014
1015       value = charset_find_value (charset, "newline", 7);
1016       if (value == ILLEGAL_CHAR_VALUE)
1017         error (0, 0, _("\
1018 character `%s' not defined while needed as default value"),
1019                "<newline>");
1020       else
1021         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1022
1023       value = charset_find_value (charset, "carriage-return", 15);
1024       if (value == ILLEGAL_CHAR_VALUE)
1025         error (0, 0, _("\
1026 character `%s' not defined while needed as default value"),
1027                "<carriage-return>");
1028       else
1029         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1030
1031       value = charset_find_value (charset, "tab", 3);
1032       if (value == ILLEGAL_CHAR_VALUE)
1033         error (0, 0, _("\
1034 character `%s' not defined while needed as default value"),
1035                "<tab>");
1036       else
1037         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1038
1039       value = charset_find_value (charset, "vertical-tab", 12);
1040       if (value == ILLEGAL_CHAR_VALUE)
1041         error (0, 0, _("\
1042 character `%s' not defined while needed as default value"),
1043                "<vertical-tab>");
1044       else
1045         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1046     }
1047
1048   if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1049     /* "If this keyword is not specified, the digits `0' to `9', the
1050         uppercase letters `A' through `F', and the lowercase letters `a'
1051         through `f', ..., shell automatically belong to this class, with
1052         implementation defined character values."  [P1003.2, 2.5.2.1]  */
1053     {
1054       set_default (BIT (tok_xdigit), '0', '9');
1055       set_default (BIT (tok_xdigit), 'A', 'F');
1056       set_default (BIT (tok_xdigit), 'a', 'f');
1057     }
1058
1059   if ((ctype->class_done & BIT (tok_blank)) == 0)
1060     /* "If this keyword [blank] is unspecified, the characters <space> and
1061        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
1062    {
1063       unsigned int value;
1064
1065       value = charset_find_value (charset, "space", 5);
1066       if (value == ILLEGAL_CHAR_VALUE)
1067         error (0, 0, _("\
1068 character `%s' not defined while needed as default value"),
1069                "<space>");
1070       else
1071         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1072
1073       value = charset_find_value (charset, "tab", 3);
1074       if (value == ILLEGAL_CHAR_VALUE)
1075         error (0, 0, _("\
1076 character `%s' not defined while needed as default value"),
1077                "<tab>");
1078       else
1079         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1080     }
1081
1082   if ((ctype->class_done & BIT (tok_graph)) == 0)
1083     /* "If this keyword [graph] is not specified, characters specified for
1084         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1085         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
1086     {
1087       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1088         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1089       size_t cnt;
1090
1091       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1092         if ((ctype->class_collection[cnt] & mask) != 0)
1093           ctype->class_collection[cnt] |= BIT (tok_graph);
1094     }
1095
1096   if ((ctype->class_done & BIT (tok_print)) == 0)
1097     /* "If this keyword [print] is not provided, characters specified for
1098         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1099         and the <space> character shall belong to this character class."
1100         [P1003.2, 2.5.2.1]  */
1101     {
1102       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1103         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1104       size_t cnt;
1105       int space;
1106
1107       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1108         if ((ctype->class_collection[cnt] & mask) != 0)
1109           ctype->class_collection[cnt] |= BIT (tok_print);
1110
1111       space = charset_find_value (charset, "space", 5);
1112       if (space == ILLEGAL_CHAR_VALUE)
1113         error (0, 0, _("\
1114 character `%s' not defined while needed as default value"),
1115                "<space>");
1116       else
1117         ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1118     }
1119
1120   if (ctype->toupper_done == 0)
1121     /* "If this keyword [toupper] is not spcified, the lowercase letters
1122         `a' through `z', and their corresponding uppercase letters `A' to
1123         `Z', ..., shall automatically be included, with implementation-
1124         defined character values."  [P1003.2, 2.5.2.1]  */
1125     {
1126       char tmp[4];
1127       int ch;
1128
1129       strcpy (tmp, "<?>");
1130
1131       for (ch = 'a'; ch <= 'z'; ++ch)
1132         {
1133           unsigned int value_from, value_to;
1134
1135           tmp[1] = (char) ch;
1136
1137           value_from = charset_find_value (charset, &tmp[1], 1);
1138           if (value_from == ILLEGAL_CHAR_VALUE)
1139             {
1140               error (0, 0, _("\
1141 character `%c' not defined while needed as default value"),
1142                      tmp);
1143               continue;
1144             }
1145
1146           /* This conversion is implementation defined.  */
1147           tmp[1] = (char) (ch + ('A' - 'a'));
1148           value_to = charset_find_value (charset, &tmp[1], 1);
1149           if (value_to == -1)
1150             {
1151               error (0, 0, _("\
1152 character `%s' not defined while needed as default value"),
1153                      tmp);
1154               continue;
1155             }
1156
1157           /* The index [0] is determined by the order of the
1158              `ctype_map_newP' calls in `ctype_startup'.  */
1159           ELEM (ctype, map_collection, [0], value_from) = value_to;
1160         }
1161     }
1162
1163   if (ctype->tolower_done == 0)
1164     /* "If this keyword [tolower] is not specified, the mapping shall be
1165        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
1166     {
1167       size_t cnt;
1168
1169       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1170         if (ctype->map_collection[0][cnt] != 0)
1171           ELEM (ctype, map_collection, [1],
1172                 ctype->map_collection[0][cnt])
1173             = ctype->charnames[cnt];
1174     }
1175 }
1176
1177
1178 static void
1179 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1180 {
1181   size_t idx;
1182
1183   /* First we have to decide how we organize the arrays.  It is easy for
1184      a one-byte character set.  But multi-byte character set cannot be
1185      stored flat because they might be sparsly used.  So we determine an
1186      optimal hashing function for the used characters.
1187
1188      We use a very trivial hashing function to store the sparse table.
1189      CH % TABSIZE is used as an index.  To solve multiple hits we have
1190      N planes.  This gurantees a fixed search time for a character [N
1191      / 2].  In the following code we determine the minmum value for
1192      TABSIZE * N, where TABSIZE >= 256.  */
1193   size_t min_total = UINT_MAX;
1194   size_t act_size = 256;
1195
1196   fputs (_("\
1197 Computing table size for character classes might take a while..."),
1198          stderr);
1199
1200   while (act_size < min_total)
1201     {
1202       size_t cnt[act_size];
1203       size_t act_planes = 1;
1204
1205       memset (cnt, '\0', sizeof cnt);
1206
1207       for (idx = 0; idx < 256; ++idx)
1208         cnt[idx] = 1;
1209
1210       for (idx = 0; idx < ctype->charnames_act; ++idx)
1211         if (ctype->charnames[idx] >= 256)
1212           {
1213             size_t nr = ctype->charnames[idx] % act_size;
1214
1215             if (++cnt[nr] > act_planes)
1216               {
1217                 act_planes = cnt[nr];
1218                 if (act_size * act_planes >= min_total)
1219                   break;
1220               }
1221           }
1222
1223       if (act_size * act_planes < min_total)
1224         {
1225           min_total = act_size * act_planes;
1226           ctype->plane_size = act_size;
1227           ctype->plane_cnt = act_planes;
1228         }
1229
1230       ++act_size;
1231     }
1232
1233   fprintf (stderr, _(" done\n"));
1234
1235
1236 #if __BYTE_ORDER == __LITTLE_ENDIAN
1237 # define NAMES_B1 ctype->names_el
1238 # define NAMES_B2 ctype->names_eb
1239 #else
1240 # define NAMES_B1 ctype->names_eb
1241 # define NAMES_B2 ctype->names_el
1242 #endif
1243
1244   ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1245                                            * ctype->plane_cnt,
1246                                            sizeof (u_int32_t));
1247   ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1248                                            * ctype->plane_cnt,
1249                                            sizeof (u_int32_t));
1250
1251   for (idx = 1; idx < 256; ++idx)
1252     NAMES_B1[idx] = idx;
1253
1254   /* Trick: change the 0th entry's name to 1 to mark the cell occupied.  */
1255   NAMES_B1[0] = 1;
1256
1257   for (idx = 256; idx < ctype->charnames_act; ++idx)
1258     {
1259       size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1260       size_t depth = 0;
1261
1262       while (NAMES_B1[nr + depth * ctype->plane_size])
1263         ++depth;
1264       assert (depth < ctype->plane_cnt);
1265
1266       NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1267
1268       /* Now for faster access remember the index in the NAMES_B array.  */
1269       ctype->charnames[idx] = nr + depth * ctype->plane_size;
1270     }
1271   NAMES_B1[0] = 0;
1272
1273   for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1274     NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1275
1276
1277   /* You wonder about this amount of memory?  This is only because some
1278      users do not manage to address the array with unsigned values or
1279      data types with range >= 256.  '\200' would result in the array
1280      index -128.  To help these poor people we duplicate the entries for
1281      128 up to 255 below the entry for \0.  */
1282   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1283                                              sizeof (char_class_t));
1284   ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1285                                                  * ctype->plane_cnt,
1286                                                  sizeof (char_class32_t));
1287
1288   /* Fill in the character class information.  */
1289 #if __BYTE_ORDER == __LITTLE_ENDIAN
1290 # define TRANS(w) CHAR_CLASS_TRANS (w)
1291 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1292 #else
1293 # define TRANS(w) (w)
1294 # define TRANS32(w) (w)
1295 #endif
1296
1297   for (idx = 0; idx < ctype->class_collection_act; ++idx)
1298     if (ctype->charnames[idx] < 256)
1299       ctype->ctype_b[128 + ctype->charnames[idx]]
1300         = TRANS (ctype->class_collection[idx]);
1301
1302   /* Mirror first 127 entries.  We must take care that entry -1 is not
1303      mirrored because EOF == -1.  */
1304   for (idx = 0; idx < 127; ++idx)
1305     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1306
1307   /* The 32 bit array contains all characters.  */
1308   for (idx = 0; idx < ctype->class_collection_act; ++idx)
1309     ctype->ctype32_b[ctype->charnames[idx]]
1310       = TRANS32 (ctype->class_collection[idx]);
1311
1312   /* Room for table of mappings.  */
1313   ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1314                                           * sizeof (u_int32_t *));
1315   ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1316                                           * sizeof (u_int32_t *));
1317
1318   /* Fill in all mappings.  */
1319   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1320     {
1321       unsigned int idx2;
1322
1323       /* Allocate table.  */
1324       ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1325                                                    * ctype->plane_cnt + 128)
1326                                                   * sizeof (u_int32_t));
1327       ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1328                                                    * ctype->plane_cnt + 128)
1329                                                   * sizeof (u_int32_t));
1330
1331 #if __BYTE_ORDER == __LITTLE_ENDIAN
1332 # define MAP_B1 ctype->map_el
1333 # define MAP_B2 ctype->map_eb
1334 #else
1335 # define MAP_B1 ctype->map_eb
1336 # define MAP_B2 ctype->map_el
1337 #endif
1338
1339       /* Copy default value (identity mapping).  */
1340       memcpy (&MAP_B1[idx][128], NAMES_B1,
1341               ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1342
1343       /* Copy values from collection.  */
1344       for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1345         if (ctype->map_collection[idx][idx2] != 0)
1346           MAP_B1[idx][128 + ctype->charnames[idx2]] =
1347             ctype->map_collection[idx][idx2];
1348
1349       /* Mirror first 127 entries.  We must take care not to map entry
1350          -1 because EOF == -1.  */
1351       for (idx2 = 0; idx2 < 127; ++idx2)
1352         MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1353
1354       /* EOF must map to EOF.  */
1355       MAP_B1[idx][127] = EOF;
1356
1357       /* And now the other byte order.  */
1358       for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1359         MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1360     }
1361
1362   /* Extra array for class and map names.  */
1363   ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1364                                                  * sizeof (u_int32_t));
1365   ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1366                                                * sizeof (u_int32_t));
1367
1368   /* Array for width information.  Because the expected width are very
1369      small we use only one single byte.  This save space and we need
1370      not provide the information twice with both endianesses.  */
1371   ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1372                                             * ctype->plane_cnt);
1373   /* Initialize with default width value.  */
1374   memset (ctype->width, charset->width_default,
1375           ctype->plane_size * ctype->plane_cnt);
1376   if (charset->width_rules != NULL)
1377     {
1378       size_t cnt;
1379
1380       for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1381         if (charset->width_rules[cnt].width != charset->width_default)
1382           for (idx = charset->width_rules[cnt].from;
1383                idx <= charset->width_rules[cnt].to; ++idx)
1384             {
1385               size_t nr = idx % ctype->plane_size;
1386               size_t depth = 0;
1387
1388               while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1389                 ++depth;
1390               assert (depth < ctype->plane_cnt);
1391
1392               ctype->width[nr + depth * ctype->plane_size]
1393                 = charset->width_rules[cnt].width;
1394             }
1395     }
1396
1397   /* Compute MB_CUR_MAX.  Please note the value mb_cur_max in the
1398      character set definition gives the number of bytes in the wide
1399      character representation.  We compute the number of bytes used
1400      for the UTF-8 encoded form.  */
1401   ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1402
1403   /* We need the name of the currently used 8-bit character set to
1404      make correct conversion between this 8-bit representation and the
1405      ISO 10646 character set used internally for wide characters.  */
1406   ctype->codeset_name = charset->code_set_name;
1407 }