locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-1999, 2000, 2001 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <errno.h>
  25 #include <error.h>
  26 #include <stdlib.h>
  27 #include <wchar.h>
  28 #include <sys/param.h>
  29
  30 #include "charmap.h"
  31 #include "localeinfo.h"
  32 #include "linereader.h"
  33 #include "locfile.h"
  34 #include "localedef.h"
  35 #include "elem-hash.h"
  36
  37 /* Uncomment the following line in the production version.  */
  38 /* #define NDEBUG 1 */
  39 #include <assert.h>
  40
      /* Allocation hooks required by the obstack macros: chunks are
         obtained with malloc and given back with free.  */
   41 #define obstack_chunk_alloc malloc
   42 #define obstack_chunk_free free
  43
  44 static inline void
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   if (sizeof (int32_t) == sizeof (int))
  48     obstack_int_grow (obstack, data);
  49   else
  50     obstack_grow (obstack, &data, sizeof (int32_t));
  51 }
  52
  53 static inline void
  54 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  55 {
  56   if (sizeof (int32_t) == sizeof (int))
  57     obstack_int_grow_fast (obstack, data);
  58   else
  59     obstack_grow (obstack, &data, sizeof (int32_t));
  60 }
  61
  62 /* Forward declaration.  */
  63 struct element_t;
  64
   65 /* Data type for one section of the collation definition.  A node
         records the name of the section, the first and last element
         belonging to it and the sorting rules in effect for it.  Nodes are
         chained through two independent links, DEF_NEXT and NEXT.  */
   66 struct section_list
   67 {
   68   /* Successor in the known_sections list.  */
   69   struct section_list *def_next;
   70   /* Successor in the sections list.  */
   71   struct section_list *next;
   72   /* Name of the section.  */
   73   const char *name;
   74   /* First element of this section.  */
   75   struct element_t *first;
   76   /* Last element of this section.  */
   77   struct element_t *last;
   78   /* These are the rules for this section: an array of sorting flags,
           one entry per weight level, as installed by read_directions.  */
   79   enum coll_sort_rule *rules;
   80   /* Index of the rule set in the appropriate section of the output file.  */
   81   int ruleidx;
   82 };
  83
  84 struct element_t;
  85
     /* Weight of an element for one level: a sequence of CNT element
        pointers stored in W.  insert_weights stores a null pointer for an
        ignored level and one of the ELEMENT_ELLIPSIS* placeholder values
        for a level that depends on the position inside an ellipsis
        range.  */
  86 struct element_list_t
  87 {
  88   /* Number of elements.  */
  89   int cnt;
  90
       /* The CNT element pointers making up the weight.  */
  91   struct element_t **w;
  92 };
  93
  94 /* Data type for collating element.  Objects are created by
        new_element and linked into the order list by insert_weights.  */
  95 struct element_t
  96 {
       /* Name of the element; NULL if it was created without one.  */
  97   const char *name;
  98
       /* Multibyte sequence and its length in bytes; NULL/0 if not (yet)
          known.  */
  99   const char *mbs;
 100   size_t nmbs;
       /* Zero-terminated wide character sequence and the number of
          characters in it; NULL/0 if not (yet) known.  */
 101   const uint32_t *wcs;
 102   size_t nwcs;
       /* Both start out as NULL/0 in new_element; the code which assigns
          them is not part of this section of the file.  */
 103   int *mborder;
 104   int wcorder;
 105
 106   /* The following is a bit mask which bits are set if this element is
 107      used in the appropriate level.  Interesting for the singlebyte
 108      weight computation.
 109
 110      XXX The type here restricts the number of levels to 32.  It could
 111      be changed if necessary but I doubt this is necessary.  */
 112   unsigned int used_in_level;
 113
       /* Array with one weight list per level (nrules entries), allocated
          by insert_weights; NULL until then.  */
 114   struct element_list_t *weights;
 115
 116   /* Nonzero if this is a real character definition.  */
 117   int is_character;
 118
 119   /* Order of the character in the sequence.  This information will
 120      be used in range expressions.  */
 121   int mbseqorder;
 122   int wcseqorder;
 123
 124   /* Where does the definition come from.  */
 125   const char *file;
 126   size_t line;
 127
 128   /* Which section does this belong to.  */
 129   struct section_list *section;
 130
 131   /* Predecessor and successor in the order list.  */
 132   struct element_t *last;
 133   struct element_t *next;
 134
 135   /* Next element in multibyte output list.  */
 136   struct element_t *mbnext;
 137   struct element_t *mblast;
 138
 139   /* Next element in wide character output list.  */
 140   struct element_t *wcnext;
 141   struct element_t *wclast;
 142 };
 143
 144 /* Special element values.  These are not real objects but small
        integers converted to pointers; they must never be dereferenced.
        insert_weights stores ELEMENT_ELLIPSIS2 in a weight list as a
        placeholder for "the weight depends on the position in the
        ellipsis range".  */
 145 #define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
 146 #define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
 147 #define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 148
 149 /* Data type for collating symbol.  Objects are created by
        new_symbol.  */
 150 struct symbol_t
 151 {
       /* Name of the symbol (a copy owned by the collate memory pool).  */
 152   const char *name;
 153
 154   /* Point to place in the order list.  NULL until an element has been
          created for the symbol, see find_element.  */
 155   struct element_t *order;
 156
 157   /* Where does the definition come from.  */
 158   const char *file;
 159   size_t line;
 160 };
 161
 162 /* Sparse table of struct element_t *.

        The header 3level.h is included three times, each time after
        defining TABLE (the name of the table type), ELEMENT (the type of
        the stored values) and DEFAULT.  NOTE(review): DEFAULT is
        presumably the value reported for slots which were never set, and
        ITERATE/NO_FINALIZE presumably select optional parts of the
        header -- confirm against 3level.h.  The resulting `struct
        wchead_table' and `struct collseq_table' are used as members of
        struct locale_collate_t.  */
 163 #define TABLE wchead_table
 164 #define ELEMENT struct element_t *
 165 #define DEFAULT NULL
 166 #define ITERATE
 167 #define NO_FINALIZE
 168 #include "3level.h"
 169
 170 /* Sparse table of int32_t.  */
 171 #define TABLE collidx_table
 172 #define ELEMENT int32_t
 173 #define DEFAULT 0
 174 #include "3level.h"
 175
 176 /* Sparse table of uint32_t.  The default is the all-ones value.  */
 177 #define TABLE collseq_table
 178 #define ELEMENT uint32_t
 179 #define DEFAULT ~((uint32_t) 0)
 180 #include "3level.h"
 181
 182
 183 /* The real definition of the struct for the LC_COLLATE locale.  */
 184 struct locale_collate_t
 185 {
 186   int col_weight_max;
 187   int cur_weight_max;
 188
 189   /* List of known scripts.  */
 190   struct section_list *known_sections;
 191   /* List of used sections.  */
 192   struct section_list *sections;
 193   /* Current section using definition.  New elements are assigned to
          this section by new_element and insert_weights.  */
 194   struct section_list *current_section;
 195   /* There always can be an unnamed section.  */
 196   struct section_list unnamed_section;
 197   /* To make handling of errors easier we have another section.  */
 198   struct section_list error_section;
 199   /* Sometimes we are defining the values for collating symbols before
 200      the first actual section.  */
 201   struct section_list symbol_section;
 202
 203   /* Start of the order list.  */
 204   struct element_t *start;
 205
 206   /* The undefined element.  */
 207   struct element_t undefined;
 208
 209   /* This is the cursor for `reorder_after' insertions.  insert_weights
          links every new element directly behind the cursor and then
          moves the cursor to the new element.  */
 210   struct element_t *cursor;
 211
 212   /* This value is used when handling ellipsis.  */
 213   struct element_t ellipsis_weight;
 214
 215   /* Known collating elements.  */
 216   hash_table elem_table;
 217
 218   /* Known collating symbols.  */
 219   hash_table sym_table;
 220
 221   /* Known collation sequences.  */
 222   hash_table seq_table;
 223
       /* Memory pool from which sections, elements, symbols, their
          strings and the weight lists are allocated.  */
 224   struct obstack mempool;
 225
 226   /* The LC_COLLATE category is a bit special as it is sometimes possible
 227      that the definitions from more than one input file contains information.
 228      Therefore we keep all relevant input in a list.  */
 229   struct locale_collate_t *next;
 230
 231   /* Arrays with heads of the list for each of the leading bytes in
 232      the multibyte sequences.  */
 233   struct element_t *mbheads[256];
 234
 235   /* Sparse table with the heads of the lists for the wide character
 236      sequences.  NOTE(review): presumably indexed by the first wide
         character, in analogy to MBHEADS -- confirm.  */
 237   struct wchead_table wcheads;
 238
 239   /* The arrays with the collation sequence order.  */
 240   unsigned char mbseqorder[256];
 241   struct collseq_table wcseqorder;
 242 };
 243
 244
 245 /* We have a few global variables which are used for reading all
 246    LC_COLLATE category descriptions in all files.

        NRULES is the number of weight levels.  It is zero until
        read_directions has processed the first direction specification and
        is set from that specification; insert_weights allocates NRULES
        weight lists per element.  */
 247 static uint32_t nrules;
 248
 249
 250 /* We need UTF-8 encoding of numbers.  */
 251 static inline int
 252 utf8_encode (char *buf, int val)
 253 {
 254   int retval;
 255
 256   if (val < 0x80)
 257     {
 258       *buf++ = (char) val;
 259       retval = 1;
 260     }
 261   else
 262     {
 263       int step;
 264
 265       for (step = 2; step < 6; ++step)
 266         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 267           break;
 268       retval = step;
 269
 270       *buf = (unsigned char) (~0xff >> step);
 271       --step;
 272       do
 273         {
 274           buf[step] = 0x80 | (val & 0x3f);
 275           val >>= 6;
 276         }
 277       while (--step > 0);
 278       *buf |= val;
 279     }
 280
 281   return retval;
 282 }
 283
 284
 285 static struct section_list *
 286 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 287                    struct section_list *next)
 288 {
 289   struct section_list *newp;
 290
 291   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 292                                                 sizeof (*newp));
 293   newp->next = next;
 294   newp->name = string;
 295   newp->first = NULL;
 296   newp->last = NULL;
 297
 298   return newp;
 299 }
 300
 301
 302 static struct element_t *
 303 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 304              const uint32_t *wcs, const char *name, size_t namelen,
 305              int is_character)
 306 {
 307   struct element_t *newp;
 308
 309   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 310                                              sizeof (*newp));
 311   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 312                                                     name, namelen);
 313   if (mbs != NULL)
 314     {
 315       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 316       newp->nmbs = mbslen;
 317     }
 318   else
 319     {
 320       newp->mbs = NULL;
 321       newp->nmbs = 0;
 322     }
 323   if (wcs != NULL)
 324     {
 325       size_t nwcs = wcslen ((wchar_t *) wcs);
 326       uint32_t zero = 0;
 327       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 328       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 329       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 330       newp->nwcs = nwcs;
 331     }
 332   else
 333     {
 334       newp->wcs = NULL;
 335       newp->nwcs = 0;
 336     }
 337   newp->mborder = NULL;
 338   newp->wcorder = 0;
 339   newp->used_in_level = 0;
 340   newp->is_character = is_character;
 341
 342   /* Will be assigned later.  XXX  */
 343   newp->mbseqorder = 0;
 344   newp->wcseqorder = 0;
 345
 346   /* Will be allocated later.  */
 347   newp->weights = NULL;
 348
 349   newp->file = NULL;
 350   newp->line = 0;
 351
 352   newp->section = collate->current_section;
 353
 354   newp->last = NULL;
 355   newp->next = NULL;
 356
 357   newp->mbnext = NULL;
 358   newp->mblast = NULL;
 359
 360   newp->wcnext = NULL;
 361   newp->wclast = NULL;
 362
 363   return newp;
 364 }
 365
 366
 367 static struct symbol_t *
 368 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 369 {
 370   struct symbol_t *newp;
 371
 372   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 373
 374   newp->name = obstack_copy0 (&collate->mempool, name, len);
 375   newp->order = NULL;
 376
 377   newp->file = NULL;
 378   newp->line = 0;
 379
 380   return newp;
 381 }
 382
 383
 384 /* Test whether this name is already defined somewhere.  */
 385 static int
 386 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 387                  struct charmap_t *charmap, struct repertoire_t *repertoire,
 388                  const char *symbol, size_t symbol_len)
 389 {
 390   void *ignore = NULL;
 391
 392   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 393     {
 394       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 395                 (int) symbol_len, symbol);
 396       return 1;
 397     }
 398
 399   if (repertoire != NULL
 400       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 401           == 0))
 402     {
 403       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 404                 (int) symbol_len, symbol);
 405       return 1;
 406     }
 407
 408   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 409     {
 410       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 411                 (int) symbol_len, symbol);
 412       return 1;
 413     }
 414
 415   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 416     {
 417       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 418                 (int) symbol_len, symbol);
 419       return 1;
 420     }
 421
 422   return 0;
 423 }
 424
 425
 426 /* Read the direction specification.  */
 427 static void
 428 read_directions (struct linereader *ldfile, struct token *arg,
 429                  struct charmap_t *charmap, struct repertoire_t *repertoire,
 430                  struct locale_collate_t *collate)
 431 {
 432   int cnt = 0;
 433   int max = nrules ?: 10;
 434   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 435   int warned = 0;
 436
 437   while (1)
 438     {
 439       int valid = 0;
 440
 441       if (arg->tok == tok_forward)
 442         {
 443           if (rules[cnt] & sort_backward)
 444             {
 445               if (! warned)
 446                 {
 447                   lr_error (ldfile, _("\
 448 %s: `forward' and `backward' are mutually excluding each other"),
 449                             "LC_COLLATE");
 450                   warned = 1;
 451                 }
 452             }
 453           else if (rules[cnt] & sort_forward)
 454             {
 455               if (! warned)
 456                 {
 457                   lr_error (ldfile, _("\
 458 %s: `%s' mentioned more than once in definition of weight %d"),
 459                             "LC_COLLATE", "forward", cnt + 1);
 460                 }
 461             }
 462           else
 463             rules[cnt] |= sort_forward;
 464
 465           valid = 1;
 466         }
 467       else if (arg->tok == tok_backward)
 468         {
 469           if (rules[cnt] & sort_forward)
 470             {
 471               if (! warned)
 472                 {
 473                   lr_error (ldfile, _("\
 474 %s: `forward' and `backward' are mutually excluding each other"),
 475                             "LC_COLLATE");
 476                   warned = 1;
 477                 }
 478             }
 479           else if (rules[cnt] & sort_backward)
 480             {
 481               if (! warned)
 482                 {
 483                   lr_error (ldfile, _("\
 484 %s: `%s' mentioned more than once in definition of weight %d"),
 485                             "LC_COLLATE", "backward", cnt + 1);
 486                 }
 487             }
 488           else
 489             rules[cnt] |= sort_backward;
 490
 491           valid = 1;
 492         }
 493       else if (arg->tok == tok_position)
 494         {
 495           if (rules[cnt] & sort_position)
 496             {
 497               if (! warned)
 498                 {
 499                   lr_error (ldfile, _("\
 500 %s: `%s' mentioned more than once in definition of weight %d"),
 501                             "LC_COLLATE", "position", cnt + 1);
 502                 }
 503             }
 504           else
 505             rules[cnt] |= sort_position;
 506
 507           valid = 1;
 508         }
 509
 510       if (valid)
 511         arg = lr_token (ldfile, charmap, repertoire, verbose);
 512
 513       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 514           || arg->tok == tok_semicolon)
 515         {
 516           if (! valid && ! warned)
 517             {
 518               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 519               warned = 1;
 520             }
 521
 522           /* See whether we have to increment the counter.  */
 523           if (arg->tok != tok_comma && rules[cnt] != 0)
 524             {
 525               /* Add the default `forward' if we have seen only `position'.  */
 526               if (rules[cnt] == sort_position)
 527                 rules[cnt] = sort_position | sort_forward;
 528
 529               ++cnt;
 530             }
 531
 532           if (arg->tok == tok_eof || arg->tok == tok_eol)
 533             /* End of line or file, so we exit the loop.  */
 534             break;
 535
 536           if (nrules == 0)
 537             {
 538               /* See whether we have enough room in the array.  */
 539               if (cnt == max)
 540                 {
 541                   max += 10;
 542                   rules = (enum coll_sort_rule *) xrealloc (rules,
 543                                                             max
 544                                                             * sizeof (*rules));
 545                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 546                 }
 547             }
 548           else
 549             {
 550               if (cnt == nrules)
 551                 {
 552                   /* There must not be any more rule.  */
 553                   if (! warned)
 554                     {
 555                       lr_error (ldfile, _("\
 556 %s: too many rules; first entry only had %d"),
 557                                 "LC_COLLATE", nrules);
 558                       warned = 1;
 559                     }
 560
 561                   lr_ignore_rest (ldfile, 0);
 562                   break;
 563                 }
 564             }
 565         }
 566       else
 567         {
 568           if (! warned)
 569             {
 570               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 571               warned = 1;
 572             }
 573         }
 574
 575       arg = lr_token (ldfile, charmap, repertoire, verbose);
 576     }
 577
 578   if (nrules == 0)
 579     {
 580       /* Now we know how many rules we have.  */
 581       nrules = cnt;
 582       rules = (enum coll_sort_rule *) xrealloc (rules,
 583                                                 nrules * sizeof (*rules));
 584     }
 585   else
 586     {
 587       if (cnt < nrules)
 588         {
 589           /* Not enough rules in this specification.  */
 590           if (! warned)
 591             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 592
 593           do
 594             rules[cnt] = sort_forward;
 595           while (++cnt < nrules);
 596         }
 597     }
 598
 599   collate->current_section->rules = rules;
 600 }
 601
 602
 603 static struct element_t *
 604 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 605               const char *str, size_t len)
 606 {
 607   struct element_t *result = NULL;
 608
 609   /* Search for the entries among the collation sequences already define.  */
 610   if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
 611     {
 612       /* Nope, not define yet.  So we see whether it is a
 613          collation symbol.  */
 614       void *ptr;
 615
 616       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 617         {
 618           /* It's a collation symbol.  */
 619           struct symbol_t *sym = (struct symbol_t *) ptr;
 620           result = sym->order;
 621
 622           if (result == NULL)
 623             result = sym->order = new_element (collate, NULL, 0, NULL,
 624                                                NULL, 0, 0);
 625         }
 626       else if (find_entry (&collate->elem_table, str, len,
 627                            (void **) &result) != 0)
 628         {
 629           /* It's also no collation element.  So it is a character
 630              element defined later.  */
 631           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 632           /* Insert it into the sequence table.  */
 633           insert_entry (&collate->seq_table, str, len, result);
 634         }
 635     }
 636
 637   return result;
 638 }
 639
 640
 641 static void
 642 unlink_element (struct locale_collate_t *collate)
 643 {
 644   if (collate->cursor == collate->start)
 645     {
 646       assert (collate->cursor->next == NULL);
 647       assert (collate->cursor->last == NULL);
 648       collate->cursor = NULL;
 649     }
 650   else
 651     {
 652       if (collate->cursor->next != NULL)
 653         collate->cursor->next->last = collate->cursor->last;
 654       if (collate->cursor->last != NULL)
 655         collate->cursor->last->next = collate->cursor->next;
 656       collate->cursor = collate->cursor->last;
 657     }
 658 }
 659
 660
 661 static void
 662 insert_weights (struct linereader *ldfile, struct element_t *elem,
 663                 struct charmap_t *charmap, struct repertoire_t *repertoire,
 664                 struct locale_collate_t *collate, enum token_t ellipsis)
 665 {
 666   int weight_cnt;
 667   struct token *arg;
 668
 669   /* Initialize all the fields.  */
 670   elem->file = ldfile->fname;
 671   elem->line = ldfile->lineno;
 672
 673   elem->last = collate->cursor;
 674   elem->next = collate->cursor ? collate->cursor->next : NULL;
 675   if (collate->cursor != NULL && collate->cursor->next != NULL)
 676     collate->cursor->next->last = elem;
 677   if (collate->cursor != NULL)
 678     collate->cursor->next = elem;
 679   if (collate->start == NULL)
 680     {
 681       assert (collate->cursor == NULL);
 682       collate->start = elem;
 683     }
 684
 685   elem->section = collate->current_section;
 686
 687   if (collate->current_section->first == NULL)
 688     collate->current_section->first = elem;
 689   if (collate->current_section->last == collate->cursor)
 690     collate->current_section->last = elem;
 691
 692   collate->cursor = elem;
 693
 694   elem->weights = (struct element_list_t *)
 695     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 696   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 697
 698   weight_cnt = 0;
 699
 700   arg = lr_token (ldfile, charmap, repertoire, verbose);
 701   do
 702     {
 703       if (arg->tok == tok_eof || arg->tok == tok_eol)
 704         break;
 705
 706       if (arg->tok == tok_ignore)
 707         {
 708           /* The weight for this level has to be ignored.  We use the
 709              null pointer to indicate this.  */
 710           elem->weights[weight_cnt].w = (struct element_t **)
 711             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 712           elem->weights[weight_cnt].w[0] = NULL;
 713           elem->weights[weight_cnt].cnt = 1;
 714         }
 715       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 716         {
 717           char ucs4str[10];
 718           struct element_t *val;
 719           char *symstr;
 720           size_t symlen;
 721
 722           if (arg->tok == tok_bsymbol)
 723             {
 724               symstr = arg->val.str.startmb;
 725               symlen = arg->val.str.lenmb;
 726             }
 727           else
 728             {
 729               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 730               symstr = ucs4str;
 731               symlen = 9;
 732             }
 733
 734           val = find_element (ldfile, collate, symstr, symlen);
 735           if (val == NULL)
 736             break;
 737
 738           elem->weights[weight_cnt].w = (struct element_t **)
 739             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 740           elem->weights[weight_cnt].w[0] = val;
 741           elem->weights[weight_cnt].cnt = 1;
 742         }
 743       else if (arg->tok == tok_string)
 744         {
 745           /* Split the string up in the individual characters and put
 746              the element definitions in the list.  */
 747           const char *cp = arg->val.str.startmb;
 748           int cnt = 0;
 749           struct element_t *charelem;
 750           struct element_t **weights = NULL;
 751           int max = 0;
 752
 753           if (*cp == '\0')
 754             {
 755               lr_error (ldfile, _("%s: empty weight string not allowed"),
 756                         "LC_COLLATE");
 757               lr_ignore_rest (ldfile, 0);
 758               break;
 759             }
 760
 761           do
 762             {
 763               if (*cp == '<')
 764                 {
 765                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 766                      the latter we have to unify the name.  */
 767                   const char *startp = ++cp;
 768                   size_t len;
 769
 770                   while (*cp != '>')
 771                     {
 772                       if (*cp == ldfile->escape_char)
 773                         ++cp;
 774                       if (*cp == '\0')
 775                         /* It's a syntax error.  */
 776                         goto syntax;
 777
 778                       ++cp;
 779                     }
 780
 781                   if (cp - startp == 5 && startp[0] == 'U'
 782                       && isxdigit (startp[1]) && isxdigit (startp[2])
 783                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 784                     {
 785                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 786                       char *newstr;
 787
 788                       newstr = (char *) xmalloc (10);
 789                       snprintf (newstr, 10, "U%08X", ucs4);
 790                       startp = newstr;
 791
 792                       len = 9;
 793                     }
 794                   else
 795                     len = cp - startp;
 796
 797                   charelem = find_element (ldfile, collate, startp, len);
 798                   ++cp;
 799                 }
 800               else
 801                 {
 802                   /* People really shouldn't use characters directly in
 803                      the string.  Especially since it's not really clear
 804                      what this means.  We interpret all characters in the
 805                      string as if that would be bsymbols.  Otherwise we
 806                      would have to match back to bsymbols somehow and this
 807                      is normally not what people normally expect.  */
 808                   charelem = find_element (ldfile, collate, cp++, 1);
 809                 }
 810
 811               if (charelem == NULL)
 812                 {
 813                   /* We ignore the rest of the line.  */
 814                   lr_ignore_rest (ldfile, 0);
 815                   break;
 816                 }
 817
 818               /* Add the pointer.  */
 819               if (cnt >= max)
 820                 {
 821                   struct element_t **newp;
 822                   max += 10;
 823                   newp = (struct element_t **)
 824                     alloca (max * sizeof (struct element_t *));
 825                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 826                   weights = newp;
 827                 }
 828               weights[cnt++] = charelem;
 829             }
 830           while (*cp != '\0');
 831
 832           /* Now store the information.  */
 833           elem->weights[weight_cnt].w = (struct element_t **)
 834             obstack_alloc (&collate->mempool,
 835                            cnt * sizeof (struct element_t *));
 836           memcpy (elem->weights[weight_cnt].w, weights,
 837                   cnt * sizeof (struct element_t *));
 838           elem->weights[weight_cnt].cnt = cnt;
 839
 840           /* We don't need the string anymore.  */
 841           free (arg->val.str.startmb);
 842         }
 843       else if (ellipsis != tok_none
 844                && (arg->tok == tok_ellipsis2
 845                    || arg->tok == tok_ellipsis3
 846                    || arg->tok == tok_ellipsis4))
 847         {
 848           /* It must be the same ellipsis as used in the initial column.  */
 849           if (arg->tok != ellipsis)
 850             lr_error (ldfile, _("\
 851 %s: weights must use the same ellipsis symbol as the name"),
 852                       "LC_COLLATE");
 853
 854           /* The weight for this level will depend on the element
 855              iterating over the range.  Put a placeholder.  */
 856           elem->weights[weight_cnt].w = (struct element_t **)
 857             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 858           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 859           elem->weights[weight_cnt].cnt = 1;
 860         }
 861       else
 862         {
 863         syntax:
 864           /* It's a syntax error.  */
 865           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 866           lr_ignore_rest (ldfile, 0);
 867           break;
 868         }
 869
 870       arg = lr_token (ldfile, charmap, repertoire, verbose);
 871       /* This better should be the end of the line or a semicolon.  */
 872       if (arg->tok == tok_semicolon)
 873         /* OK, ignore this and read the next token.  */
 874         arg = lr_token (ldfile, charmap, repertoire, verbose);
 875       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 876         {
 877           /* It's a syntax error.  */
 878           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 879           lr_ignore_rest (ldfile, 0);
 880           break;
 881         }
 882     }
 883   while (++weight_cnt < nrules);
 884
 885   if (weight_cnt < nrules)
 886     {
 887       /* This means the rest of the line uses the current element as
 888          the weight.  */
 889       do
 890         {
 891           elem->weights[weight_cnt].w = (struct element_t **)
 892             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 893           if (ellipsis == tok_none)
 894             elem->weights[weight_cnt].w[0] = elem;
 895           else
 896             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 897           elem->weights[weight_cnt].cnt = 1;
 898         }
 899       while (++weight_cnt < nrules);
 900     }
 901   else
 902     {
 903       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 904         {
 905           /* Too many rule values.  */
 906           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 907           lr_ignore_rest (ldfile, 0);
 908         }
 909       else
 910         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 911     }
 912 }
 913
 914
 915 static int
 916 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 917               struct charmap_t *charmap, struct repertoire_t *repertoire,
 918               struct locale_collate_t *collate)
 919 {
 920   /* First find out what kind of symbol this is.  */
 921   struct charseq *seq;
 922   uint32_t wc;
 923   struct element_t *elem = NULL;
 924
 925   /* Try to find the character in the charmap.  */
 926   seq = charmap_find_value (charmap, symstr, symlen);
 927
 928   /* Determine the wide character.  */
 929   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 930     {
 931       wc = repertoire_find_value (repertoire, symstr, symlen);
 932       if (seq != NULL)
 933         seq->ucs4 = wc;
 934     }
 935   else
 936     wc = seq->ucs4;
 937
 938   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 939     {
 940       /* It's no character, so look through the collation elements and
 941          symbol list.  */
 942       void *result;
 943
 944       if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 945         {
 946           /* It's a collation symbol.  */
 947           struct symbol_t *sym = (struct symbol_t *) result;
 948           elem = sym->order;
 949
 950           if (elem == NULL)
 951             elem = sym->order = new_element (collate, NULL, 0, NULL,
 952                                              sym->name, strlen (sym->name), 0);
 953         }
 954       else if (find_entry (&collate->elem_table, symstr, symlen,
 955                            (void **) &elem) != 0)
 956         {
 957           /* It's also no collation element.  Therefore ignore it.  */
 958           lr_ignore_rest (ldfile, 0);
 959           return 1;
 960         }
 961     }
 962   else
 963     {
 964       /* Otherwise the symbols stands for a character.  */
 965       if (find_entry (&collate->seq_table, symstr, symlen,
 966                       (void **) &elem) != 0)
 967         {
 968           uint32_t wcs[2] = { wc, 0 };
 969
 970           /* We have to allocate an entry.  */
 971           elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
 972                               seq != NULL ? seq->nbytes : 0,
 973                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
 974                               symstr, symlen, 1);
 975
 976           /* And add it to the table.  */
 977           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
 978             /* This cannot happen.  */
 979             assert (! "Internal error");
 980         }
 981       else
 982         {
 983           /* Maybe the character was used before the definition.  In this case
 984              we have to insert the byte sequences now.  */
 985           if (elem->mbs == NULL && seq != NULL)
 986             {
 987               elem->mbs = obstack_copy0 (&collate->mempool,
 988                                          seq->bytes, seq->nbytes);
 989               elem->nmbs = seq->nbytes;
 990             }
 991
 992           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
 993             {
 994               uint32_t wcs[2] = { wc, 0 };
 995
 996               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
 997               elem->nwcs = 1;
 998             }
 999         }
1000     }
1001
1002   /* Test whether this element is not already in the list.  */
1003   if (elem->next != NULL || elem == collate->cursor)
1004     {
1005       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1006                 (int) symlen, symstr, elem->file, elem->line);
1007       lr_ignore_rest (ldfile, 0);
1008       return 1;
1009     }
1010
1011   insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
1012
1013   return 0;
1014 }
1015
1016
1017 static void
1018 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1019                  enum token_t ellipsis, struct charmap_t *charmap,
1020                  struct repertoire_t *repertoire,
1021                  struct locale_collate_t *collate)
1022 {
1023   struct element_t *startp;
1024   struct element_t *endp;
1025
1026   /* Unlink the entry added for the ellipsis.  */
1027   unlink_element (collate);
1028   startp = collate->cursor;
1029
1030   /* Process and add the end-entry.  */
1031   if (symstr != NULL
1032       && insert_value (ldfile, symstr, symlen, charmap, repertoire, collate))
1033     /* Something went wrong with inserting the to-value.  This means
1034        we cannot process the ellipsis.  */
1035     return;
1036
1037   /* Reset the cursor.  */
1038   collate->cursor = startp;
1039
1040   /* Now we have to handle many different situations:
1041      - we have to distinguish between the three different ellipsis forms
1042      - the is the ellipsis at the beginning, in the middle, or at the end.
1043   */
1044   endp = collate->cursor->next;
1045   assert (symstr == NULL || endp != NULL);
1046
1047   /* XXX The following is probably very wrong since also collating symbols
1048      can appear in ranges.  But do we want/can refine the test for that?  */
1049 #if 0
1050   /* Both, the start and the end symbol, must stand for characters.  */
1051   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1052       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1053     {
1054       lr_error (ldfile, _("\
1055 %s: the start and the end symbol of a range must stand for characters"),
1056                 "LC_COLLATE");
1057       return;
1058     }
1059 #endif
1060
1061   if (ellipsis == tok_ellipsis3)
1062     {
1063       /* One requirement we make here: the length of the byte
1064          sequences for the first and end character must be the same.
1065          This is mainly to prevent unwanted effects and this is often
1066          not what is wanted.  */
1067       size_t len = (startp->mbs != NULL ? startp->nmbs
1068                     : (endp->mbs != NULL ? endp->nmbs : 0));
1069       char mbcnt[len + 1];
1070       char mbend[len + 1];
1071
1072       /* Well, this should be caught somewhere else already.  Just to
1073          make sure.  */
1074       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1075       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1076
1077       if (startp != NULL && endp != NULL
1078           && startp->mbs != NULL && endp->mbs != NULL
1079           && startp->nmbs != endp->nmbs)
1080         {
1081           lr_error (ldfile, _("\
1082 %s: byte sequences of first and last character must have the same length"),
1083                     "LC_COLLATE");
1084           return;
1085         }
1086
1087       /* Determine whether we have to generate multibyte sequences.  */
1088       if ((startp == NULL || startp->mbs != NULL)
1089           && (endp == NULL || endp->mbs != NULL))
1090         {
1091           int cnt;
1092           int ret;
1093
1094           /* Prepare the beginning byte sequence.  This is either from the
1095              beginning byte sequence or it is all nulls if it was an
1096              initial ellipsis.  */
1097           if (startp == NULL || startp->mbs == NULL)
1098             memset (mbcnt, '\0', len);
1099           else
1100             {
1101               memcpy (mbcnt, startp->mbs, len);
1102
1103               /* And increment it so that the value is the first one we will
1104                  try to insert.  */
1105               for (cnt = len - 1; cnt >= 0; --cnt)
1106                 if (++mbcnt[cnt] != '\0')
1107                   break;
1108             }
1109           mbcnt[len] = '\0';
1110
1111           /* And the end sequence.  */
1112           if (endp == NULL || endp->mbs == NULL)
1113             memset (mbend, '\0', len);
1114           else
1115             memcpy (mbend, endp->mbs, len);
1116           mbend[len] = '\0';
1117
1118           /* Test whether we have a correct range.  */
1119           ret = memcmp (mbcnt, mbend, len);
1120           if (ret >= 0)
1121             {
1122               if (ret > 0)
1123                 lr_error (ldfile, _("%s: byte sequence of first character of \
1124 sequence is not lower than that of the last character"), "LC_COLLATE");
1125               return;
1126             }
1127
1128           /* Generate the byte sequences data.  */
1129           while (1)
1130             {
1131               struct charseq *seq;
1132
1133               /* Quite a bit of work ahead.  We have to find the character
1134                  definition for the byte sequence and then determine the
1135                  wide character belonging to it.  */
1136               seq = charmap_find_symbol (charmap, mbcnt, len);
1137               if (seq != NULL)
1138                 {
1139                   struct element_t *elem;
1140                   size_t namelen;
1141
1142                   /* I don't this this can ever happen.  */
1143                   assert (seq->name != NULL);
1144                   namelen = strlen (seq->name);
1145
1146                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1147                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1148                                                        namelen);
1149
1150                   /* Now we are ready to insert the new value in the
1151                      sequence.  Find out whether the element is
1152                      already known.  */
1153                   if (find_entry (&collate->seq_table, seq->name, namelen,
1154                                   (void **) &elem) != 0)
1155                     {
1156                       uint32_t wcs[2] = { seq->ucs4, 0 };
1157
1158                       /* We have to allocate an entry.  */
1159                       elem = new_element (collate, mbcnt, len,
1160                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1161                                           ? NULL : wcs, seq->name,
1162                                           namelen, 1);
1163
1164                       /* And add it to the table.  */
1165                       if (insert_entry (&collate->seq_table, seq->name,
1166                                         namelen, elem) != 0)
1167                         /* This cannot happen.  */
1168                         assert (! "Internal error");
1169                     }
1170
1171                   /* Test whether this element is not already in the list.  */
1172                   if (elem->next != NULL || (collate->cursor != NULL
1173                                              && elem->next == collate->cursor))
1174                     {
1175                       lr_error (ldfile, _("\
1176 order for `%.*s' already defined at %s:%Zu"),
1177                                 (int) namelen, seq->name,
1178                                 elem->file, elem->line);
1179                       goto increment;
1180                     }
1181
1182                   /* Enqueue the new element.  */
1183                   elem->last = collate->cursor;
1184                   if (collate->cursor == NULL)
1185                     elem->next = NULL;
1186                   else
1187                     {
1188                       elem->next = collate->cursor->next;
1189                       elem->last->next = elem;
1190                       if (elem->next != NULL)
1191                         elem->next->last = elem;
1192                     }
1193                   if (collate->start == NULL)
1194                     {
1195                       assert (collate->cursor == NULL);
1196                       collate->start = elem;
1197                     }
1198                   collate->cursor = elem;
1199
1200                  /* Add the weight value.  We take them from the
1201                     `ellipsis_weights' member of `collate'.  */
1202                   elem->weights = (struct element_list_t *)
1203                     obstack_alloc (&collate->mempool,
1204                                    nrules * sizeof (struct element_list_t));
1205                   for (cnt = 0; cnt < nrules; ++cnt)
1206                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1207                         && (collate->ellipsis_weight.weights[cnt].w[0]
1208                             == ELEMENT_ELLIPSIS2))
1209                       {
1210                         elem->weights[cnt].w = (struct element_t **)
1211                           obstack_alloc (&collate->mempool,
1212                                          sizeof (struct element_t *));
1213                         elem->weights[cnt].w[0] = elem;
1214                         elem->weights[cnt].cnt = 1;
1215                       }
1216                     else
1217                       {
1218                         /* Simply use the weight from `ellipsis_weight'.  */
1219                         elem->weights[cnt].w =
1220                           collate->ellipsis_weight.weights[cnt].w;
1221                         elem->weights[cnt].cnt =
1222                           collate->ellipsis_weight.weights[cnt].cnt;
1223                       }
1224                 }
1225
1226               /* Increment for the next round.  */
1227             increment:
1228               for (cnt = len - 1; cnt >= 0; --cnt)
1229                 if (++mbcnt[cnt] != '\0')
1230                   break;
1231
1232               /* Find out whether this was all.  */
1233               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1234                 /* Yep, that's all.  */
1235                 break;
1236             }
1237         }
1238     }
1239   else
1240     {
1241       /* For symbolic range we naturally must have a beginning and an
1242          end specified by the user.  */
1243       if (startp == NULL)
1244         lr_error (ldfile, _("\
1245 %s: symbolic range ellipsis must not directly follow `order_start'"),
1246                   "LC_COLLATE");
1247       else if (endp == NULL)
1248         lr_error (ldfile, _("\
1249 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1250                   "LC_COLLATE");
1251       else
1252         {
1253           /* Determine the range.  To do so we have to determine the
1254              common prefix of the both names and then the numeric
1255              values of both ends.  */
1256           size_t lenfrom = strlen (startp->name);
1257           size_t lento = strlen (endp->name);
1258           char buf[lento + 1];
1259           int preflen = 0;
1260           long int from;
1261           long int to;
1262           char *cp;
1263           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1264
1265           if (lenfrom != lento)
1266             {
1267             invalid_range:
1268               lr_error (ldfile, _("\
1269 `%s' and `%.*s' are no valid names for symbolic range"),
1270                         startp->name, (int) lento, endp->name);
1271               return;
1272             }
1273
1274           while (startp->name[preflen] == endp->name[preflen])
1275             if (startp->name[preflen] == '\0')
1276               /* Nothing to be done.  The start and end point are identical
1277                  and while inserting the end point we have already given
1278                  the user an error message.  */
1279               return;
1280             else
1281               ++preflen;
1282
1283           errno = 0;
1284           from = strtol (startp->name + preflen, &cp, base);
1285           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1286             goto invalid_range;
1287
1288           errno = 0;
1289           to = strtol (endp->name + preflen, &cp, base);
1290           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1291             goto invalid_range;
1292
1293           /* Copy the prefix.  */
1294           memcpy (buf, startp->name, preflen);
1295
1296           /* Loop over all values.  */
1297           for (++from; from < to; ++from)
1298             {
1299               struct element_t *elem = NULL;
1300               struct charseq *seq;
1301               uint32_t wc;
1302               int cnt;
1303
1304               /* Generate the the name.  */
1305               sprintf (buf + preflen, base == 10 ? "%ld" : "%lx", from);
1306
1307               /* Look whether this name is already defined.  */
1308               if (find_entry (&collate->seq_table, buf, symlen,
1309                               (void **) &elem) == 0)
1310                 {
1311                   if (elem->next != NULL || (collate->cursor != NULL
1312                                              && elem->next == collate->cursor))
1313                     {
1314                       lr_error (ldfile, _("\
1315 %s: order for `%.*s' already defined at %s:%Zu"),
1316                                 "LC_COLLATE", (int) lenfrom, buf,
1317                                 elem->file, elem->line);
1318                       continue;
1319                     }
1320
1321                   if (elem->name == NULL)
1322                     {
1323                       lr_error (ldfile, _("%s: `%s' must be a character"),
1324                                 "LC_COLLATE", buf);
1325                       continue;
1326                     }
1327                 }
1328
1329               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1330                 {
1331                   /* Search for a character of this name.  */
1332                   seq = charmap_find_value (charmap, buf, lenfrom);
1333                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1334                     {
1335                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1336
1337                       if (seq != NULL)
1338                         seq->ucs4 = wc;
1339                     }
1340                   else
1341                     wc = seq->ucs4;
1342
1343                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1344                     /* We don't know anything about a character with this
1345                        name.  XXX Should we warn?  */
1346                     continue;
1347
1348                   if (elem == NULL)
1349                     {
1350                       uint32_t wcs[2] = { wc, 0 };
1351
1352                       /* We have to allocate an entry.  */
1353                       elem = new_element (collate,
1354                                           seq != NULL ? seq->bytes : NULL,
1355                                           seq != NULL ? seq->nbytes : 0,
1356                                           wc == ILLEGAL_CHAR_VALUE
1357                                           ? NULL : wcs, buf, lenfrom, 1);
1358                     }
1359                   else
1360                     {
1361                       /* Update the element.  */
1362                       if (seq != NULL)
1363                         {
1364                           elem->mbs = obstack_copy0 (&collate->mempool,
1365                                                      seq->bytes, seq->nbytes);
1366                           elem->nmbs = seq->nbytes;
1367                         }
1368
1369                       if (wc != ILLEGAL_CHAR_VALUE)
1370                         {
1371                           uint32_t zero = 0;
1372
1373                           obstack_grow (&collate->mempool,
1374                                         &wc, sizeof (uint32_t));
1375                           obstack_grow (&collate->mempool,
1376                                         &zero, sizeof (uint32_t));
1377                           elem->wcs = obstack_finish (&collate->mempool);
1378                           elem->nwcs = 1;
1379                         }
1380                     }
1381
1382                   elem->file = ldfile->fname;
1383                   elem->line = ldfile->lineno;
1384                   elem->section = collate->current_section;
1385                 }
1386
1387               /* Enqueue the new element.  */
1388               elem->last = collate->cursor;
1389               elem->next = collate->cursor->next;
1390               elem->last->next = elem;
1391               if (elem->next != NULL)
1392                 elem->next->last = elem;
1393               collate->cursor = elem;
1394
1395               /* Now add the weights.  They come from the `ellipsis_weights'
1396                  member of `collate'.  */
1397               elem->weights = (struct element_list_t *)
1398                 obstack_alloc (&collate->mempool,
1399                                nrules * sizeof (struct element_list_t));
1400               for (cnt = 0; cnt < nrules; ++cnt)
1401                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1402                     && (collate->ellipsis_weight.weights[cnt].w[0]
1403                         == ELEMENT_ELLIPSIS2))
1404                   {
1405                     elem->weights[cnt].w = (struct element_t **)
1406                       obstack_alloc (&collate->mempool,
1407                                      sizeof (struct element_t *));
1408                     elem->weights[cnt].w[0] = elem;
1409                     elem->weights[cnt].cnt = 1;
1410                   }
1411                 else
1412                   {
1413                     /* Simly use the weight from `ellipsis_weight'.  */
1414                     elem->weights[cnt].w =
1415                       collate->ellipsis_weight.weights[cnt].w;
1416                     elem->weights[cnt].cnt =
1417                       collate->ellipsis_weight.weights[cnt].cnt;
1418                   }
1419             }
1420         }
1421     }
1422 }
1423
1424
1425 static void
1426 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1427                  struct localedef_t *copy_locale, int ignore_content)
1428 {
1429   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1430     {
1431       struct locale_collate_t *collate;
1432
1433       if (copy_locale == NULL)
1434         {
1435           collate = locale->categories[LC_COLLATE].collate =
1436             (struct locale_collate_t *)
1437             xcalloc (1, sizeof (struct locale_collate_t));
1438
1439           /* Init the various data structures.  */
1440           init_hash (&collate->elem_table, 100);
1441           init_hash (&collate->sym_table, 100);
1442           init_hash (&collate->seq_table, 500);
1443           obstack_init (&collate->mempool);
1444
1445           collate->col_weight_max = -1;
1446         }
1447       else
1448         /* Reuse the copy_locale's data structures.  */
1449         collate = locale->categories[LC_COLLATE].collate =
1450           copy_locale->categories[LC_COLLATE].collate;
1451     }
1452
1453   ldfile->translate_strings = 0;
1454   ldfile->return_widestr = 0;
1455 }
1456
1457
1458 void
1459 collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
1460 {
1461   /* Now is the time when we can assign the individual collation
1462      values for all the symbols.  We have possibly different values
1463      for the wide- and the multibyte-character symbols.  This is done
1464      since it might make a difference in the encoding if there is in
1465      some cases no multibyte-character but there are wide-characters.
1466      (The other way around it is not important since the encoded
1467      collation value in the wide-character case is 32 bits wide and
1468      therefore requires no encoding).
1469
1470      The lowest collation value assigned is 2.  Zero is reserved for
1471      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1472      functions and 1 is used to separate the individual passes for the
1473      different rules.
1474
1475      We also have to construct a list with all the bytes/words which
1476      can come first in a sequence, followed by all the elements which
1477      also start with this byte/word.  The order is reverse which has
1478      among others the important effect that longer strings are located
1479      first in the list.  This is required for the output data since
1480      the algorithm used in `strcoll' etc depends on this.
1481
1482      The multibyte case is easy.  We simply sort into an array with
1483      256 elements.  */
1484   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1485   int mbact[nrules];
1486   int wcact;
1487   int mbseqact;
1488   int wcseqact;
1489   struct element_t *runp;
1490   int i;
1491   int need_undefined = 0;
1492   struct section_list *sect;
1493   int ruleidx;
1494   int nr_wide_elems = 0;
1495
1496   if (collate == NULL)
1497     {
1498       /* No data, no check.  */
1499       if (! be_quiet)
1500         error (0, 0, _("No definition for %s category found"), "LC_COLLATE");
1501       return;
1502     }
1503
1504   /* If this assertion is hit change the type in `element_t'.  */
1505   assert (nrules <= sizeof (runp->used_in_level) * 8);
1506
1507   /* Make sure that the `position' rule is used either in all sections
1508      or in none.  */
1509   for (i = 0; i < nrules; ++i)
1510     for (sect = collate->sections; sect != NULL; sect = sect->next)
1511       if (sect->rules != NULL
1512           && ((sect->rules[i] & sort_position)
1513               != (collate->sections->rules[i] & sort_position)))
1514         {
1515           error (0, 0, _("\
1516 %s: `position' must be used for a specific level in all sections or none"),
1517                  "LC_COLLATE");
1518           break;
1519         }
1520
1521   /* Find out which elements are used at which level.  At the same
1522      time we find out whether we have any undefined symbols.  */
1523   runp = collate->start;
1524   while (runp != NULL)
1525     {
1526       if (runp->mbs != NULL)
1527         {
1528           for (i = 0; i < nrules; ++i)
1529             {
1530               int j;
1531
1532               for (j = 0; j < runp->weights[i].cnt; ++j)
1533                 /* A NULL pointer as the weight means IGNORE.  */
1534                 if (runp->weights[i].w[j] != NULL)
1535                   {
1536                     if (runp->weights[i].w[j]->weights == NULL)
1537                       {
1538                         error_at_line (0, 0, runp->file, runp->line,
1539                                        _("symbol `%s' not defined"),
1540                                        runp->weights[i].w[j]->name);
1541
1542                         need_undefined = 1;
1543                         runp->weights[i].w[j] = &collate->undefined;
1544                       }
1545                     else
1546                       /* Set the bit for the level.  */
1547                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1548                   }
1549             }
1550         }
1551
1552       /* Up to the next entry.  */
1553       runp = runp->next;
1554     }
1555
1556   /* Walk through the list of defined sequences and assign weights.  Also
1557      create the data structure which will allow generating the single byte
1558      character based tables.
1559
1560      Since at each time only the weights for each of the rules are
1561      only compared to other weights for this rule it is possible to
1562      assign more compact weight values than simply counting all
1563      weights in sequence.  We can assign weights from 3, one for each
1564      rule individually and only for those elements, which are actually
1565      used for this rule.
1566
1567      Why is this important?  It is not for the wide char table.  But
1568      it is for the singlebyte output since here larger numbers have to
1569      be encoded to make it possible to emit the value as a byte
1570      string.  */
1571   for (i = 0; i < nrules; ++i)
1572     mbact[i] = 2;
1573   wcact = 2;
1574   mbseqact = 0;
1575   wcseqact = 0;
1576   runp = collate->start;
1577   while (runp != NULL)
1578     {
1579       /* Determine the order.  */
1580       if (runp->used_in_level != 0)
1581         {
1582           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1583                                                  nrules * sizeof (int));
1584
1585           for (i = 0; i < nrules; ++i)
1586             if ((runp->used_in_level & (1 << i)) != 0)
1587               runp->mborder[i] = mbact[i]++;
1588             else
1589               runp->mborder[i] = 0;
1590         }
1591
1592       if (runp->mbs != NULL)
1593         {
1594           struct element_t **eptr;
1595           struct element_t *lastp = NULL;
1596
1597           /* Find the point where to insert in the list.  */
1598           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1599           while (*eptr != NULL)
1600             {
1601               if ((*eptr)->nmbs < runp->nmbs)
1602                 break;
1603
1604               if ((*eptr)->nmbs == runp->nmbs)
1605                 {
1606                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1607
1608                   if (c == 0)
1609                     {
1610                       /* This should not happen.  It means that we have
1611                          two symbols with the same byte sequence.  It is
1612                          of course an error.  */
1613                       error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
1614                                      _("symbol `%s' has the same encoding as"),
1615                                      (*eptr)->name);
1616                       error_at_line (0, 0, runp->file, runp->line,
1617                                      _("symbol `%s'"), runp->name);
1618                       goto dont_insert;
1619                     }
1620                   else if (c < 0)
1621                     /* Insert it here.  */
1622                     break;
1623                 }
1624
1625               /* To the next entry.  */
1626               lastp = *eptr;
1627               eptr = &(*eptr)->mbnext;
1628             }
1629
1630           /* Set the pointers.  */
1631           runp->mbnext = *eptr;
1632           runp->mblast = lastp;
1633           if (*eptr != NULL)
1634             (*eptr)->mblast = runp;
1635           *eptr = runp;
1636         dont_insert:
1637           ;
1638         }
1639
1640       if (runp->used_in_level)
1641         {
1642           runp->wcorder = wcact++;
1643
1644           /* We take the opportunity to count the elements which have
1645              wide characters.  */
1646           ++nr_wide_elems;
1647         }
1648
1649       if (runp->is_character)
1650         {
1651           if (runp->nmbs == 1)
1652             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1653
1654           runp->wcseqorder = wcseqact++;
1655         }
1656
1657       /* Up to the next entry.  */
1658       runp = runp->next;
1659     }
1660
1661   /* Find out whether any of the `mbheads' entries is unset.  In this
1662      case we use the UNDEFINED entry.  */
1663   for (i = 1; i < 256; ++i)
1664     if (collate->mbheads[i] == NULL)
1665       {
1666         need_undefined = 1;
1667         collate->mbheads[i] = &collate->undefined;
1668       }
1669
1670   /* Now to the wide character case.  */
1671   collate->wcheads.p = 6;
1672   collate->wcheads.q = 10;
1673   wchead_table_init (&collate->wcheads);
1674
1675   collate->wcseqorder.p = 6;
1676   collate->wcseqorder.q = 10;
1677   collseq_table_init (&collate->wcseqorder);
1678
1679   /* Start adding.  */
1680   runp = collate->start;
1681   while (runp != NULL)
1682     {
1683       if (runp->wcs != NULL)
1684         {
1685           struct element_t *e;
1686           struct element_t **eptr;
1687           struct element_t *lastp;
1688
1689           /* Insert the collation sequence value.  */
1690           collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1691                              runp->wcseqorder);
1692
1693           /* Find the point where to insert in the list.  */
1694           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1695           eptr = &e;
1696           lastp = NULL;
1697           while (*eptr != NULL)
1698             {
1699               if ((*eptr)->nwcs < runp->nwcs)
1700                 break;
1701
1702               if ((*eptr)->nwcs == runp->nwcs)
1703                 {
1704                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1705                                    (wchar_t *) runp->wcs, runp->nwcs);
1706
1707                   if (c == 0)
1708                     {
1709                       /* This should not happen.  It means that we have
1710                          two symbols with the same byte sequence.  It is
1711                          of course an error.  */
1712                       error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
1713                                      _("symbol `%s' has the same encoding as"),
1714                                      (*eptr)->name);
1715                       error_at_line (0, 0, runp->file, runp->line,
1716                                      _("symbol `%s'"), runp->name);
1717                       goto dont_insertwc;
1718                     }
1719                   else if (c < 0)
1720                     /* Insert it here.  */
1721                     break;
1722                 }
1723
1724               /* To the next entry.  */
1725               lastp = *eptr;
1726               eptr = &(*eptr)->wcnext;
1727             }
1728
1729           /* Set the pointers.  */
1730           runp->wcnext = *eptr;
1731           runp->wclast = lastp;
1732           if (*eptr != NULL)
1733             (*eptr)->wclast = runp;
1734           *eptr = runp;
1735           if (eptr == &e)
1736             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1737         dont_insertwc:
1738           ;
1739         }
1740
1741       /* Up to the next entry.  */
1742       runp = runp->next;
1743     }
1744
1745   collseq_table_finalize (&collate->wcseqorder);
1746
1747   /* Now determine whether the UNDEFINED entry is needed and if yes,
1748      whether it was defined.  */
1749   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1750   if (collate->undefined.file == NULL)
1751     {
1752       if (need_undefined)
1753         {
1754           /* This seems not to be enforced by recent standards.  Don't
1755              emit an error, simply append UNDEFINED at the end.  */
1756           if (0)
1757             error (0, 0, _("no definition of `UNDEFINED'"));
1758
1759           /* Add UNDEFINED at the end.  */
1760           collate->undefined.mborder =
1761             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1762
1763           for (i = 0; i < nrules; ++i)
1764             collate->undefined.mborder[i] = mbact[i]++;
1765         }
1766
1767       /* In any case we will need the definition for the wide character
1768          case.  But we will not complain that it is missing since the
1769          specification strangely enough does not seem to account for
1770          this.  */
1771       collate->undefined.wcorder = wcact++;
1772     }
1773
1774   /* Finally, try to unify the rules for the sections.  Whenever the rules
1775      for a section are the same as those for another section give the
1776      ruleset the same index.  Since there are never many section we can
1777      use an O(n^2) algorithm here.  */
1778   sect = collate->sections;
1779   while (sect != NULL && sect->rules == NULL)
1780     sect = sect->next;
1781   assert (sect != NULL);
1782   ruleidx = 0;
1783   do
1784     {
1785       struct section_list *osect = collate->sections;
1786
1787       while (osect != sect)
1788         if (osect->rules != NULL
1789             && memcmp (osect->rules, sect->rules, nrules) == 0)
1790           break;
1791         else
1792           osect = osect->next;
1793
1794       if (osect == sect)
1795         sect->ruleidx = ruleidx++;
1796       else
1797         sect->ruleidx = osect->ruleidx;
1798
1799       /* Next section.  */
1800       do
1801         sect = sect->next;
1802       while (sect != NULL && sect->rules == NULL);
1803     }
1804   while (sect != NULL);
1805   /* We are currently not prepared for more than 128 rulesets.  But this
1806      should never really be a problem.  */
1807   assert (ruleidx <= 128);
1808 }
1809
1810
1811 static int32_t
1812 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1813                struct element_t *elem)
1814 {
1815   size_t cnt;
1816   int32_t retval;
1817
1818   /* Optimize the use of UNDEFINED.  */
1819   if (elem == &collate->undefined)
1820     /* The weights are already inserted.  */
1821     return 0;
1822
1823   /* This byte can start exactly one collation element and this is
1824      a single byte.  We can directly give the index to the weights.  */
1825   retval = obstack_object_size (pool);
1826
1827   /* Construct the weight.  */
1828   for (cnt = 0; cnt < nrules; ++cnt)
1829     {
1830       char buf[elem->weights[cnt].cnt * 7];
1831       int len = 0;
1832       int i;
1833
1834       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1835         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1836         if (elem->weights[cnt].w[i] != NULL)
1837           len += utf8_encode (&buf[len],
1838                               elem->weights[cnt].w[i]->mborder[cnt]);
1839
1840       /* And add the buffer content.  */
1841       obstack_1grow (pool, len);
1842       obstack_grow (pool, buf, len);
1843     }
1844
1845   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1846 }
1847
1848
1849 static int32_t
1850 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1851                  struct element_t *elem)
1852 {
1853   size_t cnt;
1854   int32_t retval;
1855
1856   /* Optimize the use of UNDEFINED.  */
1857   if (elem == &collate->undefined)
1858     /* The weights are already inserted.  */
1859     return 0;
1860
1861   /* This byte can start exactly one collation element and this is
1862      a single byte.  We can directly give the index to the weights.  */
1863   retval = obstack_object_size (pool) / sizeof (int32_t);
1864
1865   /* Construct the weight.  */
1866   for (cnt = 0; cnt < nrules; ++cnt)
1867     {
1868       int32_t buf[elem->weights[cnt].cnt];
1869       int i;
1870       int32_t j;
1871
1872       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1873         if (elem->weights[cnt].w[i] != NULL)
1874           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1875
1876       /* And add the buffer content.  */
1877       obstack_int32_grow (pool, j);
1878
1879       obstack_grow (pool, buf, j * sizeof (int32_t));
1880     }
1881
1882   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1883 }
1884
1885
1886 void
1887 collate_output (struct localedef_t *locale, struct charmap_t *charmap,
1888                 const char *output_path)
1889 {
1890   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1891   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1892   struct iovec iov[2 + nelems];
1893   struct locale_file data;
1894   uint32_t idx[nelems];
1895   size_t cnt;
1896   size_t ch;
1897   int32_t tablemb[256];
1898   struct obstack weightpool;
1899   struct obstack extrapool;
1900   struct obstack indirectpool;
1901   struct section_list *sect;
1902   struct collidx_table tablewc;
1903   uint32_t elem_size;
1904   uint32_t *elem_table;
1905   int i;
1906   struct element_t *runp;
1907
1908   data.magic = LIMAGIC (LC_COLLATE);
1909   data.n = nelems;
1910   iov[0].iov_base = (void *) &data;
1911   iov[0].iov_len = sizeof (data);
1912
1913   iov[1].iov_base = (void *) idx;
1914   iov[1].iov_len = sizeof (idx);
1915
1916   idx[0] = iov[0].iov_len + iov[1].iov_len;
1917   cnt = 0;
1918
1919   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
1920   iov[2 + cnt].iov_base = &nrules;
1921   iov[2 + cnt].iov_len = sizeof (uint32_t);
1922   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1923   ++cnt;
1924
1925   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
1926   if (collate == NULL)
1927     {
1928       int32_t dummy = 0;
1929
1930       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1931         {
1932           /* The words have to be handled specially.  */
1933           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
1934             {
1935               iov[2 + cnt].iov_base = &dummy;
1936               iov[2 + cnt].iov_len = sizeof (int32_t);
1937             }
1938           else
1939             {
1940               iov[2 + cnt].iov_base = NULL;
1941               iov[2 + cnt].iov_len = 0;
1942             }
1943
1944           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1945             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1946           ++cnt;
1947         }
1948
1949       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
1950
1951       write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
1952
1953       return;
1954     }
1955
1956   obstack_init (&weightpool);
1957   obstack_init (&extrapool);
1958   obstack_init (&indirectpool);
1959
1960   /* Since we are using the sign of an integer to mark indirection the
1961      offsets in the arrays we are indirectly referring to must not be
1962      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
1963   obstack_int32_grow (&extrapool, 0);
1964   obstack_int32_grow (&indirectpool, 0);
1965
1966   /* Prepare the ruleset table.  */
1967   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
1968     if (sect->rules != NULL && sect->ruleidx == i)
1969       {
1970         int j;
1971
1972         obstack_make_room (&weightpool, nrules);
1973
1974         for (j = 0; j < nrules; ++j)
1975           obstack_1grow_fast (&weightpool, sect->rules[j]);
1976         ++i;
1977       }
1978   /* And align the output.  */
1979   i = (nrules * i) % __alignof__ (int32_t);
1980   if (i > 0)
1981     do
1982       obstack_1grow (&weightpool, '\0');
1983     while (++i < __alignof__ (int32_t));
1984
1985   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
1986   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
1987   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
1988   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1989   ++cnt;
1990
1991   /* Generate the 8-bit table.  Walk through the lists of sequences
1992      starting with the same byte and add them one after the other to
1993      the table.  In case we have more than one sequence starting with
1994      the same byte we have to use extra indirection.
1995
1996      First add a record for the NUL byte.  This entry will never be used
1997      so it does not matter.  */
1998   tablemb[0] = 0;
1999
2000   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2001      will probably be used more than once it is good to store the
2002      weights only once.  */
2003   if (collate->undefined.used_in_level != 0)
2004     output_weight (&weightpool, collate, &collate->undefined);
2005
2006   for (ch = 1; ch < 256; ++ch)
2007     if (collate->mbheads[ch]->mbnext == NULL
2008         && collate->mbheads[ch]->nmbs <= 1)
2009       {
2010         tablemb[ch] = output_weight (&weightpool, collate,
2011                                      collate->mbheads[ch]);
2012       }
2013     else
2014       {
2015         /* The entries in the list are sorted by length and then
2016            alphabetically.  This is the order in which we will add the
2017            elements to the collation table.  This allows simply walking
2018            the table in sequence and stopping at the first matching
2019            entry.  Since the longer sequences are coming first in the
2020            list they have the possibility to match first, just as it
2021            has to be.  In the worst case we are walking to the end of
2022            the list where we put, if no singlebyte sequence is defined
2023            in the locale definition, the weights for UNDEFINED.
2024
2025            To reduce the length of the search list we compress them a bit.
2026            This happens by collecting sequences of consecutive byte
2027            sequences in one entry (having and begin and end byte sequence)
2028            and add only one index into the weight table.  We can find the
2029            consecutive entries since they are also consecutive in the list.  */
2030         struct element_t *runp = collate->mbheads[ch];
2031         struct element_t *lastp;
2032
2033         assert ((obstack_object_size (&extrapool)
2034                  & (__alignof__ (int32_t) - 1)) == 0);
2035
2036         tablemb[ch] = -obstack_object_size (&extrapool);
2037
2038         do
2039           {
2040             /* Store the current index in the weight table.  We know that
2041                the current position in the `extrapool' is aligned on a
2042                32-bit address.  */
2043             int32_t weightidx;
2044             int added;
2045
2046             /* Find out wether this is a single entry or we have more than
2047                one consecutive entry.  */
2048             if (runp->mbnext != NULL
2049                 && runp->nmbs == runp->mbnext->nmbs
2050                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2051                 && (runp->mbs[runp->nmbs - 1]
2052                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2053               {
2054                 int i;
2055                 struct element_t *series_startp = runp;
2056                 struct element_t *curp;
2057
2058                 /* Compute how much space we will need.  */
2059                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2060                           + __alignof__ (int32_t) - 1)
2061                          & ~(__alignof__ (int32_t) - 1));
2062                 assert ((obstack_object_size (&extrapool)
2063                          & (__alignof__ (int32_t) - 1)) == 0);
2064                 obstack_make_room (&extrapool, added);
2065
2066                 /* More than one consecutive entry.  We mark this by having
2067                    a negative index into the indirect table.  */
2068                 obstack_int32_grow_fast (&extrapool,
2069                                          -(obstack_object_size (&indirectpool)
2070                                            / sizeof (int32_t)));
2071
2072                 /* Now search first the end of the series.  */
2073                 do
2074                   runp = runp->mbnext;
2075                 while (runp->mbnext != NULL
2076                        && runp->nmbs == runp->mbnext->nmbs
2077                        && memcmp (runp->mbs, runp->mbnext->mbs,
2078                                   runp->nmbs - 1) == 0
2079                        && (runp->mbs[runp->nmbs - 1]
2080                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2081
2082                 /* Now walk backward from here to the beginning.  */
2083                 curp = runp;
2084
2085                 assert (runp->nmbs <= 256);
2086                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2087                 for (i = 1; i < curp->nmbs; ++i)
2088                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2089
2090                 /* Now find the end of the consecutive sequence and
2091                    add all the indeces in the indirect pool.  */
2092                 do
2093                   {
2094                     weightidx = output_weight (&weightpool, collate, curp);
2095                     obstack_int32_grow (&indirectpool, weightidx);
2096
2097                     curp = curp->mblast;
2098                   }
2099                 while (curp != series_startp);
2100
2101                 /* Add the final weight.  */
2102                 weightidx = output_weight (&weightpool, collate, curp);
2103                 obstack_int32_grow (&indirectpool, weightidx);
2104
2105                 /* And add the end byte sequence.  Without length this
2106                    time.  */
2107                 for (i = 1; i < curp->nmbs; ++i)
2108                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2109               }
2110             else
2111               {
2112                 /* A single entry.  Simply add the index and the length and
2113                    string (except for the first character which is already
2114                    tested for).  */
2115                 int i;
2116
2117                 /* Output the weight info.  */
2118                 weightidx = output_weight (&weightpool, collate, runp);
2119
2120                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2121                           + __alignof__ (int32_t) - 1)
2122                          & ~(__alignof__ (int32_t) - 1));
2123                 assert ((obstack_object_size (&extrapool)
2124                          & (__alignof__ (int32_t) - 1)) == 0);
2125                 obstack_make_room (&extrapool, added);
2126
2127                 obstack_int32_grow_fast (&extrapool, weightidx);
2128                 assert (runp->nmbs <= 256);
2129                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2130
2131                 for (i = 1; i < runp->nmbs; ++i)
2132                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2133               }
2134
2135             /* Add alignment bytes if necessary.  */
2136             while ((obstack_object_size (&extrapool)
2137                     & (__alignof__ (int32_t) - 1)) != 0)
2138               obstack_1grow_fast (&extrapool, '\0');
2139
2140             /* Next entry.  */
2141             lastp = runp;
2142             runp = runp->mbnext;
2143           }
2144         while (runp != NULL);
2145
2146         assert ((obstack_object_size (&extrapool)
2147                  & (__alignof__ (int32_t) - 1)) == 0);
2148
2149         /* If the final entry in the list is not a single character we
2150            add an UNDEFINED entry here.  */
2151         if (lastp->nmbs != 1)
2152           {
2153             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2154                          & ~(__alignof__ (int32_t) - 1));
2155             obstack_make_room (&extrapool, added);
2156
2157             obstack_int32_grow_fast (&extrapool, 0);
2158             /* XXX What rule? We just pick the first.  */
2159             obstack_1grow_fast (&extrapool, 0);
2160             /* Length is zero.  */
2161             obstack_1grow_fast (&extrapool, 0);
2162
2163             /* Add alignment bytes if necessary.  */
2164             while ((obstack_object_size (&extrapool)
2165                     & (__alignof__ (int32_t) - 1)) != 0)
2166               obstack_1grow_fast (&extrapool, '\0');
2167           }
2168       }
2169
2170   /* Add padding to the tables if necessary.  */
2171   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2172          != 0)
2173     obstack_1grow (&weightpool, 0);
2174
2175   /* Now add the four tables.  */
2176   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2177   iov[2 + cnt].iov_base = tablemb;
2178   iov[2 + cnt].iov_len = sizeof (tablemb);
2179   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2180   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2181   ++cnt;
2182
2183   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2184   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2185   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2186   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2187   ++cnt;
2188
2189   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2190   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2191   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2192   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2193   ++cnt;
2194
2195   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2196   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2197   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2198   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2199   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2200   ++cnt;
2201
2202
2203   /* Now the same for the wide character table.  We need to store some
2204      more information here.  */
2205   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2206   iov[2 + cnt].iov_base = NULL;
2207   iov[2 + cnt].iov_len = 0;
2208   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2209   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2210   ++cnt;
2211
2212   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2213   iov[2 + cnt].iov_base = NULL;
2214   iov[2 + cnt].iov_len = 0;
2215   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2216   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2217   ++cnt;
2218
2219   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2220   iov[2 + cnt].iov_base = NULL;
2221   iov[2 + cnt].iov_len = 0;
2222   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2223   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2224   ++cnt;
2225
2226   /* Since we are using the sign of an integer to mark indirection the
2227      offsets in the arrays we are indirectly referring to must not be
2228      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2229   obstack_int32_grow (&extrapool, 0);
2230   obstack_int32_grow (&indirectpool, 0);
2231
2232   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2233      will probably be used more than once it is good to store the
2234      weights only once.  */
2235   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2236     abort ();
2237
2238   /* Generate the table.  Walk through the lists of sequences starting
2239      with the same wide character and add them one after the other to
2240      the table.  In case we have more than one sequence starting with
2241      the same byte we have to use extra indirection.  */
2242   {
2243     auto void add_to_tablewc (uint32_t ch, struct element_t *runp);
2244
2245     void add_to_tablewc (uint32_t ch, struct element_t *runp)
2246       {
2247         if (runp->wcnext == NULL && runp->nwcs == 1)
2248           {
2249             int32_t weigthidx = output_weightwc (&weightpool, collate, runp);
2250             collidx_table_add (&tablewc, ch, weigthidx);
2251           }
2252         else
2253           {
2254             /* As for the singlebyte table, we recognize sequences and
2255                compress them.  */
2256             struct element_t *lastp;
2257
2258             collidx_table_add (&tablewc, ch,
2259                                -(obstack_object_size (&extrapool) / sizeof (uint32_t)));
2260
2261             do
2262               {
2263                 /* Store the current index in the weight table.  We know that
2264                    the current position in the `extrapool' is aligned on a
2265                    32-bit address.  */
2266                 int32_t weightidx;
2267                 int added;
2268
2269                 /* Find out wether this is a single entry or we have more than
2270                    one consecutive entry.  */
2271                 if (runp->wcnext != NULL
2272                     && runp->nwcs == runp->wcnext->nwcs
2273                     && wmemcmp ((wchar_t *) runp->wcs,
2274                                 (wchar_t *)runp->wcnext->wcs,
2275                                 runp->nwcs - 1) == 0
2276                     && (runp->wcs[runp->nwcs - 1]
2277                         == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2278                   {
2279                     int i;
2280                     struct element_t *series_startp = runp;
2281                     struct element_t *curp;
2282
2283                     /* Now add first the initial byte sequence.  */
2284                     added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2285                     if (sizeof (int32_t) == sizeof (int))
2286                       obstack_make_room (&extrapool, added);
2287
2288                     /* More than one consecutive entry.  We mark this by having
2289                        a negative index into the indirect table.  */
2290                     obstack_int32_grow_fast (&extrapool,
2291                                              -(obstack_object_size (&indirectpool)
2292                                                / sizeof (int32_t)));
2293                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2294
2295                     do
2296                       runp = runp->wcnext;
2297                     while (runp->wcnext != NULL
2298                            && runp->nwcs == runp->wcnext->nwcs
2299                            && wmemcmp ((wchar_t *) runp->wcs,
2300                                        (wchar_t *)runp->wcnext->wcs,
2301                                        runp->nwcs - 1) == 0
2302                            && (runp->wcs[runp->nwcs - 1]
2303                                == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2304
2305                     /* Now walk backward from here to the beginning.  */
2306                     curp = runp;
2307
2308                     for (i = 1; i < runp->nwcs; ++i)
2309                       obstack_int32_grow_fast (&extrapool, curp->wcs[i]);
2310
2311                     /* Now find the end of the consecutive sequence and
2312                        add all the indeces in the indirect pool.  */
2313                     do
2314                       {
2315                         weightidx = output_weightwc (&weightpool, collate,
2316                                                      curp);
2317                         obstack_int32_grow (&indirectpool, weightidx);
2318
2319                         curp = curp->wclast;
2320                       }
2321                     while (curp != series_startp);
2322
2323                     /* Add the final weight.  */
2324                     weightidx = output_weightwc (&weightpool, collate, curp);
2325                     obstack_int32_grow (&indirectpool, weightidx);
2326
2327                     /* And add the end byte sequence.  Without length this
2328                        time.  */
2329                     for (i = 1; i < curp->nwcs; ++i)
2330                       obstack_int32_grow (&extrapool, curp->wcs[i]);
2331                   }
2332                 else
2333                   {
2334                     /* A single entry.  Simply add the index and the length and
2335                        string (except for the first character which is already
2336                        tested for).  */
2337                     int i;
2338
2339                     /* Output the weight info.  */
2340                     weightidx = output_weightwc (&weightpool, collate, runp);
2341
2342                     added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2343                     if (sizeof (int) == sizeof (int32_t))
2344                       obstack_make_room (&extrapool, added);
2345
2346                     obstack_int32_grow_fast (&extrapool, weightidx);
2347                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2348                     for (i = 1; i < runp->nwcs; ++i)
2349                       obstack_int32_grow_fast (&extrapool, runp->wcs[i]);
2350                   }
2351
2352                 /* Next entry.  */
2353                 lastp = runp;
2354                 runp = runp->wcnext;
2355               }
2356             while (runp != NULL);
2357           }
2358       }
2359
2360     tablewc.p = 6;
2361     tablewc.q = 10;
2362     collidx_table_init (&tablewc);
2363
2364     wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2365
2366     collidx_table_finalize (&tablewc);
2367   }
2368
2369   /* Now add the four tables.  */
2370   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2371   iov[2 + cnt].iov_base = tablewc.result;
2372   iov[2 + cnt].iov_len = tablewc.result_size;
2373   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2374   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2375   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2376   ++cnt;
2377
2378   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2379   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2380   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2381   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2382   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2383   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2384   ++cnt;
2385
2386   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2387   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2388   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2389   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2390   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2391   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2392   ++cnt;
2393
2394   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2395   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2396   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2397   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2398   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2399   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2400   ++cnt;
2401
2402
2403   /* Finally write the table with collation element names out.  It is
2404      a hash table with a simple function which gets the name of the
2405      character as the input.  One character might have many names.  The
2406      value associated with the name is an index into the weight table
2407      where we are then interested in the first-level weight value.
2408
2409      To determine how large the table should be we are counting the
2410      elements have to put in.  Since we are using internal chaining
2411      using a secondary hash function we have to make the table a bit
2412      larger to avoid extremely long search times.  We can achieve
2413      good results with a 40% larger table than there are entries.  */
2414   elem_size = 0;
2415   runp = collate->start;
2416   while (runp != NULL)
2417     {
2418       if (runp->mbs != NULL && runp->weights != NULL)
2419         /* Yep, the element really counts.  */
2420         ++elem_size;
2421
2422       runp = runp->next;
2423     }
2424   /* Add 40% and find the next prime number.  */
2425   elem_size = MIN (next_prime (elem_size * 1.4), 257);
2426
2427   /* Allocate the table.  Each entry consists of two words: the hash
2428      value and an index in a secondary table which provides the index
2429      into the weight table and the string itself (so that a match can
2430      be determined).  */
2431   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2432                                            elem_size * 2 * sizeof (uint32_t));
2433   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2434
2435   /* Now add the elements.  */
2436   runp = collate->start;
2437   while (runp != NULL)
2438     {
2439       if (runp->mbs != NULL && runp->weights != NULL)
2440         {
2441           /* Compute the hash value of the name.  */
2442           uint32_t namelen = strlen (runp->name);
2443           uint32_t hash = elem_hash (runp->name, namelen);
2444           size_t idx = hash % elem_size;
2445
2446           if (elem_table[idx * 2] != 0)
2447             {
2448               /* The spot is already take.  Try iterating using the value
2449                  from the secondary hashing function.  */
2450               size_t iter = hash % (elem_size - 2);
2451
2452               do
2453                 {
2454                   idx += iter;
2455                   if (idx >= elem_size)
2456                     idx -= elem_size;
2457                 }
2458               while (elem_table[idx * 2] != 0);
2459
2460               /* This is the spot where we will insert the value.  */
2461               elem_table[idx * 2] = hash;
2462               elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2463
2464               /* The the string itself including length.  */
2465               obstack_1grow (&extrapool, namelen);
2466               obstack_grow (&extrapool, runp->name, namelen);
2467
2468               /* And the multibyte representation.  */
2469               obstack_1grow (&extrapool, runp->nmbs);
2470               obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2471
2472               /* And align again to 32 bits.  */
2473               if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2474                 obstack_grow (&extrapool, "\0\0",
2475                               (sizeof (int32_t)
2476                                - ((1 + namelen + 1 + runp->nmbs)
2477                                   % sizeof (int32_t))));
2478
2479               /* Now some 32-bit values: multibyte collation sequence,
2480                  wide char string (including length), and wide char
2481                  collation sequence.  */
2482               obstack_int32_grow (&extrapool, runp->mbseqorder);
2483
2484               obstack_int32_grow (&extrapool, runp->nwcs);
2485               obstack_grow (&extrapool, runp->wcs,
2486                             runp->nwcs * sizeof (uint32_t));
2487
2488               obstack_int32_grow (&extrapool, runp->wcseqorder);
2489             }
2490         }
2491
2492       runp = runp->next;
2493     }
2494
2495   /* Prepare to write out this data.  */
2496   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2497   iov[2 + cnt].iov_base = &elem_size;
2498   iov[2 + cnt].iov_len = sizeof (int32_t);
2499   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2500   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2501   ++cnt;
2502
2503   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2504   iov[2 + cnt].iov_base = elem_table;
2505   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2506   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2507   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2508   ++cnt;
2509
2510   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2511   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2512   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2513   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2514   ++cnt;
2515
2516   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2517   iov[2 + cnt].iov_base = collate->mbseqorder;
2518   iov[2 + cnt].iov_len = 256;
2519   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2520   ++cnt;
2521
2522   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2523   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2524   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2525   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2526   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2527   ++cnt;
2528
2529   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2530   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2531   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2532   ++cnt;
2533
2534   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2535
2536   write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
2537
2538   obstack_free (&weightpool, NULL);
2539   obstack_free (&extrapool, NULL);
2540   obstack_free (&indirectpool, NULL);
2541 }
2542
2543
2544 void
2545 collate_read (struct linereader *ldfile, struct localedef_t *result,
2546               struct charmap_t *charmap, const char *repertoire_name,
2547               int ignore_content)
2548 {
2549   struct repertoire_t *repertoire = NULL;
2550   struct locale_collate_t *collate;
2551   struct token *now;
2552   struct token *arg = NULL;
2553   enum token_t nowtok;
2554   enum token_t was_ellipsis = tok_none;
2555   struct localedef_t *copy_locale = NULL;
2556   /* Parsing state:
2557      0 - start
2558      1 - between `order-start' and `order-end'
2559      2 - after `order-end'
2560      3 - after `reorder-after', waiting for `reorder-end'
2561      4 - after `reorder-end'
2562      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2563      6 - after `reorder-sections-end'
2564   */
2565   int state = 0;
2566
2567   /* Get the repertoire we have to use.  */
2568   if (repertoire_name != NULL)
2569     repertoire = repertoire_read (repertoire_name);
2570
2571   /* The rest of the line containing `LC_COLLATE' must be free.  */
2572   lr_ignore_rest (ldfile, 1);
2573
2574   do
2575     {
2576       now = lr_token (ldfile, charmap, NULL, verbose);
2577       nowtok = now->tok;
2578     }
2579   while (nowtok == tok_eol);
2580
2581   if (nowtok == tok_copy)
2582     {
2583       state = 2;
2584       now = lr_token (ldfile, charmap, NULL, verbose);
2585       if (now->tok != tok_string)
2586         {
2587           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2588
2589         skip_category:
2590           do
2591             now = lr_token (ldfile, charmap, NULL, verbose);
2592           while (now->tok != tok_eof && now->tok != tok_end);
2593
2594           if (now->tok != tok_eof
2595               || (now = lr_token (ldfile, charmap, NULL, verbose),
2596                   now->tok == tok_eof))
2597             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2598           else if (now->tok != tok_lc_collate)
2599             {
2600               lr_error (ldfile, _("\
2601 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2602               lr_ignore_rest (ldfile, 0);
2603             }
2604           else
2605             lr_ignore_rest (ldfile, 1);
2606
2607           return;
2608         }
2609
2610       if (! ignore_content)
2611         {
2612           /* Get the locale definition.  */
2613           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2614                                      repertoire_name, charmap, NULL);
2615           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2616             {
2617               /* Not yet loaded.  So do it now.  */
2618               if (locfile_read (copy_locale, charmap) != 0)
2619                 goto skip_category;
2620             }
2621         }
2622
2623       lr_ignore_rest (ldfile, 1);
2624
2625       now = lr_token (ldfile, charmap, NULL, verbose);
2626       nowtok = now->tok;
2627     }
2628
2629   /* Prepare the data structures.  */
2630   collate_startup (ldfile, result, copy_locale, ignore_content);
2631   collate = result->categories[LC_COLLATE].collate;
2632
2633   while (1)
2634     {
2635       char ucs4buf[10];
2636       char *symstr;
2637       size_t symlen;
2638
2639       /* Of course we don't proceed beyond the end of file.  */
2640       if (nowtok == tok_eof)
2641         break;
2642
2643       /* Ignore empty lines.  */
2644       if (nowtok == tok_eol)
2645         {
2646           now = lr_token (ldfile, charmap, NULL, verbose);
2647           nowtok = now->tok;
2648           continue;
2649         }
2650
2651       switch (nowtok)
2652         {
2653         case tok_copy:
2654           /* Allow copying other locales.  */
2655           now = lr_token (ldfile, charmap, NULL, verbose);
2656           if (now->tok != tok_string)
2657             goto err_label;
2658
2659           if (! ignore_content)
2660             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2661                          charmap, result);
2662
2663           lr_ignore_rest (ldfile, 1);
2664           break;
2665
2666         case tok_coll_weight_max:
2667           /* Ignore the rest of the line if we don't need the input of
2668              this line.  */
2669           if (ignore_content)
2670             {
2671               lr_ignore_rest (ldfile, 0);
2672               break;
2673             }
2674
2675           if (state != 0)
2676             goto err_label;
2677
2678           arg = lr_token (ldfile, charmap, NULL, verbose);
2679           if (arg->tok != tok_number)
2680             goto err_label;
2681           if (collate->col_weight_max != -1)
2682             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2683                       "LC_COLLATE", "col_weight_max");
2684           else
2685             collate->col_weight_max = arg->val.num;
2686           lr_ignore_rest (ldfile, 1);
2687           break;
2688
2689         case tok_section_symbol:
2690           /* Ignore the rest of the line if we don't need the input of
2691              this line.  */
2692           if (ignore_content)
2693             {
2694               lr_ignore_rest (ldfile, 0);
2695               break;
2696             }
2697
2698           if (state != 0)
2699             goto err_label;
2700
2701           arg = lr_token (ldfile, charmap, repertoire, verbose);
2702           if (arg->tok != tok_bsymbol)
2703             goto err_label;
2704           else if (!ignore_content)
2705             {
2706               /* Check whether this section is already known.  */
2707               struct section_list *known = collate->sections;
2708               while (known != NULL)
2709                 {
2710                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2711                     break;
2712                   known = known->next;
2713                 }
2714
2715               if (known != NULL)
2716                 {
2717                   lr_error (ldfile,
2718                             _("%s: duplicate declaration of section `%s'"),
2719                             "LC_COLLATE", arg->val.str.startmb);
2720                   free (arg->val.str.startmb);
2721                 }
2722               else
2723                 collate->sections = make_seclist_elem (collate,
2724                                                        arg->val.str.startmb,
2725                                                        collate->sections);
2726
2727               lr_ignore_rest (ldfile, known == NULL);
2728             }
2729           else
2730             {
2731               free (arg->val.str.startmb);
2732               lr_ignore_rest (ldfile, 0);
2733             }
2734           break;
2735
2736         case tok_collating_element:
2737           /* Ignore the rest of the line if we don't need the input of
2738              this line.  */
2739           if (ignore_content)
2740             {
2741               lr_ignore_rest (ldfile, 0);
2742               break;
2743             }
2744
2745           if (state != 0 && state != 2)
2746             goto err_label;
2747
2748           arg = lr_token (ldfile, charmap, repertoire, verbose);
2749           if (arg->tok != tok_bsymbol)
2750             goto err_label;
2751           else
2752             {
2753               const char *symbol = arg->val.str.startmb;
2754               size_t symbol_len = arg->val.str.lenmb;
2755
2756               /* Next the `from' keyword.  */
2757               arg = lr_token (ldfile, charmap, repertoire, verbose);
2758               if (arg->tok != tok_from)
2759                 {
2760                   free ((char *) symbol);
2761                   goto err_label;
2762                 }
2763
2764               ldfile->return_widestr = 1;
2765               ldfile->translate_strings = 1;
2766
2767               /* Finally the string with the replacement.  */
2768               arg = lr_token (ldfile, charmap, repertoire, verbose);
2769
2770               ldfile->return_widestr = 0;
2771               ldfile->translate_strings = 0;
2772
2773               if (arg->tok != tok_string)
2774                 goto err_label;
2775
2776               if (!ignore_content && symbol != NULL)
2777                 {
2778                   /* The name is already defined.  */
2779                   if (check_duplicate (ldfile, collate, charmap,
2780                                        repertoire, symbol, symbol_len))
2781                     goto col_elem_free;
2782
2783                   if (arg->val.str.startmb != NULL)
2784                     insert_entry (&collate->elem_table, symbol, symbol_len,
2785                                   new_element (collate,
2786                                                arg->val.str.startmb,
2787                                                arg->val.str.lenmb - 1,
2788                                                arg->val.str.startwc,
2789                                                symbol, symbol_len, 0));
2790                 }
2791               else
2792                 {
2793                 col_elem_free:
2794                   if (symbol != NULL)
2795                     free ((char *) symbol);
2796                   if (arg->val.str.startmb != NULL)
2797                     free (arg->val.str.startmb);
2798                   if (arg->val.str.startwc != NULL)
2799                     free (arg->val.str.startwc);
2800                 }
2801               lr_ignore_rest (ldfile, 1);
2802             }
2803           break;
2804
2805         case tok_collating_symbol:
2806           /* Ignore the rest of the line if we don't need the input of
2807              this line.  */
2808           if (ignore_content)
2809             {
2810               lr_ignore_rest (ldfile, 0);
2811               break;
2812             }
2813
2814           if (state != 0 && state != 2)
2815             goto err_label;
2816
2817           arg = lr_token (ldfile, charmap, repertoire, verbose);
2818           if (arg->tok != tok_bsymbol)
2819             goto err_label;
2820           else
2821             {
2822               char *symbol = arg->val.str.startmb;
2823               size_t symbol_len = arg->val.str.lenmb;
2824               char *endsymbol = NULL;
2825               size_t endsymbol_len = 0;
2826               enum token_t ellipsis = tok_none;
2827
2828               arg = lr_token (ldfile, charmap, repertoire, verbose);
2829               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2830                 {
2831                   ellipsis = arg->tok;
2832
2833                   arg = lr_token (ldfile, charmap, repertoire, verbose);
2834                   if (arg->tok != tok_bsymbol)
2835                     {
2836                       free (symbol);
2837                       goto err_label;
2838                     }
2839
2840                   endsymbol = arg->val.str.startmb;
2841                   endsymbol_len = arg->val.str.lenmb;
2842
2843                   lr_ignore_rest (ldfile, 1);
2844                 }
2845               else if (arg->tok != tok_eol)
2846                 {
2847                   free (symbol);
2848                   goto err_label;
2849                 }
2850
2851               if (!ignore_content)
2852                 {
2853                   if (symbol == NULL
2854                       || (ellipsis != tok_none && endsymbol == NULL))
2855                     {
2856                       lr_error (ldfile, _("\
2857 %s: unknown character in collating symbol name"),
2858                                 "LC_COLLATE");
2859                       goto col_sym_free;
2860                     }
2861                   else if (ellipsis == tok_none)
2862                     {
2863                       /* A single symbol, no ellipsis.  */
2864                       if (check_duplicate (ldfile, collate, charmap,
2865                                            repertoire, symbol, symbol_len))
2866                         /* The name is already defined.  */
2867                         goto col_sym_free;
2868
2869                       insert_entry (&collate->sym_table, symbol, symbol_len,
2870                                     new_symbol (collate, symbol, symbol_len));
2871                     }
2872                   else if (symbol_len != endsymbol_len)
2873                     {
2874                     col_sym_inv_range:
2875                       lr_error (ldfile,
2876                                 _("invalid names for character range"));
2877                       goto col_sym_free;
2878                     }
2879                   else
2880                     {
2881                       /* Oh my, we have to handle an ellipsis.  First, as
2882                          usual, determine the common prefix and then
2883                          convert the rest into a range.  */
2884                       size_t prefixlen;
2885                       unsigned long int from;
2886                       unsigned long int to;
2887                       char *endp;
2888
2889                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2890                         if (symbol[prefixlen] != endsymbol[prefixlen])
2891                           break;
2892
2893                       /* Convert the rest into numbers.  */
2894                       symbol[symbol_len] = '\0';
2895                       from = strtoul (&symbol[prefixlen], &endp,
2896                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2897                       if (*endp != '\0')
2898                         goto col_sym_inv_range;
2899
2900                       endsymbol[symbol_len] = '\0';
2901                       to = strtoul (&endsymbol[prefixlen], &endp,
2902                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2903                       if (*endp != '\0')
2904                         goto col_sym_inv_range;
2905
2906                       if (from > to)
2907                         goto col_sym_inv_range;
2908
2909                       /* Now loop over all entries.  */
2910                       while (from <= to)
2911                         {
2912                           char *symbuf;
2913
2914                           symbuf = (char *) obstack_alloc (&collate->mempool,
2915                                                            symbol_len + 1);
2916
2917                           /* Create the name.  */
2918                           sprintf (symbuf,
2919                                    ellipsis == tok_ellipsis2
2920                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2921                                    (int) prefixlen, symbol,
2922                                    (int) (symbol_len - prefixlen), from);
2923
2924                           if (check_duplicate (ldfile, collate, charmap,
2925                                                repertoire, symbuf, symbol_len))
2926                             /* The name is already defined.  */
2927                             goto col_sym_free;
2928
2929                           insert_entry (&collate->sym_table, symbuf,
2930                                         symbol_len,
2931                                         new_symbol (collate, symbuf,
2932                                                     symbol_len));
2933
2934                           /* Increment the counter.  */
2935                           ++from;
2936                         }
2937
2938                       goto col_sym_free;
2939                     }
2940                 }
2941               else
2942                 {
2943                 col_sym_free:
2944                   if (symbol != NULL)
2945                     free (symbol);
2946                   if (endsymbol != NULL)
2947                     free (endsymbol);
2948                 }
2949             }
2950           break;
2951
2952         case tok_symbol_equivalence:
2953           /* Ignore the rest of the line if we don't need the input of
2954              this line.  */
2955           if (ignore_content)
2956             {
2957               lr_ignore_rest (ldfile, 0);
2958               break;
2959             }
2960
2961           if (state != 0)
2962             goto err_label;
2963
2964           arg = lr_token (ldfile, charmap, repertoire, verbose);
2965           if (arg->tok != tok_bsymbol)
2966             goto err_label;
2967           else
2968             {
2969               const char *newname = arg->val.str.startmb;
2970               size_t newname_len = arg->val.str.lenmb;
2971               const char *symname;
2972               size_t symname_len;
2973               struct symbol_t *symval;
2974
2975               arg = lr_token (ldfile, charmap, repertoire, verbose);
2976               if (arg->tok != tok_bsymbol)
2977                 {
2978                   if (newname != NULL)
2979                     free ((char *) newname);
2980                   goto err_label;
2981                 }
2982
2983               symname = arg->val.str.startmb;
2984               symname_len = arg->val.str.lenmb;
2985
2986               if (newname == NULL)
2987                 {
2988                   lr_error (ldfile, _("\
2989 %s: unknown character in equivalent definition name"),
2990                             "LC_COLLATE");
2991
2992                 sym_equiv_free:
2993                   if (newname != NULL)
2994                     free ((char *) newname);
2995                   if (symname != NULL)
2996                     free ((char *) symname);
2997                   break;
2998                 }
2999               if (symname == NULL)
3000                 {
3001                   lr_error (ldfile, _("\
3002 %s: unknown character in equivalent definition value"),
3003                             "LC_COLLATE");
3004                   goto sym_equiv_free;
3005                 }
3006
3007               /* See whether the symbol name is already defined.  */
3008               if (find_entry (&collate->sym_table, symname, symname_len,
3009                               (void **) &symval) != 0)
3010                 {
3011                   lr_error (ldfile, _("\
3012 %s: unknown symbol `%s' in equivalent definition"),
3013                             "LC_COLLATE", symname);
3014                   goto col_sym_free;
3015                 }
3016
3017               if (insert_entry (&collate->sym_table,
3018                                 newname, newname_len, symval) < 0)
3019                 {
3020                   lr_error (ldfile, _("\
3021 error while adding equivalent collating symbol"));
3022                   goto sym_equiv_free;
3023                 }
3024
3025               free ((char *) symname);
3026             }
3027           lr_ignore_rest (ldfile, 1);
3028           break;
3029
3030         case tok_script:
3031           /* We get told about the scripts we know.  */
3032           arg = lr_token (ldfile, charmap, repertoire, verbose);
3033           if (arg->tok != tok_bsymbol)
3034             goto err_label;
3035           else
3036             {
3037               struct section_list *runp = collate->known_sections;
3038               char *name;
3039
3040               while (runp != NULL)
3041                 if (strncmp (runp->name, arg->val.str.startmb,
3042                              arg->val.str.lenmb) == 0
3043                     && runp->name[arg->val.str.lenmb] == '\0')
3044                   break;
3045                 else
3046                   runp = runp->def_next;
3047
3048               if (runp != NULL)
3049                 {
3050                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3051                             runp->name);
3052                   lr_ignore_rest (ldfile, 0);
3053                   break;
3054                 }
3055
3056               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3057               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3058               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3059               name[arg->val.str.lenmb] = '\0';
3060               runp->name = name;
3061
3062               runp->def_next = collate->known_sections;
3063               collate->known_sections = runp;
3064             }
3065           lr_ignore_rest (ldfile, 1);
3066           break;
3067
3068         case tok_order_start:
3069           /* Ignore the rest of the line if we don't need the input of
3070              this line.  */
3071           if (ignore_content)
3072             {
3073               lr_ignore_rest (ldfile, 0);
3074               break;
3075             }
3076
3077           if (state != 0 && state != 1)
3078             goto err_label;
3079           state = 1;
3080
3081           /* The 14652 draft does not specify whether all `order_start' lines
3082              must contain the same number of sort-rules, but 14651 does.  So
3083              we require this here as well.  */
3084           arg = lr_token (ldfile, charmap, repertoire, verbose);
3085           if (arg->tok == tok_bsymbol)
3086             {
3087               /* This better should be a section name.  */
3088               struct section_list *sp = collate->known_sections;
3089               while (sp != NULL
3090                      && (sp->name == NULL
3091                          || strncmp (sp->name, arg->val.str.startmb,
3092                                      arg->val.str.lenmb) != 0
3093                          || sp->name[arg->val.str.lenmb] != '\0'))
3094                 sp = sp->def_next;
3095
3096               if (sp == NULL)
3097                 {
3098                   lr_error (ldfile, _("\
3099 %s: unknown section name `%s'"),
3100                             "LC_COLLATE", arg->val.str.startmb);
3101                   /* We use the error section.  */
3102                   collate->current_section = &collate->error_section;
3103
3104                   if (collate->error_section.first == NULL)
3105                     {
3106                       /* Insert &collate->error_section at the end of
3107                          the collate->sections list.  */
3108                       if (collate->sections == NULL)
3109                         collate->sections = &collate->error_section;
3110                       else
3111                         {
3112                           sp = collate->sections;
3113                           while (sp->next != NULL)
3114                             sp = sp->next;
3115
3116                           sp->next = &collate->error_section;
3117                         }
3118                       collate->error_section.next = NULL;
3119                     }
3120                 }
3121               else
3122                 {
3123                   /* One should not be allowed to open the same
3124                      section twice.  */
3125                   if (sp->first != NULL)
3126                     lr_error (ldfile, _("\
3127 %s: multiple order definitions for section `%s'"),
3128                               "LC_COLLATE", sp->name);
3129                   else
3130                     {
3131                       /* Insert sp in the collate->sections list,
3132                          right after collate->current_section.  */
3133                       if (collate->current_section == NULL)
3134                         collate->current_section = sp;
3135                       else
3136                         {
3137                           sp->next = collate->current_section->next;
3138                           collate->current_section->next = sp;
3139                         }
3140                     }
3141
3142                   /* Next should come the end of the line or a semicolon.  */
3143                   arg = lr_token (ldfile, charmap, repertoire, verbose);
3144                   if (arg->tok == tok_eol)
3145                     {
3146                       uint32_t cnt;
3147
3148                       /* This means we have exactly one rule: `forward'.  */
3149                       if (nrules > 1)
3150                         lr_error (ldfile, _("\
3151 %s: invalid number of sorting rules"),
3152                                   "LC_COLLATE");
3153                       else
3154                         nrules = 1;
3155                       sp->rules = obstack_alloc (&collate->mempool,
3156                                                  (sizeof (enum coll_sort_rule)
3157                                                   * nrules));
3158                       for (cnt = 0; cnt < nrules; ++cnt)
3159                         sp->rules[cnt] = sort_forward;
3160
3161                       /* Next line.  */
3162                       break;
3163                     }
3164
3165                   /* Get the next token.  */
3166                   arg = lr_token (ldfile, charmap, repertoire, verbose);
3167                 }
3168             }
3169           else
3170             {
3171               /* There is no section symbol.  Therefore we use the unnamed
3172                  section.  */
3173               collate->current_section = &collate->unnamed_section;
3174
3175               if (collate->unnamed_section.first != NULL)
3176                 lr_error (ldfile, _("\
3177 %s: multiple order definitions for unnamed section"),
3178                           "LC_COLLATE");
3179               else
3180                 {
3181                   /* Insert &collate->unnamed_section at the beginning of
3182                      the collate->sections list.  */
3183                   collate->unnamed_section.next = collate->sections;
3184                   collate->sections = &collate->unnamed_section;
3185                 }
3186             }
3187
3188           /* Now read the direction names.  */
3189           read_directions (ldfile, arg, charmap, repertoire, collate);
3190
3191           /* From now we need the strings untranslated.  */
3192           ldfile->translate_strings = 0;
3193           break;
3194
3195         case tok_order_end:
3196           /* Ignore the rest of the line if we don't need the input of
3197              this line.  */
3198           if (ignore_content)
3199             {
3200               lr_ignore_rest (ldfile, 0);
3201               break;
3202             }
3203
3204           if (state != 1)
3205             goto err_label;
3206
3207           /* Handle ellipsis at end of list.  */
3208           if (was_ellipsis != tok_none)
3209             {
3210               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3211                                repertoire, collate);
3212               was_ellipsis = tok_none;
3213             }
3214
3215           state = 2;
3216           lr_ignore_rest (ldfile, 1);
3217           break;
3218
3219         case tok_reorder_after:
3220           /* Ignore the rest of the line if we don't need the input of
3221              this line.  */
3222           if (ignore_content)
3223             {
3224               lr_ignore_rest (ldfile, 0);
3225               break;
3226             }
3227
3228           if (state == 1)
3229             {
3230               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3231                         "LC_COLLATE");
3232               state = 2;
3233
3234               /* Handle ellipsis at end of list.  */
3235               if (was_ellipsis != tok_none)
3236                 {
3237                   handle_ellipsis (ldfile, arg->val.str.startmb,
3238                                    arg->val.str.lenmb, was_ellipsis, charmap,
3239                                    repertoire, collate);
3240                   was_ellipsis = tok_none;
3241                 }
3242             }
3243           else if (state != 2 && state != 3)
3244             goto err_label;
3245           state = 3;
3246
3247           arg = lr_token (ldfile, charmap, repertoire, verbose);
3248           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3249             {
3250               /* Find this symbol in the sequence table.  */
3251               char ucsbuf[10];
3252               char *startmb;
3253               size_t lenmb;
3254               struct element_t *insp;
3255               int no_error = 1;
3256
3257               if (arg->tok == tok_bsymbol)
3258                 {
3259                   startmb = arg->val.str.startmb;
3260                   lenmb = arg->val.str.lenmb;
3261                 }
3262               else
3263                 {
3264                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3265                   startmb = ucsbuf;
3266                   lenmb = 9;
3267                 }
3268
3269               if (find_entry (&collate->seq_table, startmb, lenmb,
3270                               (void **) &insp) == 0)
3271                 /* Yes, the symbol exists.  Simply point the cursor
3272                    to it.  */
3273                 collate->cursor = insp;
3274               else
3275                 {
3276                   struct symbol_t *symbp;
3277
3278                   if (find_entry (&collate->sym_table, startmb, lenmb,
3279                                   (void **) &symbp) == 0)
3280                     {
3281                       if (symbp->order->last != NULL
3282                           || symbp->order->next != NULL)
3283                         collate->cursor = symbp->order;
3284                       else
3285                         {
3286                           /* This is a collating symbol but its position
3287                              is not yet defined.  */
3288                           lr_error (ldfile, _("\
3289 %s: order for collating symbol %.*s not yet defined"),
3290                                     "LC_COLLATE", (int) lenmb, startmb);
3291                           collate->cursor = NULL;
3292                           no_error = 0;
3293                         }
3294                     }
3295                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3296                                        (void **) &insp) == 0)
3297                     {
3298                       if (insp->last != NULL || insp->next != NULL)
3299                         collate->cursor = insp;
3300                       else
3301                         {
3302                           /* This is a collating element but its position
3303                              is not yet defined.  */
3304                           lr_error (ldfile, _("\
3305 %s: order for collating element %.*s not yet defined"),
3306                                     "LC_COLLATE", (int) lenmb, startmb);
3307                           collate->cursor = NULL;
3308                           no_error = 0;
3309                         }
3310                     }
3311                   else
3312                     {
3313                       /* This is bad.  The symbol after which we have to
3314                          insert does not exist.  */
3315                       lr_error (ldfile, _("\
3316 %s: cannot reorder after %.*s: symbol not known"),
3317                                 "LC_COLLATE", (int) lenmb, startmb);
3318                       collate->cursor = NULL;
3319                       no_error = 0;
3320                     }
3321                 }
3322
3323               lr_ignore_rest (ldfile, no_error);
3324             }
3325           else
3326             /* This must not happen.  */
3327             goto err_label;
3328           break;
3329
3330         case tok_reorder_end:
3331           /* Ignore the rest of the line if we don't need the input of
3332              this line.  */
3333           if (ignore_content)
3334             break;
3335
3336           if (state != 3)
3337             goto err_label;
3338           state = 4;
3339           lr_ignore_rest (ldfile, 1);
3340           break;
3341
3342         case tok_reorder_sections_after:
3343           /* Ignore the rest of the line if we don't need the input of
3344              this line.  */
3345           if (ignore_content)
3346             {
3347               lr_ignore_rest (ldfile, 0);
3348               break;
3349             }
3350
3351           if (state == 1)
3352             {
3353               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3354                         "LC_COLLATE");
3355               state = 2;
3356
3357               /* Handle ellipsis at end of list.  */
3358               if (was_ellipsis != tok_none)
3359                 {
3360                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3361                                    repertoire, collate);
3362                   was_ellipsis = tok_none;
3363                 }
3364             }
3365           else if (state == 3)
3366             {
3367               error (0, 0, _("%s: missing `reorder-end' keyword"),
3368                      "LC_COLLATE");
3369               state = 4;
3370             }
3371           else if (state != 2 && state != 4)
3372             goto err_label;
3373           state = 5;
3374
3375           /* Get the name of the sections we are adding after.  */
3376           arg = lr_token (ldfile, charmap, repertoire, verbose);
3377           if (arg->tok == tok_bsymbol)
3378             {
3379               /* Now find a section with this name.  */
3380               struct section_list *runp = collate->sections;
3381
3382               while (runp != NULL)
3383                 {
3384                   if (runp->name != NULL
3385                       && strlen (runp->name) == arg->val.str.lenmb
3386                       && memcmp (runp->name, arg->val.str.startmb,
3387                                  arg->val.str.lenmb) == 0)
3388                     break;
3389
3390                   runp = runp->next;
3391                 }
3392
3393               if (runp != NULL)
3394                 collate->current_section = runp;
3395               else
3396                 {
3397                   /* This is bad.  The section after which we have to
3398                      reorder does not exist.  Therefore we cannot
3399                      process the whole rest of this reorder
3400                      specification.  */
3401                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3402                             "LC_COLLATE", (int) arg->val.str.lenmb,
3403                             arg->val.str.startmb);
3404
3405                   do
3406                     {
3407                       lr_ignore_rest (ldfile, 0);
3408
3409                       now = lr_token (ldfile, charmap, NULL, verbose);
3410                     }
3411                   while (now->tok == tok_reorder_sections_after
3412                          || now->tok == tok_reorder_sections_end
3413                          || now->tok == tok_end);
3414
3415                   /* Process the token we just saw.  */
3416                   nowtok = now->tok;
3417                   continue;
3418                 }
3419             }
3420           else
3421             /* This must not happen.  */
3422             goto err_label;
3423           break;
3424
3425         case tok_reorder_sections_end:
3426           /* Ignore the rest of the line if we don't need the input of
3427              this line.  */
3428           if (ignore_content)
3429             break;
3430
3431           if (state != 5)
3432             goto err_label;
3433           state = 6;
3434           lr_ignore_rest (ldfile, 1);
3435           break;
3436
3437         case tok_bsymbol:
3438         case tok_ucs4:
3439           /* Ignore the rest of the line if we don't need the input of
3440              this line.  */
3441           if (ignore_content)
3442             {
3443               lr_ignore_rest (ldfile, 0);
3444               break;
3445             }
3446
3447           if (state != 0 && state != 1 && state != 3 && state != 5)
3448             goto err_label;
3449
3450           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3451             goto err_label;
3452
3453           if (nowtok == tok_ucs4)
3454             {
3455               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3456               symstr = ucs4buf;
3457               symlen = 9;
3458             }
3459           else
3460             {
3461               symstr = arg->val.str.startmb;
3462               symlen = arg->val.str.lenmb;
3463             }
3464
3465           if (state == 0)
3466             {
3467               /* We are outside an `order_start' region.  This means
3468                  we must only accept definitions of values for
3469                  collation symbols since these are purely abstract
3470                  values and don't need directions associated.  */
3471               struct element_t *seqp;
3472
3473               if (find_entry (&collate->seq_table, symstr, symlen,
3474                               (void **) &seqp) == 0)
3475                 {
3476                   /* It's already defined.  First check whether this
3477                      is really a collating symbol.  */
3478                   if (seqp->is_character)
3479                     goto err_label;
3480
3481                   goto move_entry;
3482                 }
3483               else
3484                 {
3485                   void *result;
3486
3487                   if (find_entry (&collate->sym_table, symstr, symlen,
3488                                   &result) != 0)
3489                     /* No collating symbol, it's an error.  */
3490                     goto err_label;
3491
3492                   /* Maybe this is the first time we define a symbol
3493                      value and it is before the first actual section.  */
3494                   if (collate->sections == NULL)
3495                     collate->sections = collate->current_section =
3496                       &collate->symbol_section;
3497                 }
3498
3499               if (was_ellipsis != tok_none)
3500                 {
3501
3502                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3503                                    charmap, repertoire, collate);
3504
3505                   /* Remember that we processed the ellipsis.  */
3506                   was_ellipsis = tok_none;
3507
3508                   /* And don't add the value a second time.  */
3509                   break;
3510                 }
3511             }
3512           else if (state == 3)
3513             {
3514               /* It is possible that we already have this collation sequence.
3515                  In this case we move the entry.  */
3516               struct element_t *seqp;
3517               void *sym;
3518
3519               /* If the symbol after which we have to insert was not found
3520                  ignore all entries.  */
3521               if (collate->cursor == NULL)
3522                 {
3523                   lr_ignore_rest (ldfile, 0);
3524                   break;
3525                 }
3526
3527               if (find_entry (&collate->seq_table, symstr, symlen,
3528                               (void **) &seqp) == 0)
3529                 goto move_entry;
3530
3531               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3532                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3533                 goto move_entry;
3534
3535               if (find_entry (&collate->elem_table, symstr, symlen,
3536                               (void **) &seqp) == 0
3537                   && (seqp->last != NULL || seqp->next != NULL
3538                       || (collate->start != NULL && seqp == collate->start)))
3539                 {
3540                 move_entry:
3541                   /* Remove the entry from the old position.  */
3542                   if (seqp->last == NULL)
3543                     collate->start = seqp->next;
3544                   else
3545                     seqp->last->next = seqp->next;
3546                   if (seqp->next != NULL)
3547                     seqp->next->last = seqp->last;
3548
3549                   /* We also have to check whether this entry is the
3550                      first or last of a section.  */
3551                   if (seqp->section->first == seqp)
3552                     {
3553                       if (seqp->section->first == seqp->section->last)
3554                         /* This section has no content anymore.  */
3555                         seqp->section->first = seqp->section->last = NULL;
3556                       else
3557                         seqp->section->first = seqp->next;
3558                     }
3559                   else if (seqp->section->last == seqp)
3560                     seqp->section->last = seqp->last;
3561
3562                   /* Now insert it in the new place.  */
3563                   insert_weights (ldfile, seqp, charmap, repertoire, collate,
3564                                   tok_none);
3565                   break;
3566                 }
3567
3568               /* Otherwise we just add a new entry.  */
3569             }
3570           else if (state == 5)
3571             {
3572               /* We are reordering sections.  Find the named section.  */
3573               struct section_list *runp = collate->sections;
3574               struct section_list *prevp = NULL;
3575
3576               while (runp != NULL)
3577                 {
3578                   if (runp->name != NULL
3579                       && strlen (runp->name) == symlen
3580                       && memcmp (runp->name, symstr, symlen) == 0)
3581                     break;
3582
3583                   prevp = runp;
3584                   runp = runp->next;
3585                 }
3586
3587               if (runp == NULL)
3588                 {
3589                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3590                             "LC_COLLATE", (int) symlen, symstr);
3591                   lr_ignore_rest (ldfile, 0);
3592                 }
3593               else
3594                 {
3595                   if (runp != collate->current_section)
3596                     {
3597                       /* Remove the named section from the old place and
3598                          insert it in the new one.  */
3599                       prevp->next = runp->next;
3600
3601                       runp->next = collate->current_section->next;
3602                       collate->current_section->next = runp;
3603                       collate->current_section = runp;
3604                     }
3605
3606                   /* Process the rest of the line which might change
3607                      the collation rules.  */
3608                   arg = lr_token (ldfile, charmap, repertoire, verbose);
3609                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3610                     read_directions (ldfile, arg, charmap, repertoire,
3611                                      collate);
3612                 }
3613               break;
3614             }
3615           else if (was_ellipsis != tok_none)
3616             {
3617               /* Using the information in the `ellipsis_weight'
3618                  element and this and the last value we have to handle
3619                  the ellipsis now.  */
3620               assert (state == 1);
3621
3622               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3623                                repertoire, collate);
3624
3625               /* Remember that we processed the ellipsis.  */
3626               was_ellipsis = tok_none;
3627
3628               /* And don't add the value a second time.  */
3629               break;
3630             }
3631
3632           /* Now insert in the new place.  */
3633           insert_value (ldfile, symstr, symlen, charmap, repertoire, collate);
3634           break;
3635
3636         case tok_undefined:
3637           /* Ignore the rest of the line if we don't need the input of
3638              this line.  */
3639           if (ignore_content)
3640             {
3641               lr_ignore_rest (ldfile, 0);
3642               break;
3643             }
3644
3645           if (state != 1)
3646             goto err_label;
3647
3648           if (was_ellipsis != tok_none)
3649             {
3650               lr_error (ldfile,
3651                         _("%s: cannot have `%s' as end of ellipsis range"),
3652                         "LC_COLLATE", "UNDEFINED");
3653
3654               unlink_element (collate);
3655               was_ellipsis = tok_none;
3656             }
3657
3658           /* See whether UNDEFINED already appeared somewhere.  */
3659           if (collate->undefined.next != NULL
3660               || &collate->undefined == collate->cursor)
3661             {
3662               lr_error (ldfile,
3663                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3664                         "LC_COLLATE", 9, "UNDEFINED",
3665                         collate->undefined.file,
3666                         collate->undefined.line);
3667               lr_ignore_rest (ldfile, 0);
3668             }
3669           else
3670             /* Parse the weights.  */
3671              insert_weights (ldfile, &collate->undefined, charmap,
3672                              repertoire, collate, tok_none);
3673           break;
3674
3675         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3676         case tok_ellipsis3: /* absolute ellipsis */
3677         case tok_ellipsis4: /* symbolic decimal ellipsis */
3678           /* This is the symbolic (decimal or hexadecimal) or absolute
3679              ellipsis.  */
3680           if (was_ellipsis != tok_none)
3681             goto err_label;
3682
3683           if (state != 0 && state != 1 && state != 3)
3684             goto err_label;
3685
3686           was_ellipsis = nowtok;
3687
3688           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3689                           repertoire, collate, nowtok);
3690           break;
3691
3692         case tok_end:
3693           /* Next we assume `LC_COLLATE'.  */
3694           if (!ignore_content)
3695             {
3696               if (state == 0)
3697                 /* We must either see a copy statement or have
3698                    ordering values.  */
3699                 lr_error (ldfile,
3700                           _("%s: empty category description not allowed"),
3701                           "LC_COLLATE");
3702               else if (state == 1)
3703                 {
3704                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3705                             "LC_COLLATE");
3706
3707                   /* Handle ellipsis at end of list.  */
3708                   if (was_ellipsis != tok_none)
3709                     {
3710                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3711                                        repertoire, collate);
3712                       was_ellipsis = tok_none;
3713                     }
3714                 }
3715               else if (state == 3)
3716                 error (0, 0, _("%s: missing `reorder-end' keyword"),
3717                        "LC_COLLATE");
3718               else if (state == 5)
3719                 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
3720                        "LC_COLLATE");
3721             }
3722           arg = lr_token (ldfile, charmap, NULL, verbose);
3723           if (arg->tok == tok_eof)
3724             break;
3725           if (arg->tok == tok_eol)
3726             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3727           else if (arg->tok != tok_lc_collate)
3728             lr_error (ldfile, _("\
3729 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3730           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3731           return;
3732
3733         default:
3734         err_label:
3735           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3736         }
3737
3738       /* Prepare for the next round.  */
3739       now = lr_token (ldfile, charmap, NULL, verbose);
3740       nowtok = now->tok;
3741     }
3742
3743   /* When we come here we reached the end of the file.  */
3744   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3745 }