LC_COLLATE, "LC_COLLATE",
(
DEFINE_ELEMENT (_NL_COLLATE_NRULES, "collate-nrules", std, word)
- DEFINE_ELEMENT (_NL_COLLATE_RULES, "collate-rules", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_HASH_SIZE, "collate-hash-size", std, word)
- DEFINE_ELEMENT (_NL_COLLATE_HASH_LAYERS, "collate-hash-layers", std, word)
- DEFINE_ELEMENT (_NL_COLLATE_TABLEWC, "collate-tablewc", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_UNDEFINED_WC, "collate-undefined-wc", std, word)
- DEFINE_ELEMENT (_NL_COLLATE_EXTRAWC, "collate-extrawc", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_ELEM_HASH_SIZE, "collate-elem-hash-size", std, word)
- DEFINE_ELEMENT (_NL_COLLATE_ELEM_HASH, "collate-elem-hash", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_ELEM_STR_POOL, "collate-elem-str-pool", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_ELEM_VAL, "collate-elem-val", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZE, "collate-symb-hash-size", std, word)
- DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH, "collate-symb-hash", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_SYMB_STR_POOL, "collate-symb-str-pool", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_SYMB_CLASSWC, "collate-symb-classwc", std, string)
), _nl_postload_collate)
const char *mbs;
const uint32_t *wcs;
- int mborder;
+ int *mborder;
int wcorder;
+ /* The following is a bit mask which bits are set if this element is
+ used in the appropriate level. Interesting for the singlebyte
+ weight computation.
+
+ XXX The type here restricts the number of levels to 32. It could
+ we changed if necessary but I doubt this is necessary. */
+ unsigned int used_in_level;
+
struct element_list_t *weights;
/* Where does the definition come from. */
newp = (struct element_t *) obstack_alloc (&collate->mempool,
sizeof (*newp));
- newp->name = name == NULL ? NULL : obstack_copy (&collate->mempool,
- name, namelen);
+ newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
+ name, namelen);
if (mbs != NULL)
newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
else
}
else
newp->wcs = NULL;
- newp->mborder = 0;
+ newp->mborder = NULL;
newp->wcorder = 0;
+ newp->used_in_level = 0;
/* Will be allocated later. */
newp->weights = NULL;
else if (find_entry (&collate->elem_table, str, len,
(void **) &result) != 0)
{
- /* It's also no collation element. So it is an character
+ /* It's also no collation element. So it is a character
element defined later. */
result = new_element (collate, NULL, 0, NULL, str, len);
if (result != NULL)
static void
unlink_element (struct locale_collate_t *collate)
{
- if (collate->cursor->next != NULL)
- collate->cursor->next->last = collate->cursor->last;
- if (collate->cursor->last != NULL)
- collate->cursor->last->next = collate->cursor->next;
- collate->cursor = collate->cursor->last;
+ if (collate->cursor == collate->start)
+ {
+ assert (collate->cursor->next == NULL);
+ assert (collate->cursor->last == NULL);
+ collate->cursor = NULL;
+ }
+ else
+ {
+ if (collate->cursor->next != NULL)
+ collate->cursor->next->last = collate->cursor->last;
+ if (collate->cursor->last != NULL)
+ collate->cursor->last->next = collate->cursor->next;
+ collate->cursor = collate->cursor->last;
+ }
}
elem->next = collate->cursor ? collate->cursor->next : NULL;
if (collate->cursor != NULL)
collate->cursor->next = elem;
+ if (collate->start == NULL)
+ {
+ assert (collate->cursor == NULL);
+ collate->start = elem;
+ }
elem->weights = (struct element_list_t *)
obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
const char *cp = arg->val.str.startmb;
int cnt = 0;
struct element_t *charelem;
- void *base = obstack_base (&collate->mempool);
+ struct element_t **weights = NULL;
+ int max = 0;
if (*cp == '\0')
{
if (*cp == '<')
{
/* Ahh, it's a bsymbol. That's what we want. */
- const char *startp = cp;
+ const char *startp = ++cp;
- while (*++cp != '>')
+ while (*cp != '>')
{
if (*cp == ldfile->escape_char)
++cp;
if (*cp == '\0')
- {
- /* It's a syntax error. */
- obstack_free (&collate->mempool, base);
- goto syntax;
- }
+ /* It's a syntax error. */
+ goto syntax;
+
+ ++cp;
}
charelem = find_element (ldfile, collate, startp,
what this means. We interpret all characters in the
string as if that would be bsymbols. Otherwise we
would have to match back to bsymbols somehow and this
- is also not what people normally expect. */
+ is normally not what people normally expect. */
charelem = find_element (ldfile, collate, cp++, 1, NULL);
}
}
/* Add the pointer. */
- obstack_ptr_grow (&collate->mempool, charelem);
- ++cnt;
+ if (cnt >= max)
+ {
+ struct element_t **newp;
+ max += 10;
+ newp = (struct element_t **)
+ alloca (max * sizeof (struct element_t *));
+ memcpy (newp, weights, cnt * sizeof (struct element_t *));
+ weights = newp;
+ }
+ weights[cnt++] = charelem;
}
while (*cp != '\0');
/* Now store the information. */
elem->weights[weight_cnt].w = (struct element_t **)
- obstack_finish (&collate->mempool);
+ obstack_alloc (&collate->mempool,
+ cnt * sizeof (struct element_t *));
+ memcpy (elem->weights[weight_cnt].w, weights,
+ cnt * sizeof (struct element_t *));
elem->weights[weight_cnt].cnt = cnt;
/* We don't need the string anymore. */
/* Enqueue the new element. */
elem->last = collate->cursor;
- elem->next = collate->cursor->next;
- elem->last->next = elem;
- if (elem->next != NULL)
- elem->next->last = elem;
+ if (collate->cursor != NULL)
+ elem->next = NULL;
+ else
+ {
+ elem->next = collate->cursor->next;
+ elem->last->next = elem;
+ if (elem->next != NULL)
+ elem->next->last = elem;
+ }
+ if (collate->start == NULL)
+ {
+ assert (collate->cursor == NULL);
+ collate->start = elem;
+ }
collate->cursor = elem;
/* Add the weight value. We take them from the
The multibyte case is easy. We simply sort into an array with
256 elements. */
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
- int mbact = 2;
- int wcact = 2;
- struct element_t *runp = collate->start;
+ int mbact[nrules];
+ int wcact;
+ struct element_t *runp;
+ int i;
+ int need_undefined = 0;
+
+ /* If this assertion is hit change the type in `element_t'. */
+ assert (nrules <= sizeof (runp->used_in_level) * 8);
+
+ /* Find out which elements are used at which level. At the same
+ time we find out whether we have any undefined symbols. */
+ runp = collate->start;
+ while (runp != NULL)
+ {
+ if (runp->mbs != NULL)
+ {
+ for (i = 0; i < nrules; ++i)
+ {
+ int j;
+
+ for (j = 0; j < runp->weights[i].cnt; ++j)
+ /* A NULL pointer as the weight means IGNORE. */
+ if (runp->weights[i].w[j] != NULL)
+ {
+ if (runp->weights[i].w[j]->weights == NULL)
+ {
+ error_at_line (0, 0, runp->file, runp->line,
+ _("symbol `%s' not defined"),
+ runp->weights[i].w[j]->name);
+ need_undefined = 1;
+ runp->weights[i].w[j] = &collate->undefined;
+ }
+ else
+ /* Set the bit for the level. */
+ runp->weights[i].w[j]->used_in_level |= 1 << i;
+ }
+ }
+ }
+
+ /* Up to the next entry. */
+ runp = runp->next;
+ }
+
+ /* Walk through the list of defined sequences and assign weights. Also
+ create the data structure which will allow generating the single byte
+ character based tables.
+
+ Since at each time only the weights for each of the rules are
+ only compared to other weights for this rule it is possible to
+ assign more compact weight values than simply counting all
+ weights in sequence. We can assign weights from 2 one for each
+ rule individually and only for those elements, which are actually
+ used for this rule.
+
+ Why is this important? It is not for the wide char table. But
+ it is for the singlebyte output since here larger numbers have to
+ be encoded to make it possible to emit the value as a byte
+ string. */
+ for (i = 0; i < nrules; ++i)
+ mbact[i] = 2;
+ wcact = 2;
+ runp = collate->start;
while (runp != NULL)
{
if (runp->mbs != NULL)
struct element_t **eptr;
/* Determine the order. */
- runp->mborder = mbact++;
+ if (runp->used_in_level != 0)
+ {
+ runp->mborder = (int *) obstack_alloc (&collate->mempool,
+ nrules * sizeof (int));
+
+ for (i = 0; i < nrules; ++i)
+ if ((runp->used_in_level & (1 << i)) != 0)
+ runp->mborder[i] = mbact[i]++;
+ else
+ runp->mborder[i] = 0;
+ }
/* Find the point where to insert in the list. */
- eptr = &collate->mbheads[(unsigned int) runp->mbs[0]];
+ eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
while (*eptr != NULL)
{
/* Check which string is larger, the one we want to insert
/* Up to the next entry. */
runp = runp->next;
}
+
+ /* Find out whether any of the `mbheads' entries is unset. In this
+ case we use the UNDEFINED entry. */
+ for (i = 1; i < 256; ++i)
+ if (collate->mbheads[i] == NULL)
+ {
+ need_undefined = 1;
+ collate->mbheads[i] = &collate->undefined;
+ }
+
+ /* Now determine whether the UNDEFINED entry is needed and if yes,
+ whether it was defined. */
+ if (need_undefined && collate->undefined.file == NULL)
+ {
+ error (0, 0, _("no definition of `UNDEFINED'"));
+
+ /* Add UNDEFINED at the end. */
+ collate->undefined.mborder =
+ (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
+
+ for (i = 0; i < nrules; ++i)
+ collate->undefined.mborder[i] = mbact[i]++;
+
+ collate->undefined.wcorder = wcact++;
+ }
}
collate_output (struct localedef_t *locale, struct charmap_t *charmap,
const char *output_path)
{
+ struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+ const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
+ struct iovec iov[2 + nelems];
+ struct locale_file data;
+ uint32_t idx[nelems];
+ size_t cnt;
+
+ data.magic = LIMAGIC (LC_COLLATE);
+ data.n = nelems;
+ iov[0].iov_base = (void *) &data;
+ iov[0].iov_len = sizeof (data);
+
+ iov[1].iov_base = (void *) idx;
+ iov[1].iov_len = sizeof (idx);
+
+ idx[0] = iov[0].iov_len + iov[1].iov_len;
+ cnt = 0;
+
+ assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
+ iov[2 + cnt].iov_base = &collate->nrules;
+ iov[2 + cnt].iov_len = sizeof (uint32_t);
+ idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+ ++cnt;
+
+ assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
+
+ write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
}