2000-07-22 Ulrich Drepper <drepper@redhat.com>
+ * iconv/gconv_trans.c: Correct a few bugs in the search loop. Remove
+ remnants of the hash table.
+ * locale/categories.def: Remove remnants of the transliteration
+ hash table.
+ * locale/langinfo.h: Likewise.
+ * locale/programs/ld-ctype.c: Likewise. Fix code to write out
+ transliteration tables.
+
+ * locale/gen-translit.pl: New file.
+ * locale/C-translit.h.in: New file.
+ * locale/C-ctype.c: Include C-translit.h. Initialize transliteration
+ data pointers with data from this file.
+ * locale/Makefile (distribute): Add C-translit.h.in, C-translit.h,
+ and gen-translit.pl.
+ Add rule to generate C-translit.h.
+
* stdio-common/vfscanf.c: Handle input -- with format %f correctly
(it is not an input error).
* stdio-common/tstscanf.c: Add test case for format %f with input --.
{
/* Find out about the locale's transliteration. */
uint_fast32_t size;
- uint_fast32_t layers;
uint32_t *from_idx;
uint32_t *from_tbl;
uint32_t *to_idx;
/* If there is no transliteration information in the locale don't do
anything and return the error. */
- size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_SIZE);
+ size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
if (size == 0)
goto no_rules;
/* Get the rest of the values. */
- layers = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_LAYERS);
from_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
from_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
to_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
return __GCONV_INCOMPLETE_INPUT;
if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
- low = idx;
+ low = med + 1;
else
- high = idx;
+ high = med;
}
no_rules:
#include "localeinfo.h"
#include <endian.h>
+#include "C-translit.h"
+
/* This table's entries are taken from POSIX.2 Table 2-6
``LC_CTYPE Category Definition in the POSIX Locale''.
{ word: L'7' },
{ word: L'8' },
{ word: L'9' },
- { word: 0 },
- { word: 0 },
- { string: NULL },
- { string: NULL },
- { string: NULL },
- { string: NULL },
+ { word: NTRANSLIT },
+ { wstr: translit_from_idx },
+ { wstr: (uint32_t *) translit_from_tbl },
+ { wstr: translit_to_idx },
+ { wstr: (uint32_t *) translit_to_tbl },
{ word: 1 },
{ wstr: (uint32_t *) L"?" },
{ word: 0 },
--- /dev/null
+/* Number of transliteration entries; each of the two index arrays below
+   has this many elements.  */
+#define NTRANSLIT 20
+/* For each entry, the offset (in wide characters) at which its source
+   sequence starts in translit_from_tbl.  Every source sequence here is a
+   single character followed by an L"\0" separator, hence the step of 2.  */
+static const uint32_t translit_from_idx[] =
+{
+ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
+ 24, 26, 28, 30, 32, 34, 36, 38
+};
+/* Source sequences in ascending order, separated by L"\0".  */
+static const wchar_t translit_from_tbl[] =
+ L"\xa9" L"\0" L"\xab" L"\0" L"\xae" L"\0" L"\xbb" L"\0" L"\xbc" L"\0"
+ L"\xbd" L"\0" L"\xbe" L"\0" L"\xc4" L"\0" L"\xc5" L"\0" L"\xc6" L"\0"
+ L"\xd6" L"\0" L"\xdc" L"\0" L"\xdf" L"\0" L"\xe4" L"\0" L"\xe5" L"\0"
+ L"\xe6" L"\0" L"\xf6" L"\0" L"\xfc" L"\0" L"\x201c" L"\0" L"\x201d";
+/* For each entry, the offset (in wide characters) at which its replacement
+   starts in translit_to_tbl.  Each replacement occupies its length plus
+   two: its own trailing \0 and the L"\0" separator that follows it.  */
+static const uint32_t translit_to_idx[] =
+{
+ 0, 5, 9, 14, 18, 23, 28, 33, 37, 41, 45, 49,
+ 53, 57, 61, 65, 69, 73, 77, 80
+};
+/* Replacement strings, in the same order as the source sequences.  Each
+   one carries an embedded trailing \0 and is followed by an L"\0"
+   separator.  */
+static const wchar_t translit_to_tbl[] =
+ L"(C)\0" L"\0" L"<<\0" L"\0" L"(R)\0" L"\0" L">>\0" L"\0" L"1/4\0" L"\0"
+ L"1/2\0" L"\0" L"3/4\0" L"\0" L"AE\0" L"\0" L"AA\0" L"\0" L"AE\0" L"\0"
+ L"OE\0" L"\0" L"UE\0" L"\0" L"ss\0" L"\0" L"ae\0" L"\0" L"aa\0" L"\0"
+ L"ae\0" L"\0" L"oe\0" L"\0" L"ue\0" L"\0" L"\"\0" L"\0" L"\"\0";
--- /dev/null
+/* Transliteration for the C locale.
+ Copyright (C) 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2000.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* The entries here have to be sorted relative to the input string. */
+
+/* <U00A9> COPYRIGHT SIGN. */
+"\xa9" "(C)"
+
+/* <U00AB> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK. */
+"\xab" "<<"
+
+/* <U00AE> REGISTERED SIGN. */
+"\xae" "(R)"
+
+/* <U00BB> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK. */
+"\xbb" ">>"
+
+/* <U00BC> VULGAR FRACTION ONE QUARTER. */
+"\xbc" "1/4"
+
+/* <U00BD> VULGAR FRACTION ONE HALF. */
+"\xbd" "1/2"
+
+/* <U00BE> VULGAR FRACTION THREE QUARTERS. */
+"\xbe" "3/4"
+
+/* <U00C4> LATIN CAPITAL LETTER A WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xc4" "AE"
+
+/* <U00C5> LATIN CAPITAL LETTER A WITH RING ABOVE. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xc5" "AA"
+
+/* <U00C6> LATIN CAPITAL LETTER AE. */
+"\xc6" "AE"
+
+/* <U00D6> LATIN CAPITAL LETTER O WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xd6" "OE"
+
+/* <U00DC> LATIN CAPITAL LETTER U WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xdc" "UE"
+
+/* <U00DF> LATIN SMALL LETTER SHARP S. */
+"\xdf" "ss"
+
+/* <U00E4> LATIN SMALL LETTER A WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xe4" "ae"
+
+/* <U00E5> LATIN SMALL LETTER A WITH RING ABOVE. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xe5" "aa"
+
+/* <U00E6> LATIN SMALL LETTER AE. */
+"\xe6" "ae"
+
+/* <U00F6> LATIN SMALL LETTER O WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xf6" "oe"
+
+/* <U00FC> LATIN SMALL LETTER U WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xfc" "ue"
+
+/* <U201C> LEFT DOUBLE QUOTATION MARK. */
+"\x201c" "\""
+
+/* <U201D> RIGHT DOUBLE QUOTATION MARK. */
+"\x201d" "\""
distribute = localeinfo.h categories.def iso-639.def iso-3166.def \
iso-4217.def weight.h weightwc.h strlen-hash.h elem-hash.h \
indigits.h indigitswc.h outdigits.h outdigitswc.h \
+ C-translit.h.in C-translit.h gen-translit.pl \
$(addprefix programs/, \
locale.c localedef.c \
$(localedef-modules:=.c) $(locale-modules:=.c) \
$(objpfx)locale: $(locale-modules:%=$(objpfx)%.o)
$(objpfx)localedef $(objpfx)locale: $(lib-modules:%=$(objpfx)%.o)
+# Regenerate C-translit.h whenever C-translit.h.in or gen-translit.pl
+# changes.  The script's output is written to a temporary file which is
+# then handed to $(move-if-change).  When with-cvs is `yes' and a CVS
+# directory exists, the regenerated file is committed as well.
+# NOTE(review): the recipe feeds C-translit.h.in on stdin, but
+# gen-translit.pl as written opens C-translit.h.in by name -- confirm
+# which of the two is intended.
+C-translit.h: C-translit.h.in gen-translit.pl
+ $(PERL) gen-translit.pl < $< > $@.tmp
+ $(move-if-change) $@.tmp $@
+ifeq ($(with-cvs),yes)
+ test ! -d CVS || cvs $(CVSOPTS) commit -mRegenerated $@
+endif
+
localepath = "$(localedir):$(i18ndir)"
locale-CPPFLAGS := -DLOCALE_PATH='$(localepath)' \
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT7_WC, "ctype-outdigit7_wc", std, word)
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT8_WC, "ctype-outdigit8_wc", std, word)
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT9_WC, "ctype-outdigit9_wc", std, word)
- DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_HASH_SIZE, "ctype-translit-hash-size", std, word)
- DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_HASH_LAYERS, "ctype-translit-hash-layers", std, word)
+ DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TAB_SIZE, "ctype-translit-tab-size", std, word)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_IDX, "ctype-translit-from-idx", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_TBL, "ctype-translit-from-tbl", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_IDX, "ctype-translit-to-idx", std, string)
--- /dev/null
+#! /usr/bin/perl -w
+# Run the table source through the C preprocessor and read the result
+# from the pipe F.  The low-precedence `or' is required here: with `||'
+# the die would attach to the command string (which is always true), so
+# a failing open would go unnoticed.
+open F, "cat C-translit.h.in | gcc -E - |" or die "Cannot preprocess input file";
+
+
+# Return the number of characters denoted by STR, the body of a C string
+# literal (without the surrounding quotes).  Each escape sequence counts
+# as one character: `\x' followed by up to eight hexadecimal digits, or a
+# backslash followed by any single other character.  Dies with
+# "invalid input" if STR ends in a lone backslash.
+sub cstrlen {
+  my($str) = @_;
+  my($len) = length($str);
+  my($cnt);
+  my($res) = 0;
+
+  for ($cnt = 0; $cnt < $len; ++$cnt) {
+    if (substr($str, $cnt, 1) eq '\\') {
+      # Recognize the escape sequence.
+      if (substr($str, $cnt + 1, 1) eq 'x') {
+        my($inner);
+        # Scan past the hex digits that belong to this escape.
+        for ($inner = $cnt + 2; $inner < $len && $inner < $cnt + 10; ++$inner) {
+          my($ch) = substr($str, $inner, 1);
+          next if (($ch ge '0' && $ch le '9')
+                   || ($ch ge 'a' && $ch le 'f')
+                   || ($ch ge 'A' && $ch le 'F'));
+          last;
+        }
+        # $inner indexes the first character after the escape.  Step back
+        # by one because the enclosing loop increments $cnt again;
+        # otherwise the character following the escape would be skipped
+        # and not counted.
+        $cnt = $inner - 1;
+        ++$res;
+      } else {
+        die "invalid input" if ($cnt + 1 >= $len);
+        ++$res;
+        # Skip the character that the backslash escapes.
+        ++$cnt;
+      }
+    } else {
+      ++$res;
+    }
+  }
+
+  return $res;
+}
+
+# Collect every `"from" "to"' pair found in the preprocessed input into
+# the parallel arrays @froms, @fromlens, @tos and @tolens.  Lines that
+# start with `#' and blank lines are skipped; any other line that does
+# not match the pair pattern is ignored.
+while (<F>) {
+  next if (/^#/);
+  next if (/^[ ]*$/);
+  # Use chomp rather than chop: only a line terminator is removed, so a
+  # final line lacking a newline keeps its closing quote and is still
+  # recognized by the pattern below.
+  chomp;
+
+  if (/"([^\"]*)"[ ]*"(.*)"/) {
+    my($from) = $1;
+    my($to) = $2;
+    my($fromlen) = cstrlen($from);
+    my($tolen) = cstrlen($to);
+
+    push(@froms, $from);
+    push(@fromlens, $fromlen);
+    push(@tos, $to);
+    push(@tolens, $tolen);
+  }
+}
+
+# Emit the number of collected entries.
+printf "#define NTRANSLIT %d\n", $#froms + 1;
+
+# Emit translit_from_idx: for each entry, the running offset of its source
+# string within translit_from_tbl.  $col approximates the current output
+# column so that a new line is started before column 79 is reached.
+printf "static const uint32_t translit_from_idx[] =\n{\n ";
+$col = 2;
+$total = 0;
+for ($cnt = 0; $cnt <= $#fromlens; ++$cnt) {
+ if ($cnt != 0) {
+ if ($col + 7 >= 79) {
+ printf(",\n ");
+ $col = 2;
+ } else {
+ printf(", ");
+ $col += 2;
+ }
+ }
+ printf("%4d", $total);
+ # Advance by the string length plus one for the L"\0" separator that
+ # follows each source string in translit_from_tbl.
+ $total += $fromlens[$cnt] + 1;
+ $col += 4;
+}
+printf("\n};\n");
+
+# Emit translit_from_tbl: the source strings as adjacent wide string
+# literals, with an L"\0" literal between consecutive entries.  $col
+# approximates the output column and is used to wrap before column 79.
+printf "static const wchar_t translit_from_tbl[] =\n ";
+$col = 1;
+for ($cnt = 0; $cnt <= $#froms; ++$cnt) {
+ if ($cnt != 0) {
+ # Separator between the previous entry and this one.
+ if ($col + 6 >= 79) {
+ printf("\n ");
+ $col = 1;
+ }
+ printf(" L\"\\0\"");
+ $col += 6;
+ }
+ # Start a new line if the literal would not fit on the current one.
+ if ($col > 2 && $col + length($froms[$cnt]) + 4 >= 79) {
+ printf("\n ");
+ $col = 2;
+ } else {
+ printf(" ");
+ ++$col;
+ }
+ # The source text is written out verbatim, escapes included.
+ printf("L\"$froms[$cnt]\"");
+ $col += length($froms[$cnt]) + 3;
+}
+printf(";\n");
+
+# Emit translit_to_idx: for each entry, the running offset of its
+# replacement string within translit_to_tbl.  $col approximates the
+# output column and is used to wrap before column 79.
+printf "static const uint32_t translit_to_idx[] =\n{\n ";
+$col = 2;
+$total = 0;
+for ($cnt = 0; $cnt <= $#tolens; ++$cnt) {
+ if ($cnt != 0) {
+ if ($col + 7 >= 79) {
+ printf(",\n ");
+ $col = 2;
+ } else {
+ printf(", ");
+ $col += 2;
+ }
+ }
+ printf("%4d", $total);
+ # Advance by the string length plus two: each replacement is written to
+ # translit_to_tbl with an embedded trailing \0 and is followed by an
+ # L"\0" separator.
+ $total += $tolens[$cnt] + 2;
+ $col += 4;
+}
+printf("\n};\n");
+
+# Emit translit_to_tbl: the replacement strings as adjacent wide string
+# literals, each with an embedded trailing \0, and an L"\0" literal
+# between consecutive entries.  $col approximates the output column and
+# is used to wrap before column 79.
+printf "static const wchar_t translit_to_tbl[] =\n ";
+$col = 1;
+for ($cnt = 0; $cnt <= $#tos; ++$cnt) {
+ if ($cnt != 0) {
+ # Separator between the previous entry and this one.
+ if ($col + 6 >= 79) {
+ printf("\n ");
+ $col = 1;
+ }
+ printf(" L\"\\0\"");
+ $col += 6;
+ }
+ # Start a new line if the literal would not fit on the current one.
+ if ($col > 2 && $col + length($tos[$cnt]) + 6 >= 79) {
+ printf("\n ");
+ $col = 2;
+ } else {
+ printf(" ");
+ ++$col;
+ }
+ # The replacement text is written out verbatim with \0 appended inside
+ # the literal.
+ printf("L\"$tos[$cnt]\\0\"");
+ $col += length($tos[$cnt]) + 5;
+}
+printf(";\n");
+
+exit 0;
_NL_CTYPE_OUTDIGIT7_WC,
_NL_CTYPE_OUTDIGIT8_WC,
_NL_CTYPE_OUTDIGIT9_WC,
- _NL_CTYPE_TRANSLIT_HASH_SIZE,
- _NL_CTYPE_TRANSLIT_HASH_LAYERS,
+ _NL_CTYPE_TRANSLIT_TAB_SIZE,
_NL_CTYPE_TRANSLIT_FROM_IDX,
_NL_CTYPE_TRANSLIT_FROM_TBL,
_NL_CTYPE_TRANSLIT_TO_IDX,
unsigned char *width;
uint32_t mb_cur_max;
const char *codeset_name;
- uint32_t translit_hash_size;
- uint32_t translit_hash_layers;
uint32_t *translit_from_idx;
uint32_t *translit_from_tbl;
uint32_t *translit_to_idx;
uint32_t *translit_to_tbl;
- size_t translit_idx_size;
+ uint32_t translit_idx_size;
size_t translit_from_tbl_size;
size_t translit_to_tbl_size;
{
#define CTYPE_EMPTY(name) \
case name: \
- iov[2 + elem + offset].iov_base = ""; \
+ iov[2 + elem + offset].iov_base = (void *) ""; \
iov[2 + elem + offset].iov_len = 0; \
idx[elem + 1] = idx[elem]; \
break
ctype->names, (ctype->plane_size * ctype->plane_cnt
* sizeof (uint32_t)));
- CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
- &ctype->translit_hash_size, sizeof (uint32_t));
- CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
- &ctype->translit_hash_layers, sizeof (uint32_t));
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
+ &ctype->translit_idx_size, sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
ctype->translit_from_idx,
- ctype->translit_idx_size);
+ ctype->translit_idx_size * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
ctype->translit_from_tbl,
CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
ctype->translit_to_idx,
- ctype->translit_idx_size);
+ ctype->translit_idx_size * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
ctype->translit_to_tbl, ctype->translit_to_tbl_size);
}
/* Store the information about the length. */
- ctype->translit_idx_size = number * sizeof (uint32_t);
+ ctype->translit_idx_size = number;
ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
}