From bb39c4ef8f3f54d36462ea271cdcbc648b7409d7 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 8 Nov 2000 23:08:32 +0000 Subject: [PATCH] Update. 2000-10-27 Bruno Haible * locale/programs/charmap.c (charmap_read): Verify ASCII compatibility of charmap. --- ChangeLog | 5 ++++ locale/programs/charmap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/ChangeLog b/ChangeLog index e49ccb2..db6bb96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2000-10-27 Bruno Haible + + * locale/programs/charmap.c (charmap_read): Verify ASCII + compatibility of charmap. + 2000-11-08 Ulrich Drepper * catgets/gencat.c (main): Don't use exit() to avoid warnings with diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c index 776d6ff..59f317a 100644 --- a/locale/programs/charmap.c +++ b/locale/programs/charmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -192,6 +193,64 @@ charmap_read (const char *filename) DEFAULT_CHARMAP); } + /* Test of ASCII compatibility of locale encoding. + + Verify that the encoding to be used in a locale is ASCII compatible, + at least for the graphic characters, excluding the control characters, + '$' and '@'. This constraint comes from an ISO C 99 restriction. + + ISO C 99 section 7.17.(2) (about wchar_t): + the null character shall have the code value zero and each member of + the basic character set shall have a code value equal to its value + when used as the lone character in an integer character constant. + ISO C 99 section 5.2.1.(3): + Both the basic source and basic execution character sets shall have + the following members: the 26 uppercase letters of the Latin alphabet + A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + the 26 lowercase letters of the Latin alphabet + a b c d e f g h i j k l m n o p q r s t u v w x y z + the 10 decimal digits + 0 1 2 3 4 5 6 7 8 9 + the following 29 graphic characters + ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~ + the space character, and control characters representing horizontal + tab, vertical tab, and form feed. + + Therefore, for all members of the "basic character set", the 'char' code + must have the same value as the 'wchar_t' code, which in glibc is the + same as the Unicode code, which for all of the enumerated characters + is identical to the ASCII code. */ + if (result != NULL) + { + static const char basic_charset[] = + { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-', + '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^', + '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0' + }; + int failed = 0; + const char *p = basic_charset; + + do + { + struct charseq * seq = charmap_find_symbol (result, p, 1); + + if (seq == NULL || seq->ucs4 != *p) + failed = 1; + } + while (*p++ != '\0'); + + if (failed) + fprintf (stderr, _("\ +character map `%s' is not ASCII compatible, locale not ISO C compliant\n"), + result->code_set_name); + } + return result; } -- 2.7.4