From 4e82c610255f6a186c20c73e74f8e71dcda98efc Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 4 Apr 2005 22:37:38 +0000 Subject: [PATCH] * intl/tst-gettext3.c: New file. * intl/tst-gettext3.sh: New file. * intl/Makefile (distribute): Add tst-gettext3.sh. (test-srcs): Add tst-gettext3. (tests): Depend on tst-gettext3.out. (tst-gettext3.out): New rule. (CFLAGS-tst-gettext3.c): New variable. Fix bug exposed by tst-gettext3. * intl/gettextP.h (struct converted_domain): New type. (struct loaded_domain): Remove the conv, conv_tab fields. Add conversions, nconversions fields. (_nl_init_domain_conv): Remove declaration. (_nl_free_domain_conv): Remove declaration. (_nl_find_msg): Add convert argument. * intl/dcigettext.c (DCIGETTEXT): Call _nl_find_msg with convert=1. (_nl_find_msg): Add convert argument. When a conversion to a different charset is needed, create a new converted_domain element, instead of throwing away the old converted translations. (get_output_charset): New function. * intl/loadmsgcat.c (_nl_init_domain_conv): Remove function. (_nl_free_domain_conv): Remove function. (_nl_load_domain): Initialize the conversions array to empty. Use _nl_find_msg instead of _nl_init_domain_conv to retrieve the header entry. (_nl_unload_domain): Free the conversions array and its contents. * intl/gettextP.h (struct loaded_domain): Remove codeset_cntr field. (struct binding): Likewise. * intl/bindtextdom.c (set_binding_values): Drop codeset_cntr modifications. 
--- ChangeLog | 34 ++++ intl/bindtextdom.c | 4 - intl/dcigettext.c | 485 +++++++++++++++++++++++++++++++++++---------------- intl/gettextP.h | 45 +++-- intl/loadmsgcat.c | 172 +++--------------- intl/tst-gettext3.c | 60 +++++++ intl/tst-gettext3.sh | 45 +++++ 7 files changed, 523 insertions(+), 322 deletions(-) create mode 100644 intl/tst-gettext3.c create mode 100644 intl/tst-gettext3.sh diff --git a/ChangeLog b/ChangeLog index fbad32c..dc9e074 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,37 @@ +2005-03-27 Bruno Haible + + * intl/tst-gettext3.c: New file. + * intl/tst-gettext3.sh: New file. + * intl/Makefile (distribute): Add tst-gettext3.sh. + (test-srcs): Add tst-gettext3. + (tests): Depend on tst-gettext3.out. + (tst-gettext3.out): New rule. + (CFLAGS-tst-gettext3.c): New variable. + + Fix bug exposed by tst-gettext3. + * intl/gettextP.h (struct converted_domain): New type. + (struct loaded_domain): Remove the conv, conv_tab fields. Add + conversions, nconversions fields. + (_nl_init_domain_conv): Remove declaration. + (_nl_free_domain_conv): Remove declaration. + (_nl_find_msg): Add convert argument. + * intl/dcigettext.c (DCIGETTEXT): Call _nl_find_msg with convert=1. + (_nl_find_msg): Add convert argument. When a conversion to a different + charset is needed, create a new converted_domain element, instead of + throwing away the old converted translations. + (get_output_charset): New function. + * intl/loadmsgcat.c (_nl_init_domain_conv): Remove function. + (_nl_free_domain_conv): Remove function. + (_nl_load_domain): Initialize the conversions array to empty. Use + _nl_find_msg instead of _nl_init_domain_conv to retrieve the header + entry. + (_nl_unload_domain): Free the conversions array and its contents. + + * intl/gettextP.h (struct loaded_domain): Remove codeset_cntr field. + (struct binding): Likewise. + * intl/bindtextdom.c (set_binding_values): Drop codeset_cntr + modifications. 
+ 2005-04-04 Jakub Jelinek * sunrpc/pmap_rmt.c (xdr_rmtcall_args): Use a dummy arglen instead diff --git a/intl/bindtextdom.c b/intl/bindtextdom.c index 546da8e..fd527a1 100644 --- a/intl/bindtextdom.c +++ b/intl/bindtextdom.c @@ -207,7 +207,6 @@ set_binding_values (domainname, dirnamep, codesetp) free (binding->codeset); binding->codeset = result; - ++binding->codeset_cntr; modified = 1; } } @@ -271,8 +270,6 @@ set_binding_values (domainname, dirnamep, codesetp) /* The default value. */ new_binding->dirname = (char *) _nl_default_dirname; - new_binding->codeset_cntr = 0; - if (codesetp) { const char *codeset = *codesetp; @@ -293,7 +290,6 @@ set_binding_values (domainname, dirnamep, codesetp) memcpy (result, codeset, len); #endif codeset = result; - ++new_binding->codeset_cntr; } *codesetp = codeset; new_binding->codeset = (char *) codeset; diff --git a/intl/dcigettext.c b/intl/dcigettext.c index 8163064..c73c719 100644 --- a/intl/dcigettext.c +++ b/intl/dcigettext.c @@ -326,6 +326,10 @@ static struct transmem_list *transmem_list; #else typedef unsigned char transmem_block_t; #endif +#if defined _LIBC || HAVE_ICONV +static const char *get_output_charset PARAMS ((struct binding *domainbinding)) + internal_function; +#endif /* Names for the libintl functions are a problem. 
They must not clash @@ -597,7 +601,7 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category) if (domain != NULL) { - retval = _nl_find_msg (domain, binding, msgid1, &retlen); + retval = _nl_find_msg (domain, binding, msgid1, 1, &retlen); if (retval == NULL) { @@ -606,7 +610,7 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category) for (cnt = 0; domain->successor[cnt] != NULL; ++cnt) { retval = _nl_find_msg (domain->successor[cnt], binding, - msgid1, &retlen); + msgid1, 1, &retlen); if (retval != NULL) { @@ -683,10 +687,11 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category) char * internal_function -_nl_find_msg (domain_file, domainbinding, msgid, lengthp) +_nl_find_msg (domain_file, domainbinding, msgid, convert, lengthp) struct loaded_l10nfile *domain_file; struct binding *domainbinding; const char *msgid; + int convert; size_t *lengthp; { struct loaded_domain *domain; @@ -793,192 +798,317 @@ _nl_find_msg (domain_file, domainbinding, msgid, lengthp) } #if defined _LIBC || HAVE_ICONV - if (domain->codeset_cntr - != (domainbinding != NULL ? domainbinding->codeset_cntr : 0)) + if (convert) { - /* The domain's codeset has changed through bind_textdomain_codeset() - since the message catalog was initialized or last accessed. We - have to reinitialize the converter. */ - _nl_free_domain_conv (domain); - _nl_init_domain_conv (domain_file, domain, domainbinding); - } + /* We are supposed to do a conversion. */ + const char *encoding = get_output_charset (domainbinding); + + /* Search whether a table with converted translations for this + encoding has already been allocated. */ + size_t nconversions = domain->nconversions; + struct converted_domain *convd = NULL; + size_t i; + + for (i = nconversions; i > 0; ) + { + i--; + if (strcmp (domain->conversions[i].encoding, encoding) == 0) + { + convd = &domain->conversions[i]; + break; + } + } - if ( + if (convd == NULL) + { + /* Allocate a table for the converted translations for this + encoding. 
*/ + struct converted_domain *new_conversions = + (struct converted_domain *) + (domain->conversions != NULL + ? realloc (domain->conversions, + (nconversions + 1) * sizeof (struct converted_domain)) + : malloc ((nconversions + 1) * sizeof (struct converted_domain))); + + if (__builtin_expect (new_conversions == NULL, 0)) + /* Nothing we can do, no more memory. */ + goto converted; + domain->conversions = new_conversions; + + /* Copy the 'encoding' string to permanent storage. */ + encoding = strdup (encoding); + if (__builtin_expect (encoding == NULL, 0)) + /* Nothing we can do, no more memory. */ + goto converted; + + convd = &new_conversions[nconversions]; + convd->encoding = encoding; + + /* Find out about the character set the file is encoded with. + This can be found (in textual form) in the entry "". If this + entry does not exist or if this does not contain the 'charset=' + information, we will assume the charset matches the one the + current locale and we don't have to perform any conversion. */ # ifdef _LIBC - domain->conv != (__gconv_t) -1 + convd->conv = (__gconv_t) -1; # else # if HAVE_ICONV - domain->conv != (iconv_t) -1 + convd->conv = (iconv_t) -1; # endif # endif - ) - { - /* We are supposed to do a conversion. First allocate an - appropriate table with the same structure as the table - of translations in the file, where we can put the pointers - to the converted strings in. - There is a slight complication with plural entries. They - are represented by consecutive NUL terminated strings. We - handle this case by converting RESULTLEN bytes, including - NULs. */ - - if (domain->conv_tab == NULL - && ((domain->conv_tab = - (char **) calloc (nstrings + domain->n_sysdep_strings, - sizeof (char *))) - == NULL)) - /* Mark that we didn't succeed allocating a table. */ - domain->conv_tab = (char **) -1; - - if (__builtin_expect (domain->conv_tab == (char **) -1, 0)) - /* Nothing we can do, no more memory. 
*/ - goto converted; - - if (domain->conv_tab[act] == NULL) + { + char *nullentry; + size_t nullentrylen; + + /* Get the header entry. This is a recursion, but it doesn't + reallocate domain->conversions because we pass convert = 0. */ + nullentry = + _nl_find_msg (domain_file, domainbinding, "", 0, &nullentrylen); + + if (nullentry != NULL) + { + const char *charsetstr; + + charsetstr = strstr (nullentry, "charset="); + if (charsetstr != NULL) + { + size_t len; + char *charset; + const char *outcharset; + + charsetstr += strlen ("charset="); + len = strcspn (charsetstr, " \t\n"); + + charset = (char *) alloca (len + 1); +# if defined _LIBC || HAVE_MEMPCPY + *((char *) mempcpy (charset, charsetstr, len)) = '\0'; +# else + memcpy (charset, charsetstr, len); + charset[len] = '\0'; +# endif + + outcharset = encoding; + +# ifdef _LIBC + /* We always want to use transliteration. */ + outcharset = norm_add_slashes (outcharset, "TRANSLIT"); + charset = norm_add_slashes (charset, ""); + if (__gconv_open (outcharset, charset, &convd->conv, + GCONV_AVOID_NOCONV) + != __GCONV_OK) + convd->conv = (__gconv_t) -1; +# else +# if HAVE_ICONV + /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5, + we want to use transliteration. */ +# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \ + || _LIBICONV_VERSION >= 0x0105 + if (strchr (outcharset, '/') == NULL) + { + char *tmp; + + len = strlen (outcharset); + tmp = (char *) alloca (len + 10 + 1); + memcpy (tmp, outcharset, len); + memcpy (tmp + len, "//TRANSLIT", 10 + 1); + outcharset = tmp; + + convd->conv = iconv_open (outcharset, charset); + + freea (outcharset); + } + else +# endif + convd->conv = iconv_open (outcharset, charset); +# endif +# endif + + freea (charset); + } + } + } + convd->conv_tab = NULL; + /* Here domain->conversions is still == new_conversions. 
*/ + domain->nconversions++; + } + + if ( +# ifdef _LIBC + convd->conv != (__gconv_t) -1 +# else +# if HAVE_ICONV + convd->conv != (iconv_t) -1 +# endif +# endif + ) { - /* We haven't used this string so far, so it is not - translated yet. Do this now. */ - /* We use a bit more efficient memory handling. - We allocate always larger blocks which get used over - time. This is faster than many small allocations. */ - __libc_lock_define_initialized (static, lock) + /* We are supposed to do a conversion. First allocate an + appropriate table with the same structure as the table + of translations in the file, where we can put the pointers + to the converted strings in. + There is a slight complication with plural entries. They + are represented by consecutive NUL terminated strings. We + handle this case by converting RESULTLEN bytes, including + NULs. */ + + if (convd->conv_tab == NULL + && ((convd->conv_tab = + (char **) calloc (nstrings + domain->n_sysdep_strings, + sizeof (char *))) + == NULL)) + /* Mark that we didn't succeed allocating a table. */ + convd->conv_tab = (char **) -1; + + if (__builtin_expect (convd->conv_tab == (char **) -1, 0)) + /* Nothing we can do, no more memory. */ + goto converted; + + if (convd->conv_tab[act] == NULL) + { + /* We haven't used this string so far, so it is not + translated yet. Do this now. */ + /* We use a bit more efficient memory handling. + We allocate always larger blocks which get used over + time. This is faster than many small allocations. 
*/ + __libc_lock_define_initialized (static, lock) # define INITIAL_BLOCK_SIZE 4080 - static unsigned char *freemem; - static size_t freemem_size; + static unsigned char *freemem; + static size_t freemem_size; - const unsigned char *inbuf; - unsigned char *outbuf; - int malloc_count; + const unsigned char *inbuf; + unsigned char *outbuf; + int malloc_count; # ifndef _LIBC - transmem_block_t *transmem_list = NULL; + transmem_block_t *transmem_list = NULL; # endif - __libc_lock_lock (lock); + __libc_lock_lock (lock); - inbuf = (const unsigned char *) result; - outbuf = freemem + sizeof (size_t); + inbuf = (const unsigned char *) result; + outbuf = freemem + sizeof (size_t); - malloc_count = 0; - while (1) - { - transmem_block_t *newmem; + malloc_count = 0; + while (1) + { + transmem_block_t *newmem; # ifdef _LIBC - size_t non_reversible; - int res; + size_t non_reversible; + int res; - if (freemem_size < sizeof (size_t)) - goto resize_freemem; + if (freemem_size < sizeof (size_t)) + goto resize_freemem; - res = __gconv (domain->conv, - &inbuf, inbuf + resultlen, - &outbuf, - outbuf + freemem_size - sizeof (size_t), - &non_reversible); + res = __gconv (convd->conv, + &inbuf, inbuf + resultlen, + &outbuf, + outbuf + freemem_size - sizeof (size_t), + &non_reversible); - if (res == __GCONV_OK || res == __GCONV_EMPTY_INPUT) - break; + if (res == __GCONV_OK || res == __GCONV_EMPTY_INPUT) + break; - if (res != __GCONV_FULL_OUTPUT) - { - __libc_lock_unlock (lock); - goto converted; - } + if (res != __GCONV_FULL_OUTPUT) + { + __libc_lock_unlock (lock); + goto converted; + } - inbuf = (const unsigned char *) result; + inbuf = (const unsigned char *) result; # else # if HAVE_ICONV - const char *inptr = (const char *) inbuf; - size_t inleft = resultlen; - char *outptr = (char *) outbuf; - size_t outleft; - - if (freemem_size < sizeof (size_t)) - goto resize_freemem; - - outleft = freemem_size - sizeof (size_t); - if (iconv (domain->conv, - (ICONV_CONST char **) &inptr, &inleft, 
- &outptr, &outleft) - != (size_t) (-1)) - { - outbuf = (unsigned char *) outptr; - break; - } - if (errno != E2BIG) - { - __libc_lock_unlock (lock); - goto converted; - } + const char *inptr = (const char *) inbuf; + size_t inleft = resultlen; + char *outptr = (char *) outbuf; + size_t outleft; + + if (freemem_size < sizeof (size_t)) + goto resize_freemem; + + outleft = freemem_size - sizeof (size_t); + if (iconv (convd->conv, + (ICONV_CONST char **) &inptr, &inleft, + &outptr, &outleft) + != (size_t) (-1)) + { + outbuf = (unsigned char *) outptr; + break; + } + if (errno != E2BIG) + { + __libc_lock_unlock (lock); + goto converted; + } # endif # endif - resize_freemem: - /* We must allocate a new buffer or resize the old one. */ - if (malloc_count > 0) - { - ++malloc_count; - freemem_size = malloc_count * INITIAL_BLOCK_SIZE; - newmem = (transmem_block_t *) realloc (transmem_list, - freemem_size); + resize_freemem: + /* We must allocate a new buffer or resize the old one. */ + if (malloc_count > 0) + { + ++malloc_count; + freemem_size = malloc_count * INITIAL_BLOCK_SIZE; + newmem = (transmem_block_t *) realloc (transmem_list, + freemem_size); # ifdef _LIBC - if (newmem != NULL) - transmem_list = transmem_list->next; + if (newmem != NULL) + transmem_list = transmem_list->next; + else + { + struct transmem_list *old = transmem_list; + + transmem_list = transmem_list->next; + free (old); + } +# endif + } else { - struct transmem_list *old = transmem_list; - - transmem_list = transmem_list->next; - free (old); + malloc_count = 1; + freemem_size = INITIAL_BLOCK_SIZE; + newmem = (transmem_block_t *) malloc (freemem_size); + } + if (__builtin_expect (newmem == NULL, 0)) + { + freemem = NULL; + freemem_size = 0; + __libc_lock_unlock (lock); + goto converted; } -# endif - } - else - { - malloc_count = 1; - freemem_size = INITIAL_BLOCK_SIZE; - newmem = (transmem_block_t *) malloc (freemem_size); - } - if (__builtin_expect (newmem == NULL, 0)) - { - freemem = NULL; - 
freemem_size = 0; - __libc_lock_unlock (lock); - goto converted; - } # ifdef _LIBC - /* Add the block to the list of blocks we have to free - at some point. */ - newmem->next = transmem_list; - transmem_list = newmem; + /* Add the block to the list of blocks we have to free + at some point. */ + newmem->next = transmem_list; + transmem_list = newmem; - freemem = (unsigned char *) newmem->data; - freemem_size -= offsetof (struct transmem_list, data); + freemem = (unsigned char *) newmem->data; + freemem_size -= offsetof (struct transmem_list, data); # else - transmem_list = newmem; - freemem = newmem; + transmem_list = newmem; + freemem = newmem; # endif - outbuf = freemem + sizeof (size_t); + outbuf = freemem + sizeof (size_t); + } + + /* We have now in our buffer a converted string. Put this + into the table of conversions. */ + *(size_t *) freemem = outbuf - freemem - sizeof (size_t); + convd->conv_tab[act] = (char *) freemem; + /* Shrink freemem, but keep it aligned. */ + freemem_size -= outbuf - freemem; + freemem = outbuf; + freemem += freemem_size & (alignof (size_t) - 1); + freemem_size = freemem_size & ~ (alignof (size_t) - 1); + + __libc_lock_unlock (lock); } - /* We have now in our buffer a converted string. Put this - into the table of conversions. */ - *(size_t *) freemem = outbuf - freemem - sizeof (size_t); - domain->conv_tab[act] = (char *) freemem; - /* Shrink freemem, but keep it aligned. */ - freemem_size -= outbuf - freemem; - freemem = outbuf; - freemem += freemem_size & (alignof (size_t) - 1); - freemem_size = freemem_size & ~ (alignof (size_t) - 1); - - __libc_lock_unlock (lock); + /* Now convd->conv_tab[act] contains the translation of all + the plural variants. */ + result = convd->conv_tab[act] + sizeof (size_t); + resultlen = *(size_t *) convd->conv_tab[act]; } - - /* Now domain->conv_tab[act] contains the translation of all - the plural variants. 
*/ - result = domain->conv_tab[act] + sizeof (size_t); - resultlen = *(size_t *) domain->conv_tab[act]; } converted: @@ -1122,6 +1252,61 @@ guess_category_value (category, categoryname) return language != NULL && strcmp (retval, "C") != 0 ? language : retval; } +#if defined _LIBC || HAVE_ICONV +/* Returns the output charset for DOMAINBINDING (which may be NULL). */ +static const char * +internal_function +get_output_charset (domainbinding) + struct binding *domainbinding; +{ + /* The output charset should normally be determined by the locale. But + sometimes the locale is not used or not correctly set up, so we provide + a possibility for the user to override this: the OUTPUT_CHARSET + environment variable. Moreover, the value specified through + bind_textdomain_codeset overrides both. */ + if (domainbinding != NULL && domainbinding->codeset != NULL) + return domainbinding->codeset; + else + { + /* For speed reasons, we look at the value of OUTPUT_CHARSET only + once. This is a user variable that is not supposed to change + during a program run. */ + static char *output_charset_cache; + static int output_charset_cached; + + if (!output_charset_cached) + { + const char *value = getenv ("OUTPUT_CHARSET"); + + if (value != NULL && value[0] != '\0') + { + size_t len = strlen (value) + 1; + char *value_copy = (char *) malloc (len); + + if (value_copy != NULL) + memcpy (value_copy, value, len); + output_charset_cache = value_copy; + } + output_charset_cached = 1; + } + + if (output_charset_cache != NULL) + return output_charset_cache; + else + { +# ifdef _LIBC + return _NL_CURRENT (LC_CTYPE, CODESET); +# else +# if HAVE_ICONV + extern const char *locale_charset PARAMS ((void)); + return locale_charset (); +# endif +# endif + } + } +} +#endif + /* @@ begin of epilog @@ */ /* We don't want libintl.a to depend on any other library. 
So we diff --git a/intl/gettextP.h b/intl/gettextP.h index 46b51e1..f18535a 100644 --- a/intl/gettextP.h +++ b/intl/gettextP.h @@ -1,5 +1,5 @@ /* Header describing internals of libintl library. - Copyright (C) 1995-1999, 2000, 2001, 2004 Free Software Foundation, Inc. + Copyright (C) 1995-1999, 2000, 2001, 2004-2005 Free Software Foundation, Inc. This file is part of the GNU C Library. Written by Ulrich Drepper , 1995. @@ -88,6 +88,26 @@ struct sysdep_string_desc const char *pointer; }; +/* Cache of translated strings after charset conversion. + Note: The strings are converted to the target encoding only on an as-needed + basis. */ +struct converted_domain +{ + /* The target encoding name. */ + const char *encoding; + /* The descriptor for conversion from the message catalog's encoding to + this target encoding. */ +#ifdef _LIBC + __gconv_t conv; +#else +# if HAVE_ICONV + iconv_t conv; +# endif +#endif + /* The table of translated strings after charset conversion. */ + char **conv_tab; +}; + /* The representation of an opened message catalog. */ struct loaded_domain { @@ -123,15 +143,9 @@ struct loaded_domain /* 1 if the hash table uses a different endianness than this machine. */ int must_swap_hash_tab; - int codeset_cntr; -#ifdef _LIBC - __gconv_t conv; -#else -# if HAVE_ICONV - iconv_t conv; -# endif -#endif - char **conv_tab; + /* Cache of charset conversions of the translated strings. */ + struct converted_domain *conversions; + size_t nconversions; struct expression *plural; unsigned long int nplurals; @@ -151,7 +165,6 @@ struct binding { struct binding *next; char *dirname; - int codeset_cntr; /* Incremented each time codeset changes. 
*/ char *codeset; char domainname[ZERO]; }; @@ -173,16 +186,10 @@ struct loaded_l10nfile *_nl_find_domain PARAMS ((const char *__dirname, void _nl_load_domain PARAMS ((struct loaded_l10nfile *__domain, struct binding *__domainbinding)) internal_function; -const char *_nl_init_domain_conv PARAMS ((struct loaded_l10nfile *__domain_file, - struct loaded_domain *__domain, - struct binding *__domainbinding)) - internal_function; -void _nl_free_domain_conv PARAMS ((struct loaded_domain *__domain)) - internal_function; char *_nl_find_msg PARAMS ((struct loaded_l10nfile *domain_file, - struct binding *domainbinding, - const char *msgid, size_t *lengthp)) + struct binding *domainbinding, const char *msgid, + int convert, size_t *lengthp)) internal_function; #ifdef _LIBC diff --git a/intl/loadmsgcat.c b/intl/loadmsgcat.c index efefc69..1f55531 100644 --- a/intl/loadmsgcat.c +++ b/intl/loadmsgcat.c @@ -1,5 +1,5 @@ /* Load needed message catalogs. - Copyright (C) 1995-2004 Free Software Foundation, Inc. + Copyright (C) 1995-2005 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -752,146 +752,6 @@ get_sysdep_segment_value (name) return NULL; } -/* Initialize the codeset dependent parts of an opened message catalog. - Return the header entry. */ -const char * -internal_function -_nl_init_domain_conv (domain_file, domain, domainbinding) - struct loaded_l10nfile *domain_file; - struct loaded_domain *domain; - struct binding *domainbinding; -{ - /* Find out about the character set the file is encoded with. - This can be found (in textual form) in the entry "". If this - entry does not exist or if this does not contain the `charset=' - information, we will assume the charset matches the one the - current locale and we don't have to perform any conversion. */ - char *nullentry; - size_t nullentrylen; - - /* Preinitialize fields, to avoid recursion during _nl_find_msg. 
*/ - domain->codeset_cntr = - (domainbinding != NULL ? domainbinding->codeset_cntr : 0); -#ifdef _LIBC - domain->conv = (__gconv_t) -1; -#else -# if HAVE_ICONV - domain->conv = (iconv_t) -1; -# endif -#endif - domain->conv_tab = NULL; - - /* Get the header entry. */ - nullentry = _nl_find_msg (domain_file, domainbinding, "", &nullentrylen); - - if (nullentry != NULL) - { -#if defined _LIBC || HAVE_ICONV - const char *charsetstr; - - charsetstr = strstr (nullentry, "charset="); - if (charsetstr != NULL) - { - size_t len; - char *charset; - const char *outcharset; - - charsetstr += strlen ("charset="); - len = strcspn (charsetstr, " \t\n"); - - charset = (char *) alloca (len + 1); -# if defined _LIBC || HAVE_MEMPCPY - *((char *) mempcpy (charset, charsetstr, len)) = '\0'; -# else - memcpy (charset, charsetstr, len); - charset[len] = '\0'; -# endif - - /* The output charset should normally be determined by the - locale. But sometimes the locale is not used or not correctly - set up, so we provide a possibility for the user to override - this. Moreover, the value specified through - bind_textdomain_codeset overrides both. */ - if (domainbinding != NULL && domainbinding->codeset != NULL) - outcharset = domainbinding->codeset; - else - { - outcharset = getenv ("OUTPUT_CHARSET"); - if (outcharset == NULL || outcharset[0] == '\0') - { -# ifdef _LIBC - outcharset = _NL_CURRENT (LC_CTYPE, CODESET); -# else -# if HAVE_ICONV - extern const char *locale_charset PARAMS ((void)); - outcharset = locale_charset (); -# endif -# endif - } - } - -# ifdef _LIBC - /* We always want to use transliteration. */ - outcharset = norm_add_slashes (outcharset, "TRANSLIT"); - charset = norm_add_slashes (charset, ""); - if (__gconv_open (outcharset, charset, &domain->conv, - GCONV_AVOID_NOCONV) - != __GCONV_OK) - domain->conv = (__gconv_t) -1; -# else -# if HAVE_ICONV - /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5, - we want to use transliteration. 
*/ -# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \ - || _LIBICONV_VERSION >= 0x0105 - if (strchr (outcharset, '/') == NULL) - { - char *tmp; - - len = strlen (outcharset); - tmp = (char *) alloca (len + 10 + 1); - memcpy (tmp, outcharset, len); - memcpy (tmp + len, "//TRANSLIT", 10 + 1); - outcharset = tmp; - - domain->conv = iconv_open (outcharset, charset); - - freea (outcharset); - } - else -# endif - domain->conv = iconv_open (outcharset, charset); -# endif -# endif - - freea (charset); - } -#endif /* _LIBC || HAVE_ICONV */ - } - - return nullentry; -} - -/* Frees the codeset dependent parts of an opened message catalog. */ -void -internal_function -_nl_free_domain_conv (domain) - struct loaded_domain *domain; -{ - if (domain->conv_tab != NULL && domain->conv_tab != (char **) -1) - free (domain->conv_tab); - -#ifdef _LIBC - if (domain->conv != (__gconv_t) -1) - __gconv_close (domain->conv); -#else -# if HAVE_ICONV - if (domain->conv != (iconv_t) -1) - iconv_close (domain->conv); -# endif -#endif -} - /* Load the message catalogs specified by FILENAME. If it is no valid message catalog do nothing. */ void @@ -913,6 +773,7 @@ _nl_load_domain (domain_file, domainbinding) struct loaded_domain *domain; int revision; const char *nullentry; + size_t nullentrylen; __libc_lock_lock_recursive (lock); if (domain_file->decided != 0) @@ -920,8 +781,8 @@ _nl_load_domain (domain_file, domainbinding) /* There are two possibilities: + is is the same thread calling again during this - initialization via _nl_init_domain_conv and _nl_find_msg. We - have initialized everything this call needs. + initialization via _nl_find_msg. We have initialized + everything this call needs. + this is another thread which tried to initialize this object. 
Not necessary anymore since if the lock is available this @@ -1388,12 +1249,12 @@ _nl_load_domain (domain_file, domainbinding) goto out; } - /* Now initialize the character set converter from the character set - the file is encoded with (found in the header entry) to the domain's - specified character set or the locale's character set. */ - nullentry = _nl_init_domain_conv (domain_file, domain, domainbinding); + /* No caches of converted translations so far. */ + domain->conversions = NULL; + domain->nconversions = 0; - /* Also look for a plural specification. */ + /* Get the header entry and look for a plural specification. */ + nullentry = _nl_find_msg (domain_file, domainbinding, "", 0, &nullentrylen); EXTRACT_PLURAL_EXPRESSION (nullentry, &domain->plural, &domain->nplurals); out: @@ -1412,10 +1273,23 @@ internal_function __libc_freeres_fn_section _nl_unload_domain (domain) struct loaded_domain *domain; { + size_t i; + if (domain->plural != &__gettext_germanic_plural) __gettext_free_exp (domain->plural); - _nl_free_domain_conv (domain); + for (i = 0; i < domain->nconversions; i++) + { + struct converted_domain *convd = &domain->conversions[i]; + + free ((char *) convd->encoding); + if (convd->conv_tab != NULL && convd->conv_tab != (char **) -1) + free (convd->conv_tab); + if (convd->conv != (__gconv_t) -1) + __gconv_close (convd->conv); + } + if (domain->conversions != NULL) + free (domain->conversions); if (domain->malloced) free (domain->malloced); diff --git a/intl/tst-gettext3.c b/intl/tst-gettext3.c new file mode 100644 index 0000000..917967b --- /dev/null +++ b/intl/tst-gettext3.c @@ -0,0 +1,60 @@ +/* Test that the gettext() results come out in the correct encoding for + locales that differ only in their encoding. + Copyright (C) 2001, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible , 2001, 2005. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <libintl.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int +main (void) +{ + char *s; + int result = 0; + + unsetenv ("LANGUAGE"); + unsetenv ("OUTPUT_CHARSET"); + textdomain ("codeset"); + bindtextdomain ("codeset", OBJPFX "domaindir"); + + setlocale (LC_ALL, "de_DE.ISO-8859-1"); + + /* Here we expect output in ISO-8859-1. */ + s = gettext ("cheese"); + if (strcmp (s, "K\344se")) + { + printf ("call 1 returned: %s\n", s); + result = 1; + } + + setlocale (LC_ALL, "de_DE.UTF-8"); + + /* Here we expect output in UTF-8. */ + s = gettext ("cheese"); + if (strcmp (s, "K\303\244se")) + { + printf ("call 2 returned: %s\n", s); + result = 1; + } + + return result; +} diff --git a/intl/tst-gettext3.sh b/intl/tst-gettext3.sh new file mode 100644 index 0000000..7bd977c --- /dev/null +++ b/intl/tst-gettext3.sh @@ -0,0 +1,45 @@ +#! /bin/sh +# Test that the gettext() results come out in the correct encoding for +# locales that differ only in their encoding. +# Copyright (C) 2001, 2002, 2005 Free Software Foundation, Inc. +# This file is part of the GNU C Library. 
+# + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, write to the Free +# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +# 02111-1307 USA. + +common_objpfx=$1 +objpfx=$2 + +LC_ALL=C +export LC_ALL + +# Generate the test data. +test -d ${objpfx}domaindir || mkdir ${objpfx}domaindir +# Create the domain directories. +test -d ${objpfx}domaindir/de_DE || mkdir ${objpfx}domaindir/de_DE +test -d ${objpfx}domaindir/de_DE/LC_MESSAGES || mkdir ${objpfx}domaindir/de_DE/LC_MESSAGES +# Populate them. +msgfmt -o ${objpfx}domaindir/de_DE/LC_MESSAGES/codeset.mo tstcodeset.po + +GCONV_PATH=${common_objpfx}iconvdata +export GCONV_PATH +LOCPATH=${common_objpfx}localedata +export LOCPATH + +${common_objpfx}elf/ld.so --library-path $common_objpfx \ +${objpfx}tst-gettext3 > ${objpfx}tst-gettext3.out + +exit $? -- 2.7.4