From b5fa5b9867eec91047a16d45f79888395cf89931 Mon Sep 17 00:00:00 2001 From: Owen Taylor Date: Thu, 27 Sep 2001 02:49:05 +0000 Subject: [PATCH] Fixes for #58195, based on some ideas from Hidetosh Tajima. Wed Sep 26 22:34:12 2001 Owen Taylor Fixes for #58195, based on some ideas from Hidetosh Tajima. * aclibcharset.m4 glib/libcharset: Add Bruno Haible's portable-current charset detection code from libiconv. * glib/gutf8.c (g_utf8_get_charset_internal): Rewrite to use _g_locale_charset(). * glib/gutf8.c (_g_charset_get_aliases): Private functions to get aliases from libcharset for a particular canonical name. * glib/gconvert.c: If loading a charset fails, try aliases to look for fallbacks. --- ChangeLog | 17 ++ ChangeLog.pre-2-0 | 17 ++ ChangeLog.pre-2-10 | 17 ++ ChangeLog.pre-2-12 | 17 ++ ChangeLog.pre-2-2 | 17 ++ ChangeLog.pre-2-4 | 17 ++ ChangeLog.pre-2-6 | 17 ++ ChangeLog.pre-2-8 | 17 ++ aclibcharset.m4 | 47 ++++ configure.in | 8 + docs/reference/glib/tmpl/glib-unused.sgml | 26 -- glib/Makefile.am | 6 +- glib/gconvert.c | 61 ++++- glib/gutf8.c | 119 +++++--- glib/libcharset/.cvsignore | 13 + glib/libcharset/Makefile.am | 59 ++++ glib/libcharset/README | 41 +++ glib/libcharset/config.charset | 438 ++++++++++++++++++++++++++++++ glib/libcharset/libcharset-glib.patch | 45 +++ glib/libcharset/libcharset.h | 41 +++ glib/libcharset/localcharset.c | 276 +++++++++++++++++++ glib/libcharset/make-patch.sh | 23 ++ glib/libcharset/ref-add.sin | 31 +++ glib/libcharset/ref-del.sin | 26 ++ glib/libcharset/update.sh | 30 ++ 25 files changed, 1361 insertions(+), 65 deletions(-) create mode 100644 aclibcharset.m4 create mode 100644 glib/libcharset/.cvsignore create mode 100644 glib/libcharset/Makefile.am create mode 100644 glib/libcharset/README create mode 100755 glib/libcharset/config.charset create mode 100644 glib/libcharset/libcharset-glib.patch create mode 100644 glib/libcharset/libcharset.h create mode 100644 glib/libcharset/localcharset.c create mode 100755 
glib/libcharset/make-patch.sh create mode 100644 glib/libcharset/ref-add.sin create mode 100644 glib/libcharset/ref-del.sin create mode 100755 glib/libcharset/update.sh diff --git a/ChangeLog b/ChangeLog index 52f2abc..bf58b8f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +Wed Sep 26 22:34:12 2001 Owen Taylor + + Fixes for #58195, based on some ideas from Hidetosh Tajima. + + * aclibcharset.m4 glib/libcharset: Add Bruno Haible's + portable-current charset detection code from libiconv. + + * glib/gutf8.c (g_utf8_get_charset_internal): Rewrite + to use _g_locale_charset(). + + * glib/gutf8.c (_g_charset_get_aliases): Private functions + to get aliases from libcharset for a particular canonical + name. + + * glib/gconvert.c: If loading a charset fails, try + aliases to look for fallbacks. + 2001-09-26 Matthias Clasen * gmem.c (g_mem_is_system_malloc): Return !vtable_set. diff --git a/ChangeLog.pre-2-0 b/ChangeLog.pre-2-0 index 52f2abc..bf58b8f 100644 --- a/ChangeLog.pre-2-0 +++ b/ChangeLog.pre-2-0 @@ -1,3 +1,20 @@ +Wed Sep 26 22:34:12 2001 Owen Taylor + + Fixes for #58195, based on some ideas from Hidetosh Tajima. + + * aclibcharset.m4 glib/libcharset: Add Bruno Haible's + portable-current charset detection code from libiconv. + + * glib/gutf8.c (g_utf8_get_charset_internal): Rewrite + to use _g_locale_charset(). + + * glib/gutf8.c (_g_charset_get_aliases): Private functions + to get aliases from libcharset for a particular canonical + name. + + * glib/gconvert.c: If loading a charset fails, try + aliases to look for fallbacks. + 2001-09-26 Matthias Clasen * gmem.c (g_mem_is_system_malloc): Return !vtable_set. diff --git a/ChangeLog.pre-2-10 b/ChangeLog.pre-2-10 index 52f2abc..bf58b8f 100644 --- a/ChangeLog.pre-2-10 +++ b/ChangeLog.pre-2-10 @@ -1,3 +1,20 @@ +Wed Sep 26 22:34:12 2001 Owen Taylor + + Fixes for #58195, based on some ideas from Hidetosh Tajima. 
+ + * aclibcharset.m4 glib/libcharset: Add Bruno Haible's + portable-current charset detection code from libiconv. + + * glib/gutf8.c (g_utf8_get_charset_internal): Rewrite + to use _g_locale_charset(). + + * glib/gutf8.c (_g_charset_get_aliases): Private functions + to get aliases from libcharset for a particular canonical + name. + + * glib/gconvert.c: If loading a charset fails, try + aliases to look for fallbacks. + 2001-09-26 Matthias Clasen * gmem.c (g_mem_is_system_malloc): Return !vtable_set. diff --git a/ChangeLog.pre-2-12 b/ChangeLog.pre-2-12 index 52f2abc..bf58b8f 100644 --- a/ChangeLog.pre-2-12 +++ b/ChangeLog.pre-2-12 @@ -1,3 +1,20 @@ +Wed Sep 26 22:34:12 2001 Owen Taylor + + Fixes for #58195, based on some ideas from Hidetosh Tajima. + + * aclibcharset.m4 glib/libcharset: Add Bruno Haible's + portable-current charset detection code from libiconv. + + * glib/gutf8.c (g_utf8_get_charset_internal): Rewrite + to use _g_locale_charset(). + + * glib/gutf8.c (_g_charset_get_aliases): Private functions + to get aliases from libcharset for a particular canonical + name. + + * glib/gconvert.c: If loading a charset fails, try + aliases to look for fallbacks. + 2001-09-26 Matthias Clasen * gmem.c (g_mem_is_system_malloc): Return !vtable_set. diff --git a/ChangeLog.pre-2-2 b/ChangeLog.pre-2-2 index 52f2abc..bf58b8f 100644 --- a/ChangeLog.pre-2-2 +++ b/ChangeLog.pre-2-2 @@ -1,3 +1,20 @@ +Wed Sep 26 22:34:12 2001 Owen Taylor + + Fixes for #58195, based on some ideas from Hidetosh Tajima. + + * aclibcharset.m4 glib/libcharset: Add Bruno Haible's + portable-current charset detection code from libiconv. + + * glib/gutf8.c (g_utf8_get_charset_internal): Rewrite + to use _g_locale_charset(). + + * glib/gutf8.c (_g_charset_get_aliases): Private functions + to get aliases from libcharset for a particular canonical + name. + + * glib/gconvert.c: If loading a charset fails, try + aliases to look for fallbacks. 
+ 2001-09-26 Matthias Clasen * gmem.c (g_mem_is_system_malloc): Return !vtable_set. diff --git a/ChangeLog.pre-2-4 b/ChangeLog.pre-2-4 index 52f2abc..bf58b8f 100644 --- a/ChangeLog.pre-2-4 +++ b/ChangeLog.pre-2-4 @@ -1,3 +1,20 @@ +Wed Sep 26 22:34:12 2001 Owen Taylor + + Fixes for #58195, based on some ideas from Hidetosh Tajima. + + * aclibcharset.m4 glib/libcharset: Add Bruno Haible's + portable-current charset detection code from libiconv. + + * glib/gutf8.c (g_utf8_get_charset_internal): Rewrite + to use _g_locale_charset(). + + * glib/gutf8.c (_g_charset_get_aliases): Private functions + to get aliases from libcharset for a particular canonical + name. + + * glib/gconvert.c: If loading a charset fails, try + aliases to look for fallbacks. + 2001-09-26 Matthias Clasen * gmem.c (g_mem_is_system_malloc): Return !vtable_set. diff --git a/ChangeLog.pre-2-6 b/ChangeLog.pre-2-6 index 52f2abc..bf58b8f 100644 --- a/ChangeLog.pre-2-6 +++ b/ChangeLog.pre-2-6 @@ -1,3 +1,20 @@ +Wed Sep 26 22:34:12 2001 Owen Taylor + + Fixes for #58195, based on some ideas from Hidetosh Tajima. + + * aclibcharset.m4 glib/libcharset: Add Bruno Haible's + portable-current charset detection code from libiconv. + + * glib/gutf8.c (g_utf8_get_charset_internal): Rewrite + to use _g_locale_charset(). + + * glib/gutf8.c (_g_charset_get_aliases): Private functions + to get aliases from libcharset for a particular canonical + name. + + * glib/gconvert.c: If loading a charset fails, try + aliases to look for fallbacks. + 2001-09-26 Matthias Clasen * gmem.c (g_mem_is_system_malloc): Return !vtable_set. diff --git a/ChangeLog.pre-2-8 b/ChangeLog.pre-2-8 index 52f2abc..bf58b8f 100644 --- a/ChangeLog.pre-2-8 +++ b/ChangeLog.pre-2-8 @@ -1,3 +1,20 @@ +Wed Sep 26 22:34:12 2001 Owen Taylor + + Fixes for #58195, based on some ideas from Hidetosh Tajima. + + * aclibcharset.m4 glib/libcharset: Add Bruno Haible's + portable-current charset detection code from libiconv. 
+ + * glib/gutf8.c (g_utf8_get_charset_internal): Rewrite + to use _g_locale_charset(). + + * glib/gutf8.c (_g_charset_get_aliases): Private functions + to get aliases from libcharset for a particular canonical + name. + + * glib/gconvert.c: If loading a charset fails, try + aliases to look for fallbacks. + 2001-09-26 Matthias Clasen * gmem.c (g_mem_is_system_malloc): Return !vtable_set. diff --git a/aclibcharset.m4 b/aclibcharset.m4 new file mode 100644 index 0000000..4f7c20b --- /dev/null +++ b/aclibcharset.m4 @@ -0,0 +1,47 @@ +dnl From libcharset 1.1 +#serial 2 + +dnl From Bruno Haible. + +AC_DEFUN(jm_LANGINFO_CODESET, +[ + AC_CHECK_HEADERS(langinfo.h) + AC_CHECK_FUNCS(nl_langinfo) + + AC_CACHE_CHECK([for nl_langinfo and CODESET], jm_cv_langinfo_codeset, + [AC_TRY_LINK([#include <langinfo.h>], + [char* cs = nl_langinfo(CODESET);], + jm_cv_langinfo_codeset=yes, + jm_cv_langinfo_codeset=no) + ]) + if test $jm_cv_langinfo_codeset = yes; then + AC_DEFINE(HAVE_LANGINFO_CODESET, 1, + [Define if you have <langinfo.h> and nl_langinfo(CODESET).]) + fi +]) +#serial 2 + +# Test for the GNU C Library, version 2.1 or newer. +# From Bruno Haible. 
+ +AC_DEFUN(jm_GLIBC21, + [ + AC_CACHE_CHECK(whether we are using the GNU C Library 2.1 or newer, + ac_cv_gnu_library_2_1, + [AC_EGREP_CPP([Lucky GNU user], + [ +#include <features.h> +#ifdef __GNU_LIBRARY__ + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1) || (__GLIBC__ > 2) + Lucky GNU user + #endif +#endif + ], + ac_cv_gnu_library_2_1=yes, + ac_cv_gnu_library_2_1=no) + ] + ) + AC_SUBST(GLIBC21) + GLIBC21="$ac_cv_gnu_library_2_1" + ] +) diff --git a/configure.in b/configure.in index f6d84b5..72b6385 100644 --- a/configure.in +++ b/configure.in @@ -2,6 +2,7 @@ dnl *********************************** dnl *** include special GLib macros *** dnl *********************************** builtin(include, acglib.m4)dnl +builtin(include, aclibcharset.m4)dnl # require autoconf 2.13 AC_PREREQ(2.13) @@ -501,6 +502,12 @@ AC_C_BIGENDIAN AC_CHECK_HEADERS([float.h limits.h pwd.h sys/param.h sys/poll.h sys/select.h]) AC_CHECK_HEADERS([sys/time.h sys/times.h unistd.h values.h stdint.h sched.h]) +# Checks for libcharset +jm_LANGINFO_CODESET +jm_GLIBC21 +AC_CHECK_HEADERS([stddef.h stdlib.h string.h]) +AC_CHECK_FUNCS(setlocale) + AC_MSG_CHECKING(whether make is GNU Make) STRIP_BEGIN= STRIP_END= @@ -2145,6 +2152,7 @@ Makefile build/Makefile build/win32/Makefile glib/Makefile +glib/libcharset/Makefile gmodule/gmoduleconf.h gmodule/Makefile gobject/Makefile diff --git a/docs/reference/glib/tmpl/glib-unused.sgml b/docs/reference/glib/tmpl/glib-unused.sgml index 1314294..6d6c397 100644 --- a/docs/reference/glib/tmpl/glib-unused.sgml +++ b/docs/reference/glib/tmpl/glib-unused.sgml @@ -1,29 +1,3 @@ - - -These functions provide support for logging error messages or messages -used for debugging. - - - -There are several built-in levels of messages, defined in #GLogLevelFlags. -These can be extended with user-defined levels. - - - - - - - - - - -versatile support for logging messages with different levels of importance. 
- - - -Message Logging - - diff --git a/glib/Makefile.am b/glib/Makefile.am index c09f097..044952e 100644 --- a/glib/Makefile.am +++ b/glib/Makefile.am @@ -1,5 +1,7 @@ ## Process this file with automake to produce Makefile.in +SUBDIRS=libcharset + INCLUDES = -I$(top_srcdir) -DG_LOG_DOMAIN=g_log_domain_glib \ @GLIB_DEBUG_FLAGS@ -DG_DISABLE_DEPRECATED -DGLIB_COMPILATION @@ -141,8 +143,8 @@ if OS_WIN32 export_symbols = -export-symbols glib.def endif -libglib_1_3_la_LIBADD = @GIO@ @GSPAWN@ @PLATFORMDEP@ @G_LIB_WIN32_RESOURCE@ @ICONV_LIBS@ @G_LIBS_EXTRA@ -libglib_1_3_la_DEPENDENCIES = @GIO@ @GSPAWN@ @PLATFORMDEP@ @G_LIB_WIN32_RESOURCE@ @GLIB_DEF@ +libglib_1_3_la_LIBADD = libcharset/libcharset.la @GIO@ @GSPAWN@ @PLATFORMDEP@ @G_LIB_WIN32_RESOURCE@ @ICONV_LIBS@ @G_LIBS_EXTRA@ +libglib_1_3_la_DEPENDENCIES = libcharset/libcharset.la @GIO@ @GSPAWN@ @PLATFORMDEP@ @G_LIB_WIN32_RESOURCE@ @GLIB_DEF@ libglib_1_3_la_LDFLAGS = \ -version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE) \ diff --git a/glib/gconvert.c b/glib/gconvert.c index d904335..d0c4bd6 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -53,6 +53,41 @@ g_convert_error_quark() #error libiconv not in use but included iconv.h is from libiconv #endif +static gboolean +try_conversion (const char *to_codeset, + const char *from_codeset, + iconv_t *cd) +{ + *cd = iconv_open (to_codeset, from_codeset); + + if (*cd == (iconv_t)-1 && errno == EINVAL) + return FALSE; + else + return TRUE; +} + +static gboolean +try_to_aliases (const char **to_aliases, + const char *from_codeset, + iconv_t *cd) +{ + if (to_aliases) + { + const char **p = to_aliases; + while (*p) + { + if (try_conversion (*p, from_codeset, cd)) + return TRUE; + + p++; + } + } + + return FALSE; +} + +extern const char **_g_charset_get_aliases (const char *canonical_name); + /** * g_iconv_open: * @to_codeset: destination codeset @@ -71,8 +106,32 @@ GIConv g_iconv_open (const gchar *to_codeset, const gchar *from_codeset) { - iconv_t cd = iconv_open (to_codeset, 
from_codeset); + iconv_t cd; + if (!try_conversion (to_codeset, from_codeset, &cd)) + { + const char **to_aliases = _g_charset_get_aliases (to_codeset); + const char **from_aliases = _g_charset_get_aliases (from_codeset); + + if (from_aliases) + { + const char **p = from_aliases; + while (*p) + { + if (try_conversion (to_codeset, *p, &cd)) + return (GIConv)cd; + + if (try_to_aliases (to_aliases, *p, &cd)) + return (GIConv)cd; + + p++; + } + } + + if (try_to_aliases (to_aliases, from_codeset, &cd)) + return (GIConv)cd; + } + return (GIConv)cd; } diff --git a/glib/gutf8.c b/glib/gutf8.c index ab4217b..2aedfdc 100644 --- a/glib/gutf8.c +++ b/glib/gutf8.c @@ -36,6 +36,8 @@ #undef STRICT #endif +#include "libcharset/libcharset.h" + #include "glibintl.h" #define UTF8_COMPUTE(Char, Mask, Len) \ @@ -348,60 +350,105 @@ g_utf8_strncpy (gchar *dest, return dest; } -static gboolean -g_utf8_get_charset_internal (char **a) -{ - char *charset = getenv("CHARSET"); +G_LOCK_DEFINE_STATIC (aliases); - if (charset && a && ! *a) - *a = charset; +static GHashTable * +get_alias_hash (void) +{ + static GHashTable *alias_hash = NULL; + const char *aliases; - if (charset && strstr (charset, "UTF-8")) - return TRUE; + G_LOCK (aliases); -#ifdef HAVE_CODESET - charset = nl_langinfo(CODESET); - if (charset) + if (!alias_hash) { - if (a && ! 
*a) - *a = charset; - if (strcmp (charset, "UTF-8") == 0) - return TRUE; + alias_hash = g_hash_table_new (g_str_hash, g_str_equal); + + aliases = _g_locale_get_charset_aliases (); + while (*aliases != '\0') + { + const char *canonical; + const char *alias; + const char **alias_array; + int count = 0; + + alias = aliases; + aliases += strlen (aliases) + 1; + canonical = aliases; + aliases += strlen (aliases) + 1; + + alias_array = g_hash_table_lookup (alias_hash, canonical); + if (alias_array) + { + while (alias_array[count]) + count++; + } + + alias_array = g_renew (const char *, alias_array, count + 2); + alias_array[count] = alias; + alias_array[count + 1] = NULL; + + g_hash_table_insert (alias_hash, (char *)canonical, alias_array); + } } -#endif - -#if 0 /* #ifdef _NL_CTYPE_CODESET_NAME */ - charset = nl_langinfo (_NL_CTYPE_CODESET_NAME); - if (charset) + + G_UNLOCK (aliases); + + return alias_hash; +} + +/* As an abuse of the alias table, the following routines gets + * the charsets that are aliases for the canonical name. + */ +const char ** +_g_charset_get_aliases (const char *canonical_name) +{ + GHashTable *alias_hash = get_alias_hash (); + + return g_hash_table_lookup (alias_hash, canonical_name); +} + +static gboolean +g_utf8_get_charset_internal (const char **a) +{ + const char *charset = getenv("CHARSET"); + + if (charset && *charset) { - if (a && ! *a) - *a = charset; - if (strcmp (charset, "UTF-8") == 0) + *a = charset; + + if (charset && strstr (charset, "UTF-8")) return TRUE; + else + return FALSE; } -#endif -#ifdef G_PLATFORM_WIN32 - if (a && ! *a) + /* The libcharset code tries to be thread-safe without + * a lock, but has a memory leak and a missing memory + * barrier, so we lock for it + */ + G_LOCK (aliases); + charset = _g_locale_charset (); + G_UNLOCK (aliases); + + if (charset && *charset) { - static char codepage[10]; + *a = charset; - sprintf (codepage, "CP%d", GetACP ()); - *a = codepage; - /* What about codepage 1200? Is that UTF-8? 
*/ - return FALSE; + if (charset && strstr (charset, "UTF-8")) + return TRUE; + else + return FALSE; } -#else - if (a && ! *a) - *a = "US-ASCII"; -#endif /* Assume this for compatibility at present. */ + *a = "US-ASCII"; + return FALSE; } static int utf8_locale_cache = -1; -static char *utf8_charset_cache = NULL; +static const char *utf8_charset_cache = NULL; /** * g_get_charset: diff --git a/glib/libcharset/.cvsignore b/glib/libcharset/.cvsignore new file mode 100644 index 0000000..e01ff19 --- /dev/null +++ b/glib/libcharset/.cvsignore @@ -0,0 +1,13 @@ +Makefile.in +Makefile +.deps +.libs +ref-add.sed +ref-del.sed +charset.alias + + + + + + diff --git a/glib/libcharset/Makefile.am b/glib/libcharset/Makefile.am new file mode 100644 index 0000000..859fc86 --- /dev/null +++ b/glib/libcharset/Makefile.am @@ -0,0 +1,59 @@ +## Process this file with automake to produce Makefile.in + +INCLUDES = \ + -DLIBDIR=\"$(libdir)\" + +noinst_LTLIBRARIES = libcharset.la + +libcharset_la_SOURCES = \ + libcharset.h \ + localcharset.c + +EXTRA_DIST = \ + README \ + charset.alias \ + ref-add.sed \ + ref-del.sed \ + update.sh \ + make-patch.sh + +charset_alias = $(DESTDIR)$(libdir)/charset.alias +charset_tmp = $(DESTDIR)$(libdir)/charset.tmp +install-exec-local: all-local + $(mkinstalldirs) $(DESTDIR)$(libdir) + if test -f $(charset_alias); then \ + sed -f ref-add.sed $(charset_alias) > $(charset_tmp) ; \ + $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \ + rm -f $(charset_tmp) ; \ + else \ + if test @GLIBC21@ = no; then \ + sed -f ref-add.sed charset.alias > $(charset_tmp) ; \ + $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \ + rm -f $(charset_tmp) ; \ + fi ; \ + fi + +uninstall-local: all-local + if test -f $(charset_alias); then \ + sed -f ref-del.sed $(charset_alias) > $(charset_tmp); \ + if grep '^# Packages using this file: $$' $(charset_tmp) \ + > /dev/null; then \ + rm -f $(charset_alias); \ + else \ + $(INSTALL_DATA) $(charset_tmp) $(charset_alias); \ + fi; \ + rm -f 
$(charset_tmp); \ + fi + +charset.alias: config.charset + $(SHELL) $(srcdir)/config.charset '@host@' > t-$@ + mv t-$@ $@ + +all-local: ref-add.sed ref-del.sed charset.alias + +SUFFIXES = .sed .sin +.sin.sed: + sed -e '/^#/d' -e 's/@''PACKAGE''@/@PACKAGE@/g' $< > t-$@ + mv t-$@ $@ + +CLEANFILES = charset.alias ref-add.sed ref-del.sed diff --git a/glib/libcharset/README b/glib/libcharset/README new file mode 100644 index 0000000..cbb4f1e --- /dev/null +++ b/glib/libcharset/README @@ -0,0 +1,41 @@ +The sources are derived from Bruno Haible's libcharset library included +with libiconv: + + http://www.gnu.org/software/libiconv + +The 'update.sh' script in this directory, when pointed at +the original sources, updates the files in this directory +(and elsewhere in the GLib distribution) to the new version. + +The 'make-patch.sh' script in this directory regenerates +the patch files included in this directory from a copy +of the pristine sources and the files in this directory. + +The license on the portions from libiconv is reproduced +below. + +Owen Taylor +26 September 2001 + +==== + +/* Determine a canonical name for the current locale's character encoding. + + Copyright (C) 2000-2001 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + USA. */ + +/* Written by Bruno Haible . 
*/ diff --git a/glib/libcharset/config.charset b/glib/libcharset/config.charset new file mode 100755 index 0000000..f4f2611 --- /dev/null +++ b/glib/libcharset/config.charset @@ -0,0 +1,438 @@ +#! /bin/sh +# Output a system dependent table of character encoding aliases. +# +# Copyright (C) 2000-2001 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU Library General Public License as published +# by the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +# USA. +# +# The table consists of lines of the form +# ALIAS CANONICAL +# +# ALIAS is the (system dependent) result of "nl_langinfo (CODESET)". +# ALIAS is compared in a case sensitive way. +# +# CANONICAL is the GNU canonical name for this character encoding. +# It must be an encoding supported by libiconv. Support by GNU libc is +# also desirable. CANONICAL is case insensitive. Usually an upper case +# MIME charset name is preferred. +# The current list of GNU canonical charset names is as follows. +# +# name used by which systems a MIME name? 
+# ASCII, ANSI_X3.4-1968 glibc solaris freebsd +# ISO-8859-1 glibc aix hpux irix osf solaris freebsd yes +# ISO-8859-2 glibc aix hpux irix osf solaris freebsd yes +# ISO-8859-3 glibc yes +# ISO-8859-4 osf solaris freebsd yes +# ISO-8859-5 glibc aix hpux irix osf solaris freebsd yes +# ISO-8859-6 glibc aix hpux solaris yes +# ISO-8859-7 glibc aix hpux irix osf solaris yes +# ISO-8859-8 glibc aix hpux osf solaris yes +# ISO-8859-9 glibc aix hpux irix osf solaris yes +# ISO-8859-13 glibc +# ISO-8859-15 glibc aix osf solaris freebsd +# KOI8-R glibc solaris freebsd yes +# KOI8-U glibc freebsd yes +# CP437 dos +# CP775 dos +# CP850 aix osf dos +# CP852 dos +# CP855 dos +# CP856 aix +# CP857 dos +# CP861 dos +# CP862 dos +# CP864 dos +# CP865 dos +# CP866 freebsd dos +# CP869 dos +# CP874 win32 dos +# CP922 aix +# CP932 aix win32 dos +# CP943 aix +# CP949 osf win32 dos +# CP950 win32 dos +# CP1046 aix +# CP1124 aix +# CP1129 aix +# CP1250 win32 +# CP1251 glibc win32 +# CP1252 aix win32 +# CP1253 win32 +# CP1254 win32 +# CP1255 win32 +# CP1256 win32 +# CP1257 win32 +# GB2312 glibc aix hpux irix solaris freebsd yes +# EUC-JP glibc aix hpux irix osf solaris freebsd yes +# EUC-KR glibc aix hpux irix osf solaris freebsd yes +# EUC-TW glibc aix hpux irix osf solaris +# BIG5 glibc aix hpux osf solaris freebsd yes +# BIG5-HKSCS glibc +# GBK aix osf win32 dos +# GB18030 glibc +# SHIFT_JIS hpux osf solaris freebsd yes +# JOHAB glibc win32 +# TIS-620 glibc aix hpux osf solaris +# VISCII glibc yes +# HP-ROMAN8 hpux +# HP-ARABIC8 hpux +# HP-GREEK8 hpux +# HP-HEBREW8 hpux +# HP-TURKISH8 hpux +# HP-KANA8 hpux +# DEC-KANJI osf +# DEC-HANYU osf +# UTF-8 glibc aix hpux osf solaris yes +# +# Note: Names which are not marked as being a MIME name should not be used in +# Internet protocols for information interchange (mail, news, etc.). +# +# Note: ASCII and ANSI_X3.4-1968 are synonymous canonical names. Applications +# must understand both names and treat them as equivalent. 
+# +# The first argument passed to this file is the canonical host specification, +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM + +host="$1" +os=`echo "$host" | sed -e 's/^[^-]*-[^-]*-\(.*\)$/\1/'` +echo "# This file contains a table of character encoding aliases," +echo "# suitable for operating system '${os}'." +echo "# It was automatically generated from config.charset." +# List of references, updated during installation: +echo "# Packages using this file: " +case "$os" in + linux* | *-gnu*) + # With glibc-2.1 or newer, we don't need any canonicalization, + # because glibc has iconv and both glibc and libiconv support all + # GNU canonical names directly. Therefore, the Makefile does not + # need to install the alias file at all. + # The following applies only to glibc-2.0.x and older libcs. + echo "ISO_646.IRV:1983 ASCII" + ;; + aix*) + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-6 ISO-8859-6" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-8 ISO-8859-8" + echo "ISO8859-9 ISO-8859-9" + echo "ISO8859-15 ISO-8859-15" + echo "IBM-850 CP850" + echo "IBM-856 CP856" + echo "IBM-921 ISO-8859-13" + echo "IBM-922 CP922" + echo "IBM-932 CP932" + echo "IBM-943 CP943" + echo "IBM-1046 CP1046" + echo "IBM-1124 CP1124" + echo "IBM-1129 CP1129" + echo "IBM-1252 CP1252" + echo "IBM-eucCN GB2312" + echo "IBM-eucJP EUC-JP" + echo "IBM-eucKR EUC-KR" + echo "IBM-eucTW EUC-TW" + echo "big5 BIG5" + echo "GBK GBK" + echo "TIS-620 TIS-620" + echo "UTF-8 UTF-8" + ;; + hpux*) + echo "iso88591 ISO-8859-1" + echo "iso88592 ISO-8859-2" + echo "iso88595 ISO-8859-5" + echo "iso88596 ISO-8859-6" + echo "iso88597 ISO-8859-7" + echo "iso88598 ISO-8859-8" + echo "iso88599 ISO-8859-9" + echo "iso885915 ISO-8859-15" + echo "roman8 HP-ROMAN8" + echo "arabic8 HP-ARABIC8" + echo "greek8 HP-GREEK8" + echo "hebrew8 HP-HEBREW8" + echo "turkish8 HP-TURKISH8" + echo "kana8 HP-KANA8" + echo "tis620 
TIS-620" + echo "big5 BIG5" + echo "eucJP EUC-JP" + echo "eucKR EUC-KR" + echo "eucTW EUC-TW" + echo "hp15CN GB2312" + #echo "ccdc ?" # what is this? + echo "SJIS SHIFT_JIS" + echo "utf8 UTF-8" + ;; + irix*) + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-9 ISO-8859-9" + echo "eucCN GB2312" + echo "eucJP EUC-JP" + echo "eucKR EUC-KR" + echo "eucTW EUC-TW" + ;; + osf*) + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-4 ISO-8859-4" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-8 ISO-8859-8" + echo "ISO8859-9 ISO-8859-9" + echo "ISO8859-15 ISO-8859-15" + echo "cp850 CP850" + echo "big5 BIG5" + echo "dechanyu DEC-HANYU" + echo "dechanzi GB2312" + echo "deckanji DEC-KANJI" + echo "deckorean EUC-KR" + echo "eucJP EUC-JP" + echo "eucKR EUC-KR" + echo "eucTW EUC-TW" + echo "GBK GBK" + echo "KSC5601 CP949" + echo "sdeckanji EUC-JP" + echo "SJIS SHIFT_JIS" + echo "TACTIS TIS-620" + echo "UTF-8 UTF-8" + ;; + solaris*) + echo "646 ASCII" + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-4 ISO-8859-4" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-6 ISO-8859-6" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-8 ISO-8859-8" + echo "ISO8859-9 ISO-8859-9" + echo "ISO8859-15 ISO-8859-15" + echo "koi8-r KOI8-R" + echo "BIG5 BIG5" + echo "gb2312 GB2312" + echo "cns11643 EUC-TW" + echo "5601 EUC-KR" + echo "eucJP EUC-JP" + echo "PCK SHIFT_JIS" + echo "TIS620.2533 TIS-620" + #echo "sun_eu_greek ?" # what is this? + echo "UTF-8 UTF-8" + ;; + freebsd*) + # FreeBSD 4.2 doesn't have nl_langinfo(CODESET); therefore + # localcharset.c falls back to using the full locale name + # from the environment variables. 
+ echo "C ASCII" + echo "US-ASCII ASCII" + for l in la_LN lt_LN; do + echo "$l.ASCII ASCII" + done + for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \ + fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT la_LN \ + lt_LN nl_BE nl_NL no_NO pt_PT sv_SE; do + echo "$l.ISO_8859-1 ISO-8859-1" + echo "$l.DIS_8859-15 ISO-8859-15" + done + for l in cs_CZ hr_HR hu_HU la_LN lt_LN pl_PL sl_SI; do + echo "$l.ISO_8859-2 ISO-8859-2" + done + for l in la_LN lt_LT; do + echo "$l.ISO_8859-4 ISO-8859-4" + done + for l in ru_RU ru_SU; do + echo "$l.KOI8-R KOI8-R" + echo "$l.ISO_8859-5 ISO-8859-5" + echo "$l.CP866 CP866" + done + echo "uk_UA.KOI8-U KOI8-U" + echo "zh_TW.BIG5 BIG5" + echo "zh_TW.Big5 BIG5" + echo "zh_CN.EUC GB2312" + echo "ja_JP.EUC EUC-JP" + echo "ja_JP.SJIS SHIFT_JIS" + echo "ja_JP.Shift_JIS SHIFT_JIS" + echo "ko_KR.EUC EUC-KR" + ;; + beos*) + # BeOS has a single locale, and it has UTF-8 encoding. + echo "* UTF-8" + ;; + msdosdjgpp*) + # DJGPP 2.03 doesn't have nl_langinfo(CODESET); therefore + # localcharset.c falls back to using the full locale name + # from the environment variables. + echo "#" + echo "# The encodings given here may not all be correct." + echo "# If you find that the encoding given for your language and" + echo "# country is not the one your DOS machine actually uses, just" + echo "# correct it in this file, and send a mail to" + echo "# Juan Manuel Guerrero " + echo "# and Bruno Haible ." + echo "#" + echo "C ASCII" + # ISO-8859-1 languages + echo "ca CP850" + echo "ca_ES CP850" + echo "da CP865" # not CP850 ?? + echo "da_DK CP865" # not CP850 ?? + echo "de CP850" + echo "de_AT CP850" + echo "de_CH CP850" + echo "de_DE CP850" + echo "en CP850" + echo "en_AU CP850" # not CP437 ?? + echo "en_CA CP850" + echo "en_GB CP850" + echo "en_NZ CP437" + echo "en_US CP437" + echo "en_ZA CP850" # not CP437 ?? 
+ echo "es CP850" + echo "es_AR CP850" + echo "es_BO CP850" + echo "es_CL CP850" + echo "es_CO CP850" + echo "es_CR CP850" + echo "es_CU CP850" + echo "es_DO CP850" + echo "es_EC CP850" + echo "es_ES CP850" + echo "es_GT CP850" + echo "es_HN CP850" + echo "es_MX CP850" + echo "es_NI CP850" + echo "es_PA CP850" + echo "es_PY CP850" + echo "es_PE CP850" + echo "es_SV CP850" + echo "es_UY CP850" + echo "es_VE CP850" + echo "et CP850" + echo "et_EE CP850" + echo "eu CP850" + echo "eu_ES CP850" + echo "fi CP850" + echo "fi_FI CP850" + echo "fr CP850" + echo "fr_BE CP850" + echo "fr_CA CP850" + echo "fr_CH CP850" + echo "fr_FR CP850" + echo "ga CP850" + echo "ga_IE CP850" + echo "gd CP850" + echo "gd_GB CP850" + echo "gl CP850" + echo "gl_ES CP850" + echo "id CP850" # not CP437 ?? + echo "id_ID CP850" # not CP437 ?? + echo "is CP861" # not CP850 ?? + echo "is_IS CP861" # not CP850 ?? + echo "it CP850" + echo "it_CH CP850" + echo "it_IT CP850" + echo "lt CP775" + echo "lt_LT CP775" + echo "lv CP775" + echo "lv_LV CP775" + echo "nb CP865" # not CP850 ?? + echo "nb_NO CP865" # not CP850 ?? + echo "nl CP850" + echo "nl_BE CP850" + echo "nl_NL CP850" + echo "nn CP865" # not CP850 ?? + echo "nn_NO CP865" # not CP850 ?? + echo "no CP865" # not CP850 ?? + echo "no_NO CP865" # not CP850 ?? + echo "pt CP850" + echo "pt_BR CP850" + echo "pt_PT CP850" + echo "sv CP850" + echo "sv_SE CP850" + # ISO-8859-2 languages + echo "cs CP852" + echo "cs_CZ CP852" + echo "hr CP852" + echo "hr_HR CP852" + echo "hu CP852" + echo "hu_HU CP852" + echo "pl CP852" + echo "pl_PL CP852" + echo "ro CP852" + echo "ro_RO CP852" + echo "sk CP852" + echo "sk_SK CP852" + echo "sl CP852" + echo "sl_SI CP852" + echo "sq CP852" + echo "sq_AL CP852" + echo "sr CP852" # CP852 or CP866 or CP855 ?? + echo "sr_YU CP852" # CP852 or CP866 or CP855 ?? + # ISO-8859-3 languages + echo "mt CP850" + echo "mt_MT CP850" + # ISO-8859-5 languages + echo "be CP866" + echo "be_BE CP866" + echo "bg CP866" # not CP855 ?? 
+ echo "bg_BG CP866" # not CP855 ?? + echo "mk CP866" # not CP855 ?? + echo "mk_MK CP866" # not CP855 ?? + echo "ru KOI8-R" # not CP866 ?? + echo "ru_RU KOI8-R" # not CP866 ?? + # ISO-8859-6 languages + echo "ar CP864" + echo "ar_AE CP864" + echo "ar_DZ CP864" + echo "ar_EG CP864" + echo "ar_IQ CP864" + echo "ar_IR CP864" + echo "ar_JO CP864" + echo "ar_KW CP864" + echo "ar_MA CP864" + echo "ar_OM CP864" + echo "ar_QA CP864" + echo "ar_SA CP864" + echo "ar_SY CP864" + # ISO-8859-7 languages + echo "el CP869" + echo "el_GR CP869" + # ISO-8859-8 languages + echo "he CP862" + echo "he_IL CP862" + # ISO-8859-9 languages + echo "tr CP857" + echo "tr_TR CP857" + # Japanese + echo "ja CP932" + echo "ja_JP CP932" + # Chinese + echo "zh_CN GBK" + echo "zh_TW CP950" # not CP938 ?? + # Korean + echo "kr CP949" # not CP934 ?? + echo "kr_KR CP949" # not CP934 ?? + # Thai + echo "th CP874" + echo "th_TH CP874" + # Other + echo "eo CP850" + echo "eo_EO CP850" + ;; +esac diff --git a/glib/libcharset/libcharset-glib.patch b/glib/libcharset/libcharset-glib.patch new file mode 100644 index 0000000..9289589 --- /dev/null +++ b/glib/libcharset/libcharset-glib.patch @@ -0,0 +1,45 @@ +# Patch against libcharset version 1.1 +--- /home/otaylor/ftp/libiconv-1.7.0.1/libcharset/lib/localcharset.c Wed Aug 8 08:52:28 2001 ++++ localcharset.c Wed Sep 26 22:47:38 2001 +@@ -83,8 +83,8 @@ + static const char * volatile charset_aliases; + + /* Return a pointer to the contents of the charset.alias file. */ +-static const char * +-get_charset_aliases () ++const char * ++_g_locale_get_charset_aliases () + { + const char *cp; + +@@ -205,7 +205,7 @@ + STATIC + #endif + const char * +-locale_charset () ++_g_locale_charset () + { + const char *codeset; + const char *aliases; +@@ -262,7 +262,7 @@ + codeset = ""; + + /* Resolve alias. 
*/ +- for (aliases = get_charset_aliases (); ++ for (aliases = _g_locale_get_charset_aliases (); + *aliases != '\0'; + aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1) + if (strcmp (codeset, aliases) == 0 +--- /home/otaylor/ftp/libiconv-1.7.0.1/libcharset/include/libcharset.h.in Tue Mar 27 08:34:42 2001 ++++ libcharset.h Wed Sep 26 21:55:40 2001 +@@ -30,8 +30,8 @@ + The result must not be freed; it is statically allocated. + If the canonical name cannot be determined, the result is a non-canonical + name. */ +-extern const char * locale_charset (void); +- ++extern const char * _g_locale_charset (void); ++extern const char * _g_locale_get_charset_aliases (void); + + #ifdef __cplusplus + } diff --git a/glib/libcharset/libcharset.h b/glib/libcharset/libcharset.h new file mode 100644 index 0000000..ba4c6bd --- /dev/null +++ b/glib/libcharset/libcharset.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2000-2001 Free Software Foundation, Inc. + This file is part of the GNU CHARSET Library. + + The GNU CHARSET Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU CHARSET Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with the GNU CHARSET Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/
+
+#ifndef _LIBCHARSET_H
+#define _LIBCHARSET_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Determine the current locale's character encoding, and canonicalize it
+   into one of the canonical names listed in config.charset.
+   The result must not be freed; it is statically allocated.
+   If the canonical name cannot be determined, the result is a non-canonical
+   name.  */
+extern const char * _g_locale_charset (void);
+extern const char * _g_locale_get_charset_aliases (void);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _LIBCHARSET_H */
diff --git a/glib/libcharset/localcharset.c b/glib/libcharset/localcharset.c
new file mode 100644
index 0000000..acd88ed
--- /dev/null
+++ b/glib/libcharset/localcharset.c
@@ -0,0 +1,285 @@
+/* Determine a canonical name for the current locale's character encoding.
+
+   Copyright (C) 2000-2001 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU Library General Public License as published
+   by the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+   USA.  */
+
+/* Written by Bruno Haible .  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+
+#if defined _WIN32 || defined __WIN32__
+# undef WIN32   /* avoid warning on mingw32 */
+# define WIN32
+#endif
+
+#ifndef WIN32
+# if HAVE_LANGINFO_CODESET
+#  include <langinfo.h>
+# else
+#  if HAVE_SETLOCALE
+#   include <locale.h>
+#  endif
+# endif
+#else /* WIN32 */
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+#endif
+
+#ifndef DIRECTORY_SEPARATOR
+# define DIRECTORY_SEPARATOR '/'
+#endif
+
+#ifndef ISSLASH
+# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
+#endif
+
+#ifdef __cplusplus
+/* When compiling with "gcc -x c++", produce functions with C linkage.  */
+extern "C" const char * _g_locale_charset (void);
+extern "C" const char * _g_locale_get_charset_aliases (void);
+#endif
+
+/* The following static variable is declared 'volatile' to avoid a
+   possible multithread problem in _g_locale_get_charset_aliases.  If we
+   are running in a threaded environment, and if two threads initialize
+   'charset_aliases' simultaneously, both will produce the same value,
+   and everything will be ok if the two assignments to 'charset_aliases'
+   are atomic.  But I don't know what will happen if the two assignments mix.  */
+#if __STDC__ != 1
+# define volatile /* empty */
+#endif
+/* Pointer to the contents of the charset.alias file, if it has already been
+   read, else NULL.  Its format is:
+   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
+static const char * volatile charset_aliases;
+
+/* Return a pointer to the contents of the charset.alias file.
+   The result is a sequence of NUL-terminated ALIAS, CANONICAL string pairs
+   ended by an empty string.  It is computed on the first call, cached in
+   'charset_aliases', and must not be freed by the caller.  If the file is
+   missing or memory runs out, the result is the empty list "".  */
+const char *
+_g_locale_get_charset_aliases ()
+{
+  const char *cp;
+
+  cp = charset_aliases;
+  if (cp == NULL)
+    {
+#ifndef WIN32
+      FILE *fp;
+      const char *dir = LIBDIR;
+      const char *base = "charset.alias";
+      char *file_name;
+
+      /* Concatenate dir and base into freshly allocated file_name.  */
+      {
+        size_t dir_len = strlen (dir);
+        size_t base_len = strlen (base);
+        int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
+        file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
+        if (file_name != NULL)
+          {
+            memcpy (file_name, dir, dir_len);
+            if (add_slash)
+              file_name[dir_len] = DIRECTORY_SEPARATOR;
+            memcpy (file_name + dir_len + add_slash, base, base_len + 1);
+          }
+      }
+
+      if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
+        /* Out of memory or file not found, treat it as empty.  */
+        cp = "";
+      else
+        {
+          /* Parse the file's contents.  */
+          int c;
+          char buf1[50+1];
+          char buf2[50+1];
+          char *res_ptr = NULL;
+          size_t res_size = 0;
+          size_t l1, l2;
+
+          for (;;)
+            {
+              c = getc (fp);
+              if (c == EOF)
+                break;
+              if (c == '\n' || c == ' ' || c == '\t')
+                continue;
+              if (c == '#')
+                {
+                  /* Skip comment, to end of line.  */
+                  do
+                    c = getc (fp);
+                  while (!(c == EOF || c == '\n'));
+                  if (c == EOF)
+                    break;
+                  continue;
+                }
+              ungetc (c, fp);
+              if (fscanf(fp, "%50s %50s", buf1, buf2) < 2)
+                break;
+              l1 = strlen (buf1);
+              l2 = strlen (buf2);
+              if (res_size == 0)
+                {
+                  res_size = l1 + 1 + l2 + 1;
+                  res_ptr = (char *) malloc (res_size + 1);
+                }
+              else
+                {
+                  char *old_res_ptr = res_ptr;
+                  res_size += l1 + 1 + l2 + 1;
+                  res_ptr = (char *) realloc (res_ptr, res_size + 1);
+                  if (res_ptr == NULL)
+                    /* realloc left the old buffer allocated; don't leak it.  */
+                    free (old_res_ptr);
+                }
+              if (res_ptr == NULL)
+                {
+                  /* Out of memory.  */
+                  res_size = 0;
+                  break;
+                }
+              strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
+              strcpy (res_ptr + res_size - (l2 + 1), buf2);
+            }
+          fclose (fp);
+          if (res_size == 0)
+            cp = "";
+          else
+            {
+              *(res_ptr + res_size) = '\0';
+              cp = res_ptr;
+            }
+        }
+
+      if (file_name != NULL)
+        free (file_name);
+
+#else /* WIN32 */
+
+      /* To avoid the troubles of installing a separate file in the same
+         directory as the DLL and of retrieving the DLL's directory at
+         runtime, simply inline the aliases here.  */
+
+      cp = "CP936" "\0" "GBK" "\0"
+           "CP1361" "\0" "JOHAB" "\0";
+#endif
+
+      charset_aliases = cp;
+    }
+
+  return cp;
+}
+
+/* Determine the current locale's character encoding, and canonicalize it
+   into one of the canonical names listed in config.charset.
+   The result must not be freed; it is statically allocated.
+   If the canonical name cannot be determined, the result is a non-canonical
+   name.  */
+
+#ifdef STATIC
+STATIC
+#endif
+const char *
+_g_locale_charset ()
+{
+  const char *codeset;
+  const char *aliases;
+
+#ifndef WIN32
+
+# if HAVE_LANGINFO_CODESET
+
+  /* Most systems support nl_langinfo (CODESET) nowadays.  */
+  codeset = nl_langinfo (CODESET);
+
+# else
+
+  /* On old systems which lack it, use setlocale or getenv.  */
+  const char *locale = NULL;
+
+  /* But most old systems don't have a complete set of locales.  Some
+     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
+     use setlocale here; it would return "C" when it doesn't support the
+     locale name the user has set.  */
+#  if HAVE_SETLOCALE && 0
+  locale = setlocale (LC_CTYPE, NULL);
+#  endif
+  if (locale == NULL || locale[0] == '\0')
+    {
+      locale = getenv ("LC_ALL");
+      if (locale == NULL || locale[0] == '\0')
+        {
+          locale = getenv ("LC_CTYPE");
+          if (locale == NULL || locale[0] == '\0')
+            locale = getenv ("LANG");
+        }
+    }
+
+  /* On some old systems, one used to set locale = "iso8859_1".  On others,
+     you set it to "language_COUNTRY.charset".  In any case, we resolve it
+     through the charset.alias file.  */
+  codeset = locale;
+
+# endif
+
+#else /* WIN32 */
+
+  static char buf[2 + 10 + 1];
+
+  /* Win32 has a function returning the locale's codepage as a number.  */
+  sprintf (buf, "CP%u", GetACP ());
+  codeset = buf;
+
+#endif
+
+  if (codeset == NULL)
+    /* The canonical name cannot be determined.  */
+    codeset = "";
+
+  /* Resolve alias.  */
+  for (aliases = _g_locale_get_charset_aliases ();
+       *aliases != '\0';
+       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
+    if (strcmp (codeset, aliases) == 0
+        || (aliases[0] == '*' && aliases[1] == '\0'))
+      {
+        codeset = aliases + strlen (aliases) + 1;
+        break;
+      }
+
+  return codeset;
+}
diff --git a/glib/libcharset/make-patch.sh b/glib/libcharset/make-patch.sh
new file mode 100755
index 0000000..9548708
--- /dev/null
+++ b/glib/libcharset/make-patch.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+#
+# Regenerate libcharset-glib.patch: the diff between a pristine libcharset
+# source tree (given as the only argument) and the copies in this directory.
+
+if test $# = 1 ; then
+  ORIGINAL=$1
+else
+  echo "Usage: make-patch.sh /path/to/libcharset" 1>&2
+  exit 1
+fi
+
+if test -f "$ORIGINAL/lib/localcharset.c" ; then : ; else
+  echo "Usage: make-patch.sh /path/to/libcharset" 1>&2
+  exit 1
+fi
+
+VERSION=`grep VERSION= "$ORIGINAL/configure.in" | sed s/VERSION=//`
+
+echo "# Patch against libcharset version $VERSION" > libcharset-glib.patch
+
+for i in localcharset.c ref-add.sin ref-del.sin ; do
+  diff -u "$ORIGINAL/lib/$i" "$i" >> libcharset-glib.patch
+done
+
+diff -u "$ORIGINAL/include/libcharset.h.in" libcharset.h >> libcharset-glib.patch
diff --git a/glib/libcharset/ref-add.sin b/glib/libcharset/ref-add.sin
new file mode 100644
index 0000000..167374e
--- /dev/null
+++ b/glib/libcharset/ref-add.sin
@@ -0,0 +1,31 @@
+# Add this package to a list of references stored in a text file.
+#
+#   Copyright (C) 2000 Free Software Foundation, Inc.
+#
+#   This program is free software; you can redistribute it and/or modify it
+#   under the terms of the GNU Library General Public License as published
+#   by the Free Software Foundation; either version 2, or (at your option)
+#   any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   Library General Public License for more details.
+#
+#   You should have received a copy of the GNU Library General Public
+#   License along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+#   USA.
+#
+# Written by Bruno Haible .
+#
+/^# Packages using this file: / {
+  s/# Packages using this file://
+  ta
+  :a
+  s/ @PACKAGE@ / @PACKAGE@ /
+  tb
+  s/ $/ @PACKAGE@ /
+  :b
+  s/^/# Packages using this file:/
+}
diff --git a/glib/libcharset/ref-del.sin b/glib/libcharset/ref-del.sin
new file mode 100644
index 0000000..613cf37
--- /dev/null
+++ b/glib/libcharset/ref-del.sin
@@ -0,0 +1,26 @@
+# Remove this package from a list of references stored in a text file.
+#
+#   Copyright (C) 2000 Free Software Foundation, Inc.
+#
+#   This program is free software; you can redistribute it and/or modify it
+#   under the terms of the GNU Library General Public License as published
+#   by the Free Software Foundation; either version 2, or (at your option)
+#   any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   Library General Public License for more details.
+#
+#   You should have received a copy of the GNU Library General Public
+#   License along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+#   USA.
+#
+# Written by Bruno Haible .
+#
+/^# Packages using this file: / {
+  s/# Packages using this file://
+  s/ @PACKAGE@ / /
+  s/^/# Packages using this file:/
+}
diff --git a/glib/libcharset/update.sh b/glib/libcharset/update.sh
new file mode 100755
index 0000000..6e2dfce
--- /dev/null
+++ b/glib/libcharset/update.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+#
+# Refresh the files in this directory from a pristine libcharset source tree
+# (given as the only argument), re-apply libcharset-glib.patch on top, and
+# regenerate ../../aclibcharset.m4 from libcharset's m4 macros.
+
+if test $# = 1 ; then
+  ORIGINAL=$1
+else
+  echo "Usage: update.sh /path/to/libcharset" 1>&2
+  exit 1
+fi
+
+if test -f "$ORIGINAL/lib/localcharset.c" ; then : ; else
+  echo "Usage: update.sh /path/to/libcharset" 1>&2
+  exit 1
+fi
+
+VERSION=`grep VERSION= "$ORIGINAL/configure.in" | sed s/VERSION=//`
+
+# Stop at the first failure so a partially copied tree is never patched.
+for i in localcharset.c ref-add.sin ref-del.sin config.charset ; do
+  cp "$ORIGINAL/lib/$i" . || exit 1
+done
+
+cp "$ORIGINAL/include/libcharset.h.in" ./libcharset.h || exit 1
+
+patch -p0 < libcharset-glib.patch || exit 1
+
+echo "dnl From libcharset $VERSION" > ../../aclibcharset.m4
+
+for i in codeset.m4 glibc21.m4 ; do
+  cat "$ORIGINAL/m4/$i" >> ../../aclibcharset.m4 || exit 1
+done
+
-- 
2.7.4