From c22cf34e923b81962da554c8edeb9366f044ae8c Mon Sep 17 00:00:00 2001 From: Tor Lillqvist Date: Wed, 2 Feb 2000 23:39:32 +0000 Subject: [PATCH] glib.h New functions for conversion between UTF-8 and the encoding 2000-02-01 Tor Lillqvist * glib.h * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New functions for conversion between UTF-8 and the encoding expected by C runtime functions like open() and stat(), and returned by readdir(). Implement them on Win32 where we use the system "ANSI" codepage, which might be single-byte or double-byte. On Unix, just skip the issue for now and provide dummy implementations that return a copy of the argument. * README.win32 * build-dll * glib.def: Minor updates. --- ChangeLog | 17 ++++ ChangeLog.pre-2-0 | 17 ++++ ChangeLog.pre-2-10 | 17 ++++ ChangeLog.pre-2-12 | 17 ++++ ChangeLog.pre-2-2 | 17 ++++ ChangeLog.pre-2-4 | 17 ++++ ChangeLog.pre-2-6 | 17 ++++ ChangeLog.pre-2-8 | 17 ++++ README.win32 | 35 +++++++-- build-dll | 6 +- glib.def | 4 +- glib.h | 7 ++ glib/glib.def | 4 +- glib/glib.h | 7 ++ glib/gstrfuncs.c | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++++ gstrfuncs.c | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 16 files changed, 637 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index dcb42bb..cb3d870 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2000-02-01 Tor Lillqvist + + * glib.h + * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New + functions for conversion between UTF-8 and the encoding expected + by C runtime functions like open() and stat(), and returned by + readdir(). + + Implement them on Win32 where we use the system "ANSI" codepage, + which might be single-byte or double-byte. On Unix, just skip the + issue for now and provide dummy implementations that return a copy + of the argument. + + * README.win32 + * build-dll + * glib.def: Minor updates. 
+ Wed Jan 26 05:24:38 2000 Tim Janik * glib.h: diff --git a/ChangeLog.pre-2-0 b/ChangeLog.pre-2-0 index dcb42bb..cb3d870 100644 --- a/ChangeLog.pre-2-0 +++ b/ChangeLog.pre-2-0 @@ -1,3 +1,20 @@ +2000-02-01 Tor Lillqvist + + * glib.h + * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New + functions for conversion between UTF-8 and the encoding expected + by C runtime functions like open() and stat(), and returned by + readdir(). + + Implement them on Win32 where we use the system "ANSI" codepage, + which might be single-byte or double-byte. On Unix, just skip the + issue for now and provide dummy implementations that return a copy + of the argument. + + * README.win32 + * build-dll + * glib.def: Minor updates. + Wed Jan 26 05:24:38 2000 Tim Janik * glib.h: diff --git a/ChangeLog.pre-2-10 b/ChangeLog.pre-2-10 index dcb42bb..cb3d870 100644 --- a/ChangeLog.pre-2-10 +++ b/ChangeLog.pre-2-10 @@ -1,3 +1,20 @@ +2000-02-01 Tor Lillqvist + + * glib.h + * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New + functions for conversion between UTF-8 and the encoding expected + by C runtime functions like open() and stat(), and returned by + readdir(). + + Implement them on Win32 where we use the system "ANSI" codepage, + which might be single-byte or double-byte. On Unix, just skip the + issue for now and provide dummy implementations that return a copy + of the argument. + + * README.win32 + * build-dll + * glib.def: Minor updates. + Wed Jan 26 05:24:38 2000 Tim Janik * glib.h: diff --git a/ChangeLog.pre-2-12 b/ChangeLog.pre-2-12 index dcb42bb..cb3d870 100644 --- a/ChangeLog.pre-2-12 +++ b/ChangeLog.pre-2-12 @@ -1,3 +1,20 @@ +2000-02-01 Tor Lillqvist + + * glib.h + * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New + functions for conversion between UTF-8 and the encoding expected + by C runtime functions like open() and stat(), and returned by + readdir(). 
+ + Implement them on Win32 where we use the system "ANSI" codepage, + which might be single-byte or double-byte. On Unix, just skip the + issue for now and provide dummy implementations that return a copy + of the argument. + + * README.win32 + * build-dll + * glib.def: Minor updates. + Wed Jan 26 05:24:38 2000 Tim Janik * glib.h: diff --git a/ChangeLog.pre-2-2 b/ChangeLog.pre-2-2 index dcb42bb..cb3d870 100644 --- a/ChangeLog.pre-2-2 +++ b/ChangeLog.pre-2-2 @@ -1,3 +1,20 @@ +2000-02-01 Tor Lillqvist + + * glib.h + * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New + functions for conversion between UTF-8 and the encoding expected + by C runtime functions like open() and stat(), and returned by + readdir(). + + Implement them on Win32 where we use the system "ANSI" codepage, + which might be single-byte or double-byte. On Unix, just skip the + issue for now and provide dummy implementations that return a copy + of the argument. + + * README.win32 + * build-dll + * glib.def: Minor updates. + Wed Jan 26 05:24:38 2000 Tim Janik * glib.h: diff --git a/ChangeLog.pre-2-4 b/ChangeLog.pre-2-4 index dcb42bb..cb3d870 100644 --- a/ChangeLog.pre-2-4 +++ b/ChangeLog.pre-2-4 @@ -1,3 +1,20 @@ +2000-02-01 Tor Lillqvist + + * glib.h + * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New + functions for conversion between UTF-8 and the encoding expected + by C runtime functions like open() and stat(), and returned by + readdir(). + + Implement them on Win32 where we use the system "ANSI" codepage, + which might be single-byte or double-byte. On Unix, just skip the + issue for now and provide dummy implementations that return a copy + of the argument. + + * README.win32 + * build-dll + * glib.def: Minor updates. 
+ Wed Jan 26 05:24:38 2000 Tim Janik * glib.h: diff --git a/ChangeLog.pre-2-6 b/ChangeLog.pre-2-6 index dcb42bb..cb3d870 100644 --- a/ChangeLog.pre-2-6 +++ b/ChangeLog.pre-2-6 @@ -1,3 +1,20 @@ +2000-02-01 Tor Lillqvist + + * glib.h + * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New + functions for conversion between UTF-8 and the encoding expected + by C runtime functions like open() and stat(), and returned by + readdir(). + + Implement them on Win32 where we use the system "ANSI" codepage, + which might be single-byte or double-byte. On Unix, just skip the + issue for now and provide dummy implementations that return a copy + of the argument. + + * README.win32 + * build-dll + * glib.def: Minor updates. + Wed Jan 26 05:24:38 2000 Tim Janik * glib.h: diff --git a/ChangeLog.pre-2-8 b/ChangeLog.pre-2-8 index dcb42bb..cb3d870 100644 --- a/ChangeLog.pre-2-8 +++ b/ChangeLog.pre-2-8 @@ -1,3 +1,20 @@ +2000-02-01 Tor Lillqvist + + * glib.h + * gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New + functions for conversion between UTF-8 and the encoding expected + by C runtime functions like open() and stat(), and returned by + readdir(). + + Implement them on Win32 where we use the system "ANSI" codepage, + which might be single-byte or double-byte. On Unix, just skip the + issue for now and provide dummy implementations that return a copy + of the argument. + + * README.win32 + * build-dll + * glib.def: Minor updates. + Wed Jan 26 05:24:38 2000 Tim Janik * glib.h: diff --git a/README.win32 b/README.win32 index 34764cf..cbae149 100644 --- a/README.win32 +++ b/README.win32 @@ -54,6 +54,14 @@ changed to G_OS_WIN32. G_OS_WIN32 implies using the Microsoft C runtime MSVCRT.DLL. 
+Building software that uses GLib or GTk+ +======================================== + +Unfortunately, even building software that just *uses* GLib or GTk+ +also requires having the right compiler set up the right way, so if +you intend to use gcc, follow the relevant instructions below in that +case, too. + Pthreads library ================ @@ -91,12 +99,9 @@ Building with gcc I use the latest and greatest gcc, gcc-2.95.2. 2.95 will also work. Earlier version might, but you are on your own. -Read and understand these instruction carefully. If you don't +Read these instructions carefully and understand them. If you don't understand or can't follow the instructions, you probably shouldn't -want to build GLib (or GTk+ or GIMP) yourself anyway. Unfortunately, -even building software that just *use* GLib or GTk+ also require to -have the right compiler set up the right way, so follow these -instructions in that case, too. +want to build GLib (or GTk+ or GIMP) yourself anyway. 0) Get and install Cygwin B20.1. @@ -270,6 +275,26 @@ diff -ru2 ./w32api/include/wingdi.h ../../src/mingw-runtime-19991107/w32api/incl ================ cut here ================ + fpos_t should be long long with MSVCRT.DLL: + +================ cut here ================ +--- stdio.h~ Thu Aug 19 02:47:42 1999 ++++ stdio.h Mon Jan 17 21:58:20 2000 +@@ -296,6 +296,11 @@ + * it is fairly evident that the fpos_t type is a long (in CRTDLL.DLL). + * Perhaps an unsigned long? TODO? ++ * In MSVCRT.DLL it's a long long, however. + */ ++#ifdef __MSVCRT__ ++typedef long long fpos_t; ++#else + typedef long fpos_t; ++#endif + + int fgetpos (FILE* fileGetPosition, fpos_t* pfpos); +================ cut here ================ + + (I haven't checked yet if other small errors I have noticed in previous w32api header versions have been corrected.)
diff --git a/build-dll b/build-dll index 4e13761..3ad30ac 100644 --- a/build-dll +++ b/build-dll @@ -1,10 +1,10 @@ #!/bin/bash -# Temporary hack until building dlls or executables with exported -# entry points is easier with gcc -mno-cygwin ("mingw32"). +# Temporary hack until building dlls is easier with gcc -mno-cygwin +# ("mingw32"). # This is usable with cygwin b20.1 and egcs-2.91.66 19990314 -# (egcs-1.1.2 release) or gcc-2.95 as distributed by Mumit Khan. For +# (egcs-1.1.2 release) or gcc-2.95(.2) as distributed by Mumit Khan. For # other combinations, no idea. GCC="gcc" diff --git a/glib.def b/glib.def index 3bc7466..a7f50e2 100644 --- a/glib.def +++ b/glib.def @@ -88,6 +88,8 @@ EXPORTS g_direct_equal g_direct_hash g_dirname + g_filename_from_utf8 + g_filename_to_utf8 g_free g_get_current_dir g_get_current_time @@ -348,7 +350,6 @@ EXPORTS g_source_remove g_source_remove_by_source_data g_source_remove_by_user_data - g_spaced_primes_closest g_static_mutex_get_mutex_impl g_static_private_get g_static_private_set @@ -446,3 +447,4 @@ EXPORTS glib_major_version glib_micro_version glib_minor_version + g_spaced_primes_closest diff --git a/glib.h b/glib.h index 0379126..0a6d646 100644 --- a/glib.h +++ b/glib.h @@ -1604,6 +1604,13 @@ gchar* g_strcompress (const gchar *source); */ gchar* g_strescape (const gchar *source, const gchar *exceptions); +/* + * Convert between the operating system (or C runtime) + * representation of file names and UTF-8. + */ +gchar* g_filename_to_utf8 (const gchar *opsysstring); +gchar* g_filename_from_utf8 (const gchar *utf8string); + /* Deprecated API: * gchar* g_strescape (const gchar *source); * Luckily this function wasn't much used. 
diff --git a/glib/glib.def b/glib/glib.def index 3bc7466..a7f50e2 100644 --- a/glib/glib.def +++ b/glib/glib.def @@ -88,6 +88,8 @@ EXPORTS g_direct_equal g_direct_hash g_dirname + g_filename_from_utf8 + g_filename_to_utf8 g_free g_get_current_dir g_get_current_time @@ -348,7 +350,6 @@ EXPORTS g_source_remove g_source_remove_by_source_data g_source_remove_by_user_data - g_spaced_primes_closest g_static_mutex_get_mutex_impl g_static_private_get g_static_private_set @@ -446,3 +447,4 @@ EXPORTS glib_major_version glib_micro_version glib_minor_version + g_spaced_primes_closest diff --git a/glib/glib.h b/glib/glib.h index 0379126..0a6d646 100644 --- a/glib/glib.h +++ b/glib/glib.h @@ -1604,6 +1604,13 @@ gchar* g_strcompress (const gchar *source); */ gchar* g_strescape (const gchar *source, const gchar *exceptions); +/* + * Convert between the operating system (or C runtime) + * representation of file names and UTF-8. + */ +gchar* g_filename_to_utf8 (const gchar *opsysstring); +gchar* g_filename_from_utf8 (const gchar *utf8string); + /* Deprecated API: * gchar* g_strescape (const gchar *source); * Luckily this function wasn't much used. diff --git a/glib/gstrfuncs.c b/glib/gstrfuncs.c index e549260..6df7757 100644 --- a/glib/gstrfuncs.c +++ b/glib/gstrfuncs.c @@ -42,6 +42,11 @@ #include #endif #include "glib.h" + +#ifdef G_OS_WIN32 +#include +#endif + /* do not include in this place since it * inteferes with g_strsignal() on some OSes */ @@ -1068,6 +1073,225 @@ g_strescape (const gchar *source, return dest; } +/* + * g_filename_to_utf8 + * + * Converts a string which is in the encoding used for file names by + * the C runtime (usually the same as that used by the operating + * system) in the current locale into a UTF-8 string. 
+ */ + +gchar * +g_filename_to_utf8 (const gchar *opsysstring) +{ +#ifdef G_OS_WIN32 /* convert via wide chars; the result is newly allocated */ + + gint i, clen, wclen, first; + const gint len = strlen (opsysstring); + wchar_t *wcs, wc; + gchar *result, *bp; + const wchar_t *wcp; + + wcs = g_new (wchar_t, len); /* room for len wide chars, the output limit passed below */ + wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len); /* decode from the system ANSI codepage */ + + wcp = wcs; /* pass 1: add up the UTF-8 bytes needed for all wide chars */ + clen = 0; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + clen += 1; + else if (wc < 0x800) + clen += 2; + else if (wc < 0x10000) + clen += 3; + else if (wc < 0x200000) /* NOTE(review): this and the longer cases need a wchar_t wider than 16 bits; confirm for the target compiler */ + clen += 4; + else if (wc < 0x4000000) + clen += 5; + else + clen += 6; + } + + result = g_malloc (clen + 1); /* one extra byte for the terminating NUL */ + + wcp = wcs; /* pass 2: encode each wide char into result */ + bp = result; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + { + first = 0; + clen = 1; + } + else if (wc < 0x800) + { + first = 0xc0; + clen = 2; + } + else if (wc < 0x10000) + { + first = 0xe0; + clen = 3; + } + else if (wc < 0x200000) + { + first = 0xf0; + clen = 4; + } + else if (wc < 0x4000000) + { + first = 0xf8; + clen = 5; + } + else + { + first = 0xfc; + clen = 6; + } + + /* Store the continuation bytes last to first, 6 bits each, then the lead byte */ + switch (clen) + { + case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 1: bp[0] = wc | first; /* lead byte: remaining high bits plus the length marker */ + } + + bp += clen; + } + *bp = 0; /* terminate the UTF-8 string */ + + g_free (wcs); + + return result; + +#else /* !G_OS_WIN32 */ + + return g_strdup (opsysstring); /* no conversion on other platforms: return a copy */ + +#endif /* G_OS_WIN32 */ +} + +/* + * g_filename_from_utf8 + * + * The reverse of g_filename_to_utf8.
+ */ + +gchar * +g_filename_from_utf8 (const gchar *utf8string) +{ +#ifdef G_OS_WIN32 /* decode UTF-8 into wide chars, then convert those to the ANSI codepage; returns NULL on malformed UTF-8 */ + + gint i, mask, clen, mblen; + const gint len = strlen (utf8string); + wchar_t *wcs, *wcp; + gchar *result; + guchar *cp, *end, c; + gint n; + + /* First decode the UTF-8 input into wide chars, one per sequence */ + cp = (guchar *) utf8string; + end = cp + len; + n = 0; + wcs = g_new (wchar_t, len + 1); /* a sequence is at least one byte, so len wide chars suffice */ + wcp = wcs; + while (cp != end) + { + mask = 0; + c = *cp; /* lead byte: selects the sequence length and the payload mask */ + + if (c < 0x80) + { + clen = 1; + mask = 0x7f; + } + else if ((c & 0xe0) == 0xc0) + { + clen = 2; + mask = 0x1f; + } + else if ((c & 0xf0) == 0xe0) + { + clen = 3; + mask = 0x0f; + } + else if ((c & 0xf8) == 0xf0) + { + clen = 4; + mask = 0x07; + } + else if ((c & 0xfc) == 0xf8) + { + clen = 5; + mask = 0x03; + } + else if ((c & 0xfe) == 0xfc) /* 1111110x only: 0xfe and 0xff are not lead bytes */ + { + clen = 6; + mask = 0x01; + } + else /* stray continuation byte or invalid lead byte */ + { + g_free (wcs); + return NULL; + } + + if (clen > end - cp) /* sequence would run past the end of the input */ + { + g_free (wcs); + return NULL; + } + + *wcp = (cp[0] & mask); + for (i = 1; i < clen; i++) + { + if ((cp[i] & 0xc0) != 0x80) /* not a 10xxxxxx continuation byte */ + { + g_free (wcs); + return NULL; + } + *wcp <<= 6; /* NOTE(review): bits beyond the width of wchar_t are lost here; confirm whether values above 0xffff must be rejected */ + *wcp |= (cp[i] & 0x3f); + } + + cp += clen; + wcp++; + n++; + } + if (cp != end) /* defensive: the loop above only exits once cp == end */ + { + g_free (wcs); + return NULL; + } + + /* n is the number of wide chars constructed */ + + /* Convert to a string in the current ANSI codepage */ + + result = g_new (gchar, 3 * n + 1); /* budget 3 bytes per wide char, and one for the NUL */ + mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL); + result[mblen] = 0; + g_free (wcs); + + return result; + +#else /* !G_OS_WIN32 */ + + return g_strdup (utf8string); /* no conversion on other platforms: return a copy */ + +#endif /* G_OS_WIN32 */ +} + + /* blame Elliot for these next five routines */ gchar* g_strchug (gchar *string) diff --git a/gstrfuncs.c b/gstrfuncs.c index e549260..6df7757 100644 --- a/gstrfuncs.c +++ b/gstrfuncs.c @@ -42,6 +42,11 @@ #include #endif #include "glib.h" + +#ifdef G_OS_WIN32 +#include +#endif + /* do not include in this place since it * inteferes with g_strsignal() on some OSes */ @@ -1068,6 +1073,225 @@ g_strescape (const gchar *source, return dest; } +/* + * g_filename_to_utf8 + * +
* Converts a string which is in the encoding used for file names by + * the C runtime (usually the same as that used by the operating + * system) in the current locale into a UTF-8 string. + */ + +gchar * +g_filename_to_utf8 (const gchar *opsysstring) +{ +#ifdef G_OS_WIN32 /* convert via wide chars; the result is newly allocated */ + + gint i, clen, wclen, first; + const gint len = strlen (opsysstring); + wchar_t *wcs, wc; + gchar *result, *bp; + const wchar_t *wcp; + + wcs = g_new (wchar_t, len); /* room for len wide chars, the output limit passed below */ + wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len); /* decode from the system ANSI codepage */ + + wcp = wcs; /* pass 1: add up the UTF-8 bytes needed for all wide chars */ + clen = 0; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + clen += 1; + else if (wc < 0x800) + clen += 2; + else if (wc < 0x10000) + clen += 3; + else if (wc < 0x200000) /* NOTE(review): this and the longer cases need a wchar_t wider than 16 bits; confirm for the target compiler */ + clen += 4; + else if (wc < 0x4000000) + clen += 5; + else + clen += 6; + } + + result = g_malloc (clen + 1); /* one extra byte for the terminating NUL */ + + wcp = wcs; /* pass 2: encode each wide char into result */ + bp = result; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + { + first = 0; + clen = 1; + } + else if (wc < 0x800) + { + first = 0xc0; + clen = 2; + } + else if (wc < 0x10000) + { + first = 0xe0; + clen = 3; + } + else if (wc < 0x200000) + { + first = 0xf0; + clen = 4; + } + else if (wc < 0x4000000) + { + first = 0xf8; + clen = 5; + } + else + { + first = 0xfc; + clen = 6; + } + + /* Store the continuation bytes last to first, 6 bits each, then the lead byte */ + switch (clen) + { + case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 1: bp[0] = wc | first; /* lead byte: remaining high bits plus the length marker */ + } + + bp += clen; + } + *bp = 0; /* terminate the UTF-8 string */ + + g_free (wcs); + + return result; + +#else /* !G_OS_WIN32 */ + + return g_strdup (opsysstring); /* no conversion on other platforms: return a copy */ + +#endif /* G_OS_WIN32 */ +} + +/* + * g_filename_from_utf8 + * + * The reverse of g_filename_to_utf8.
+ */ + +gchar * +g_filename_from_utf8 (const gchar *utf8string) +{ +#ifdef G_OS_WIN32 /* decode UTF-8 into wide chars, then convert those to the ANSI codepage; returns NULL on malformed UTF-8 */ + + gint i, mask, clen, mblen; + const gint len = strlen (utf8string); + wchar_t *wcs, *wcp; + gchar *result; + guchar *cp, *end, c; + gint n; + + /* First decode the UTF-8 input into wide chars, one per sequence */ + cp = (guchar *) utf8string; + end = cp + len; + n = 0; + wcs = g_new (wchar_t, len + 1); /* a sequence is at least one byte, so len wide chars suffice */ + wcp = wcs; + while (cp != end) + { + mask = 0; + c = *cp; /* lead byte: selects the sequence length and the payload mask */ + + if (c < 0x80) + { + clen = 1; + mask = 0x7f; + } + else if ((c & 0xe0) == 0xc0) + { + clen = 2; + mask = 0x1f; + } + else if ((c & 0xf0) == 0xe0) + { + clen = 3; + mask = 0x0f; + } + else if ((c & 0xf8) == 0xf0) + { + clen = 4; + mask = 0x07; + } + else if ((c & 0xfc) == 0xf8) + { + clen = 5; + mask = 0x03; + } + else if ((c & 0xfe) == 0xfc) /* 1111110x only: 0xfe and 0xff are not lead bytes */ + { + clen = 6; + mask = 0x01; + } + else /* stray continuation byte or invalid lead byte */ + { + g_free (wcs); + return NULL; + } + + if (clen > end - cp) /* sequence would run past the end of the input */ + { + g_free (wcs); + return NULL; + } + + *wcp = (cp[0] & mask); + for (i = 1; i < clen; i++) + { + if ((cp[i] & 0xc0) != 0x80) /* not a 10xxxxxx continuation byte */ + { + g_free (wcs); + return NULL; + } + *wcp <<= 6; /* NOTE(review): bits beyond the width of wchar_t are lost here; confirm whether values above 0xffff must be rejected */ + *wcp |= (cp[i] & 0x3f); + } + + cp += clen; + wcp++; + n++; + } + if (cp != end) /* defensive: the loop above only exits once cp == end */ + { + g_free (wcs); + return NULL; + } + + /* n is the number of wide chars constructed */ + + /* Convert to a string in the current ANSI codepage */ + + result = g_new (gchar, 3 * n + 1); /* budget 3 bytes per wide char, and one for the NUL */ + mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL); + result[mblen] = 0; + g_free (wcs); + + return result; + +#else /* !G_OS_WIN32 */ + + return g_strdup (utf8string); /* no conversion on other platforms: return a copy */ + +#endif /* G_OS_WIN32 */ +} + + /* blame Elliot for these next five routines */ gchar* g_strchug (gchar *string) -- 2.7.4