From 8bda01029faf3078a83c45fcdfbfcbd3368ad475 Mon Sep 17 00:00:00 2001 From: Robert Brady Date: Sun, 12 Nov 2000 21:23:55 +0000 Subject: [PATCH] Remove g_filename_{to,from}_utf8 2000-11-12 Robert Brady * gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8 * gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and g_locale_{to.from}_utf8. The locale_ variant honours nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless asked otherwise. (g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, if bytesread != length and no bytesread pointer passed. --- ChangeLog | 12 +++ ChangeLog.pre-2-0 | 12 +++ ChangeLog.pre-2-10 | 12 +++ ChangeLog.pre-2-12 | 12 +++ ChangeLog.pre-2-2 | 12 +++ ChangeLog.pre-2-4 | 12 +++ ChangeLog.pre-2-6 | 12 +++ ChangeLog.pre-2-8 | 12 +++ gconvert.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++++++++ gconvert.h | 15 ++- glib/gconvert.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++++++++ glib/gconvert.h | 15 ++- glib/gstrfuncs.c | 218 ------------------------------------------ glib/gstrfuncs.h | 6 -- gstrfuncs.c | 218 ------------------------------------------ gstrfuncs.h | 6 -- 16 files changed, 676 insertions(+), 450 deletions(-) diff --git a/ChangeLog b/ChangeLog index 200ce83..f663bd3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2000-11-12 Robert Brady + + * gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8 + + * gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and + g_locale_{to.from}_utf8. The locale_ variant honours + nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless + asked otherwise. + + (g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, if bytesread + != length and no bytesread pointer passed. + Sun Nov 12 15:29:53 2000 Owen Taylor * gfileutils.[ch]: template is a reserved word in diff --git a/ChangeLog.pre-2-0 b/ChangeLog.pre-2-0 index 200ce83..f663bd3 100644 --- a/ChangeLog.pre-2-0 +++ b/ChangeLog.pre-2-0 @@ -1,3 +1,15 @@ +2000-11-12 Robert Brady + + * gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8 + + * gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and + g_locale_{to.from}_utf8. The locale_ variant honours + nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless + asked otherwise. + + (g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, if bytesread + != length and no bytesread pointer passed. + Sun Nov 12 15:29:53 2000 Owen Taylor * gfileutils.[ch]: template is a reserved word in diff --git a/ChangeLog.pre-2-10 b/ChangeLog.pre-2-10 index 200ce83..f663bd3 100644 --- a/ChangeLog.pre-2-10 +++ b/ChangeLog.pre-2-10 @@ -1,3 +1,15 @@ +2000-11-12 Robert Brady + + * gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8 + + * gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and + g_locale_{to.from}_utf8. The locale_ variant honours + nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless + asked otherwise. + + (g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, if bytesread + != length and no bytesread pointer passed. + Sun Nov 12 15:29:53 2000 Owen Taylor * gfileutils.[ch]: template is a reserved word in diff --git a/ChangeLog.pre-2-12 b/ChangeLog.pre-2-12 index 200ce83..f663bd3 100644 --- a/ChangeLog.pre-2-12 +++ b/ChangeLog.pre-2-12 @@ -1,3 +1,15 @@ +2000-11-12 Robert Brady + + * gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8 + + * gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and + g_locale_{to.from}_utf8. The locale_ variant honours + nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless + asked otherwise. + + (g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, if bytesread + != length and no bytesread pointer passed. + Sun Nov 12 15:29:53 2000 Owen Taylor * gfileutils.[ch]: template is a reserved word in diff --git a/ChangeLog.pre-2-2 b/ChangeLog.pre-2-2 index 200ce83..f663bd3 100644 --- a/ChangeLog.pre-2-2 +++ b/ChangeLog.pre-2-2 @@ -1,3 +1,15 @@ +2000-11-12 Robert Brady + + * gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8 + + * gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and + g_locale_{to.from}_utf8. The locale_ variant honours + nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless + asked otherwise. + + (g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, if bytesread + != length and no bytesread pointer passed. + Sun Nov 12 15:29:53 2000 Owen Taylor * gfileutils.[ch]: template is a reserved word in diff --git a/ChangeLog.pre-2-4 b/ChangeLog.pre-2-4 index 200ce83..f663bd3 100644 --- a/ChangeLog.pre-2-4 +++ b/ChangeLog.pre-2-4 @@ -1,3 +1,15 @@ +2000-11-12 Robert Brady + + * gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8 + + * gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and + g_locale_{to.from}_utf8. The locale_ variant honours + nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless + asked otherwise. + + (g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, if bytesread + != length and no bytesread pointer passed. + Sun Nov 12 15:29:53 2000 Owen Taylor * gfileutils.[ch]: template is a reserved word in diff --git a/ChangeLog.pre-2-6 b/ChangeLog.pre-2-6 index 200ce83..f663bd3 100644 --- a/ChangeLog.pre-2-6 +++ b/ChangeLog.pre-2-6 @@ -1,3 +1,15 @@ +2000-11-12 Robert Brady + + * gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8 + + * gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and + g_locale_{to.from}_utf8. The locale_ variant honours + nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless + asked otherwise. + + (g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, if bytesread + != length and no bytesread pointer passed. + Sun Nov 12 15:29:53 2000 Owen Taylor * gfileutils.[ch]: template is a reserved word in diff --git a/ChangeLog.pre-2-8 b/ChangeLog.pre-2-8 index 200ce83..f663bd3 100644 --- a/ChangeLog.pre-2-8 +++ b/ChangeLog.pre-2-8 @@ -1,3 +1,15 @@ +2000-11-12 Robert Brady + + * gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8 + + * gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and + g_locale_{to.from}_utf8. The locale_ variant honours + nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless + asked otherwise. + + (g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, if bytesread + != length and no bytesread pointer passed. + Sun Nov 12 15:29:53 2000 Owen Taylor * gfileutils.[ch]: template is a reserved word in diff --git a/gconvert.c b/gconvert.c index 7e596b4..97a0fe1 100644 --- a/gconvert.c +++ b/gconvert.c @@ -23,6 +23,11 @@ #include #include #include +#include + +#ifdef G_OS_WIN32 +#include +#endif #include "glib.h" @@ -173,6 +178,15 @@ g_convert (const gchar *str, if (bytes_read) *bytes_read = p - str; + else + { + if ((p - str) != len) + { + g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, + _("Partial character sequence at end of input")); + have_error = TRUE; + } + } if (bytes_written) *bytes_written = outp - dest; /* Doesn't include '\0' */ @@ -402,3 +416,265 @@ g_convert_with_fallback (const gchar *str, else return dest; } + +/* + * g_locale_to_utf8 + * + * Converts a string which is in the encoding used for strings by + * the C runtime (usually the same as that used by the operating + * system) in the current locale into a UTF-8 string. + */ + +gchar * +g_locale_to_utf8 (const gchar *opsysstring, GError **error) +{ +#ifdef G_OS_WIN32 + + gint i, clen, wclen, first; + const gint len = strlen (opsysstring); + wchar_t *wcs, wc; + gchar *result, *bp; + const wchar_t *wcp; + + wcs = g_new (wchar_t, len); + wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len); + + wcp = wcs; + clen = 0; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + clen += 1; + else if (wc < 0x800) + clen += 2; + else if (wc < 0x10000) + clen += 3; + else if (wc < 0x200000) + clen += 4; + else if (wc < 0x4000000) + clen += 5; + else + clen += 6; + } + + result = g_malloc (clen + 1); + + wcp = wcs; + bp = result; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + { + first = 0; + clen = 1; + } + else if (wc < 0x800) + { + first = 0xc0; + clen = 2; + } + else if (wc < 0x10000) + { + first = 0xe0; + clen = 3; + } + else if (wc < 0x200000) + { + first = 0xf0; + clen = 4; + } + else if (wc < 0x4000000) + { + first = 0xf8; + clen = 5; + } + else + { + first = 0xfc; + clen = 6; + } + + /* Woo-hoo! */ + switch (clen) + { + case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 1: bp[0] = wc | first; + } + + bp += clen; + } + *bp = 0; + + g_free (wcs); + + return result; + +#else + + char *charset, *str; + + if (g_get_charset (&charset)) + return g_strdup (opsysstring); + + str = g_convert (opsysstring, strlen (opsysstring), + "UTF-8", charset, NULL, NULL, error); + + return str; +#endif +} + +/* + * g_locale_from_utf8 + * + * The reverse of g_locale_to_utf8. + */ + +gchar * +g_locale_from_utf8 (const gchar *utf8string, GError **error) +{ +#ifdef G_OS_WIN32 + + gint i, mask, clen, mblen; + const gint len = strlen (utf8string); + wchar_t *wcs, *wcp; + gchar *result; + guchar *cp, *end, c; + gint n; + + /* First convert to wide chars */ + cp = (guchar *) utf8string; + end = cp + len; + n = 0; + wcs = g_new (wchar_t, len + 1); + wcp = wcs; + while (cp != end) + { + mask = 0; + c = *cp; + + if (c < 0x80) + { + clen = 1; + mask = 0x7f; + } + else if ((c & 0xe0) == 0xc0) + { + clen = 2; + mask = 0x1f; + } + else if ((c & 0xf0) == 0xe0) + { + clen = 3; + mask = 0x0f; + } + else if ((c & 0xf8) == 0xf0) + { + clen = 4; + mask = 0x07; + } + else if ((c & 0xfc) == 0xf8) + { + clen = 5; + mask = 0x03; + } + else if ((c & 0xfc) == 0xfc) + { + clen = 6; + mask = 0x01; + } + else + { + g_free (wcs); + return NULL; + } + + if (cp + clen > end) + { + g_free (wcs); + return NULL; + } + + *wcp = (cp[0] & mask); + for (i = 1; i < clen; i++) + { + if ((cp[i] & 0xc0) != 0x80) + { + g_free (wcs); + return NULL; + } + *wcp <<= 6; + *wcp |= (cp[i] & 0x3f); + } + + cp += clen; + wcp++; + n++; + } + if (cp != end) + { + g_free (wcs); + return NULL; + } + + /* n is the number of wide chars constructed */ + + /* Convert to a string in the current ANSI codepage */ + + result = g_new (gchar, 3 * n + 1); + mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL); + result[mblen] = 0; + g_free (wcs); + + return result; + +#else + + gchar *charset, *str; + + if (g_get_charset (&charset)) + return g_strdup (utf8string); + + str = g_convert (utf8string, strlen (utf8string), + charset, "UTF-8", NULL, NULL, error); + + return str; + +#endif +} + +/* Filenames are in UTF-8 unless specificially requested otherwise */ + +gchar* +g_filename_to_utf8 (const gchar *string, GError **error) +{ +#ifdef G_OS_WIN32 + return g_locale_to_utf8 (string, error); +#else + if (getenv ("G_BROKEN_FILENAMES")) + return g_locale_to_utf8 (string, error); + + return g_strdup (string); +#endif +} + +gchar* +g_filename_from_utf8 (const gchar *string, GError **error) +{ +#ifdef G_OS_WIN32 + return g_locale_from_utf8 (string, error); +#else + if (getenv ("G_BROKEN_FILENAMES")) + return g_locale_from_utf8 (string, error); + + return g_strdup (string); +#endif +} + diff --git a/gconvert.h b/gconvert.h index 0586a19..1eaed3e 100644 --- a/gconvert.h +++ b/gconvert.h @@ -35,7 +35,8 @@ typedef enum { G_CONVERT_ERROR_NO_CONVERSION, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, - G_CONVERT_ERROR_FAILED + G_CONVERT_ERROR_FAILED, + G_CONVERT_ERROR_PARTIAL_INPUT, } GConvertError; #define G_CONVERT_ERROR g_convert_error_quark() @@ -57,6 +58,18 @@ gchar* g_convert_with_fallback (const gchar *str, gint *bytes_written, GError **error); + +/* Convert between libc's idea of strings and UTF-8. + */ +gchar* g_locale_to_utf8 (const gchar *opsysstring, GError **error); +gchar* g_locale_from_utf8 (const gchar *utf8string, GError **error); + +/* Convert between the operating system (or C runtime) + * representation of file names and UTF-8. + */ +gchar* g_filename_to_utf8 (const gchar *opsysstring, GError **error); +gchar* g_filename_from_utf8 (const gchar *utf8string, GError **error); + G_END_DECLS #endif /* __G_CONVERT_H__ */ diff --git a/glib/gconvert.c b/glib/gconvert.c index 7e596b4..97a0fe1 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -23,6 +23,11 @@ #include #include #include +#include + +#ifdef G_OS_WIN32 +#include +#endif #include "glib.h" @@ -173,6 +178,15 @@ g_convert (const gchar *str, if (bytes_read) *bytes_read = p - str; + else + { + if ((p - str) != len) + { + g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, + _("Partial character sequence at end of input")); + have_error = TRUE; + } + } if (bytes_written) *bytes_written = outp - dest; /* Doesn't include '\0' */ @@ -402,3 +416,265 @@ g_convert_with_fallback (const gchar *str, else return dest; } + +/* + * g_locale_to_utf8 + * + * Converts a string which is in the encoding used for strings by + * the C runtime (usually the same as that used by the operating + * system) in the current locale into a UTF-8 string. + */ + +gchar * +g_locale_to_utf8 (const gchar *opsysstring, GError **error) +{ +#ifdef G_OS_WIN32 + + gint i, clen, wclen, first; + const gint len = strlen (opsysstring); + wchar_t *wcs, wc; + gchar *result, *bp; + const wchar_t *wcp; + + wcs = g_new (wchar_t, len); + wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len); + + wcp = wcs; + clen = 0; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + clen += 1; + else if (wc < 0x800) + clen += 2; + else if (wc < 0x10000) + clen += 3; + else if (wc < 0x200000) + clen += 4; + else if (wc < 0x4000000) + clen += 5; + else + clen += 6; + } + + result = g_malloc (clen + 1); + + wcp = wcs; + bp = result; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + { + first = 0; + clen = 1; + } + else if (wc < 0x800) + { + first = 0xc0; + clen = 2; + } + else if (wc < 0x10000) + { + first = 0xe0; + clen = 3; + } + else if (wc < 0x200000) + { + first = 0xf0; + clen = 4; + } + else if (wc < 0x4000000) + { + first = 0xf8; + clen = 5; + } + else + { + first = 0xfc; + clen = 6; + } + + /* Woo-hoo! */ + switch (clen) + { + case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 1: bp[0] = wc | first; + } + + bp += clen; + } + *bp = 0; + + g_free (wcs); + + return result; + +#else + + char *charset, *str; + + if (g_get_charset (&charset)) + return g_strdup (opsysstring); + + str = g_convert (opsysstring, strlen (opsysstring), + "UTF-8", charset, NULL, NULL, error); + + return str; +#endif +} + +/* + * g_locale_from_utf8 + * + * The reverse of g_locale_to_utf8. + */ + +gchar * +g_locale_from_utf8 (const gchar *utf8string, GError **error) +{ +#ifdef G_OS_WIN32 + + gint i, mask, clen, mblen; + const gint len = strlen (utf8string); + wchar_t *wcs, *wcp; + gchar *result; + guchar *cp, *end, c; + gint n; + + /* First convert to wide chars */ + cp = (guchar *) utf8string; + end = cp + len; + n = 0; + wcs = g_new (wchar_t, len + 1); + wcp = wcs; + while (cp != end) + { + mask = 0; + c = *cp; + + if (c < 0x80) + { + clen = 1; + mask = 0x7f; + } + else if ((c & 0xe0) == 0xc0) + { + clen = 2; + mask = 0x1f; + } + else if ((c & 0xf0) == 0xe0) + { + clen = 3; + mask = 0x0f; + } + else if ((c & 0xf8) == 0xf0) + { + clen = 4; + mask = 0x07; + } + else if ((c & 0xfc) == 0xf8) + { + clen = 5; + mask = 0x03; + } + else if ((c & 0xfc) == 0xfc) + { + clen = 6; + mask = 0x01; + } + else + { + g_free (wcs); + return NULL; + } + + if (cp + clen > end) + { + g_free (wcs); + return NULL; + } + + *wcp = (cp[0] & mask); + for (i = 1; i < clen; i++) + { + if ((cp[i] & 0xc0) != 0x80) + { + g_free (wcs); + return NULL; + } + *wcp <<= 6; + *wcp |= (cp[i] & 0x3f); + } + + cp += clen; + wcp++; + n++; + } + if (cp != end) + { + g_free (wcs); + return NULL; + } + + /* n is the number of wide chars constructed */ + + /* Convert to a string in the current ANSI codepage */ + + result = g_new (gchar, 3 * n + 1); + mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL); + result[mblen] = 0; + g_free (wcs); + + return result; + +#else + + gchar *charset, *str; + + if (g_get_charset (&charset)) + return g_strdup (utf8string); + + str = g_convert (utf8string, strlen (utf8string), + charset, "UTF-8", NULL, NULL, error); + + return str; + +#endif +} + +/* Filenames are in UTF-8 unless specificially requested otherwise */ + +gchar* +g_filename_to_utf8 (const gchar *string, GError **error) +{ +#ifdef G_OS_WIN32 + return g_locale_to_utf8 (string, error); +#else + if (getenv ("G_BROKEN_FILENAMES")) + return g_locale_to_utf8 (string, error); + + return g_strdup (string); +#endif +} + +gchar* +g_filename_from_utf8 (const gchar *string, GError **error) +{ +#ifdef G_OS_WIN32 + return g_locale_from_utf8 (string, error); +#else + if (getenv ("G_BROKEN_FILENAMES")) + return g_locale_from_utf8 (string, error); + + return g_strdup (string); +#endif +} + diff --git a/glib/gconvert.h b/glib/gconvert.h index 0586a19..1eaed3e 100644 --- a/glib/gconvert.h +++ b/glib/gconvert.h @@ -35,7 +35,8 @@ typedef enum { G_CONVERT_ERROR_NO_CONVERSION, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, - G_CONVERT_ERROR_FAILED + G_CONVERT_ERROR_FAILED, + G_CONVERT_ERROR_PARTIAL_INPUT, } GConvertError; #define G_CONVERT_ERROR g_convert_error_quark() @@ -57,6 +58,18 @@ gchar* g_convert_with_fallback (const gchar *str, gint *bytes_written, GError **error); + +/* Convert between libc's idea of strings and UTF-8. + */ +gchar* g_locale_to_utf8 (const gchar *opsysstring, GError **error); +gchar* g_locale_from_utf8 (const gchar *utf8string, GError **error); + +/* Convert between the operating system (or C runtime) + * representation of file names and UTF-8. + */ +gchar* g_filename_to_utf8 (const gchar *opsysstring, GError **error); +gchar* g_filename_from_utf8 (const gchar *utf8string, GError **error); + G_END_DECLS #endif /* __G_CONVERT_H__ */ diff --git a/glib/gstrfuncs.c b/glib/gstrfuncs.c index 2d1ae71..69e19b1 100644 --- a/glib/gstrfuncs.c +++ b/glib/gstrfuncs.c @@ -1230,224 +1230,6 @@ g_strescape (const gchar *source, return dest; } -/* - * g_filename_to_utf8 - * - * Converts a string which is in the encoding used for file names by - * the C runtime (usually the same as that used by the operating - * system) in the current locale into a UTF-8 string. - */ - -gchar * -g_filename_to_utf8 (const gchar *opsysstring) -{ -#ifdef G_OS_WIN32 - - gint i, clen, wclen, first; - const gint len = strlen (opsysstring); - wchar_t *wcs, wc; - gchar *result, *bp; - const wchar_t *wcp; - - wcs = g_new (wchar_t, len); - wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len); - - wcp = wcs; - clen = 0; - for (i = 0; i < wclen; i++) - { - wc = *wcp++; - - if (wc < 0x80) - clen += 1; - else if (wc < 0x800) - clen += 2; - else if (wc < 0x10000) - clen += 3; - else if (wc < 0x200000) - clen += 4; - else if (wc < 0x4000000) - clen += 5; - else - clen += 6; - } - - result = g_malloc (clen + 1); - - wcp = wcs; - bp = result; - for (i = 0; i < wclen; i++) - { - wc = *wcp++; - - if (wc < 0x80) - { - first = 0; - clen = 1; - } - else if (wc < 0x800) - { - first = 0xc0; - clen = 2; - } - else if (wc < 0x10000) - { - first = 0xe0; - clen = 3; - } - else if (wc < 0x200000) - { - first = 0xf0; - clen = 4; - } - else if (wc < 0x4000000) - { - first = 0xf8; - clen = 5; - } - else - { - first = 0xfc; - clen = 6; - } - - /* Woo-hoo! */ - switch (clen) - { - case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 1: bp[0] = wc | first; - } - - bp += clen; - } - *bp = 0; - - g_free (wcs); - - return result; - -#else - - return g_strdup (opsysstring); - -#endif -} - -/* - * g_filename_from_utf8 - * - * The reverse of g_filename_to_utf8. - */ - -gchar * -g_filename_from_utf8 (const gchar *utf8string) -{ -#ifdef G_OS_WIN32 - - gint i, mask, clen, mblen; - const gint len = strlen (utf8string); - wchar_t *wcs, *wcp; - gchar *result; - guchar *cp, *end, c; - gint n; - - /* First convert to wide chars */ - cp = (guchar *) utf8string; - end = cp + len; - n = 0; - wcs = g_new (wchar_t, len + 1); - wcp = wcs; - while (cp != end) - { - mask = 0; - c = *cp; - - if (c < 0x80) - { - clen = 1; - mask = 0x7f; - } - else if ((c & 0xe0) == 0xc0) - { - clen = 2; - mask = 0x1f; - } - else if ((c & 0xf0) == 0xe0) - { - clen = 3; - mask = 0x0f; - } - else if ((c & 0xf8) == 0xf0) - { - clen = 4; - mask = 0x07; - } - else if ((c & 0xfc) == 0xf8) - { - clen = 5; - mask = 0x03; - } - else if ((c & 0xfc) == 0xfc) - { - clen = 6; - mask = 0x01; - } - else - { - g_free (wcs); - return NULL; - } - - if (cp + clen > end) - { - g_free (wcs); - return NULL; - } - - *wcp = (cp[0] & mask); - for (i = 1; i < clen; i++) - { - if ((cp[i] & 0xc0) != 0x80) - { - g_free (wcs); - return NULL; - } - *wcp <<= 6; - *wcp |= (cp[i] & 0x3f); - } - - cp += clen; - wcp++; - n++; - } - if (cp != end) - { - g_free (wcs); - return NULL; - } - - /* n is the number of wide chars constructed */ - - /* Convert to a string in the current ANSI codepage */ - - result = g_new (gchar, 3 * n + 1); - mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL); - result[mblen] = 0; - g_free (wcs); - - return result; - -#else - - return g_strdup (utf8string); - -#endif -} - gchar* g_strchug (gchar *string) { diff --git a/glib/gstrfuncs.h b/glib/gstrfuncs.h index ffec17e..516c310 100644 --- a/glib/gstrfuncs.h +++ b/glib/gstrfuncs.h @@ -103,12 +103,6 @@ gchar* g_strescape (const gchar *source, gpointer g_memdup (gconstpointer mem, guint byte_size); -/* Convert between the operating system (or C runtime) - * representation of file names and UTF-8. - */ -gchar* g_filename_to_utf8 (const gchar *opsysstring); -gchar* g_filename_from_utf8 (const gchar *utf8string); - /* NULL terminated string arrays. * g_strsplit() splits up string into max_tokens tokens at delim and * returns a newly allocated string array. diff --git a/gstrfuncs.c b/gstrfuncs.c index 2d1ae71..69e19b1 100644 --- a/gstrfuncs.c +++ b/gstrfuncs.c @@ -1230,224 +1230,6 @@ g_strescape (const gchar *source, return dest; } -/* - * g_filename_to_utf8 - * - * Converts a string which is in the encoding used for file names by - * the C runtime (usually the same as that used by the operating - * system) in the current locale into a UTF-8 string. - */ - -gchar * -g_filename_to_utf8 (const gchar *opsysstring) -{ -#ifdef G_OS_WIN32 - - gint i, clen, wclen, first; - const gint len = strlen (opsysstring); - wchar_t *wcs, wc; - gchar *result, *bp; - const wchar_t *wcp; - - wcs = g_new (wchar_t, len); - wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len); - - wcp = wcs; - clen = 0; - for (i = 0; i < wclen; i++) - { - wc = *wcp++; - - if (wc < 0x80) - clen += 1; - else if (wc < 0x800) - clen += 2; - else if (wc < 0x10000) - clen += 3; - else if (wc < 0x200000) - clen += 4; - else if (wc < 0x4000000) - clen += 5; - else - clen += 6; - } - - result = g_malloc (clen + 1); - - wcp = wcs; - bp = result; - for (i = 0; i < wclen; i++) - { - wc = *wcp++; - - if (wc < 0x80) - { - first = 0; - clen = 1; - } - else if (wc < 0x800) - { - first = 0xc0; - clen = 2; - } - else if (wc < 0x10000) - { - first = 0xe0; - clen = 3; - } - else if (wc < 0x200000) - { - first = 0xf0; - clen = 4; - } - else if (wc < 0x4000000) - { - first = 0xf8; - clen = 5; - } - else - { - first = 0xfc; - clen = 6; - } - - /* Woo-hoo! */ - switch (clen) - { - case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 1: bp[0] = wc | first; - } - - bp += clen; - } - *bp = 0; - - g_free (wcs); - - return result; - -#else - - return g_strdup (opsysstring); - -#endif -} - -/* - * g_filename_from_utf8 - * - * The reverse of g_filename_to_utf8. - */ - -gchar * -g_filename_from_utf8 (const gchar *utf8string) -{ -#ifdef G_OS_WIN32 - - gint i, mask, clen, mblen; - const gint len = strlen (utf8string); - wchar_t *wcs, *wcp; - gchar *result; - guchar *cp, *end, c; - gint n; - - /* First convert to wide chars */ - cp = (guchar *) utf8string; - end = cp + len; - n = 0; - wcs = g_new (wchar_t, len + 1); - wcp = wcs; - while (cp != end) - { - mask = 0; - c = *cp; - - if (c < 0x80) - { - clen = 1; - mask = 0x7f; - } - else if ((c & 0xe0) == 0xc0) - { - clen = 2; - mask = 0x1f; - } - else if ((c & 0xf0) == 0xe0) - { - clen = 3; - mask = 0x0f; - } - else if ((c & 0xf8) == 0xf0) - { - clen = 4; - mask = 0x07; - } - else if ((c & 0xfc) == 0xf8) - { - clen = 5; - mask = 0x03; - } - else if ((c & 0xfc) == 0xfc) - { - clen = 6; - mask = 0x01; - } - else - { - g_free (wcs); - return NULL; - } - - if (cp + clen > end) - { - g_free (wcs); - return NULL; - } - - *wcp = (cp[0] & mask); - for (i = 1; i < clen; i++) - { - if ((cp[i] & 0xc0) != 0x80) - { - g_free (wcs); - return NULL; - } - *wcp <<= 6; - *wcp |= (cp[i] & 0x3f); - } - - cp += clen; - wcp++; - n++; - } - if (cp != end) - { - g_free (wcs); - return NULL; - } - - /* n is the number of wide chars constructed */ - - /* Convert to a string in the current ANSI codepage */ - - result = g_new (gchar, 3 * n + 1); - mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL); - result[mblen] = 0; - g_free (wcs); - - return result; - -#else - - return g_strdup (utf8string); - -#endif -} - gchar* g_strchug (gchar *string) { diff --git a/gstrfuncs.h b/gstrfuncs.h index ffec17e..516c310 100644 --- a/gstrfuncs.h +++ b/gstrfuncs.h @@ -103,12 +103,6 @@ gchar* g_strescape (const gchar *source, gpointer g_memdup (gconstpointer mem, guint byte_size); -/* Convert between the operating system (or C runtime) - * representation of file names and UTF-8. - */ -gchar* g_filename_to_utf8 (const gchar *opsysstring); -gchar* g_filename_from_utf8 (const gchar *utf8string); - /* NULL terminated string arrays. * g_strsplit() splits up string into max_tokens tokens at delim and * returns a newly allocated string array. -- 2.7.4