X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=glib%2Fgconvert.c;h=368bef57c09139b7e709275bbdc15f438e717552;hb=4c2a6595889eff44fa5f610e6c69016702100e95;hp=7a20cb56ade90d15f0d217af26dc7f1da821484f;hpb=8ef30758d56b21aa20c23e5c90a78142183003ed;p=platform%2Fupstream%2Fglib.git diff --git a/glib/gconvert.c b/glib/gconvert.c index 7a20cb5..368bef5 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -21,8 +21,7 @@ */ #include "config.h" - -#include "glib.h" +#include "glibconfig.h" #ifndef G_OS_WIN32 #include @@ -32,10 +31,6 @@ #include #include -#include "gprintfint.h" -#include "gthreadprivate.h" -#include "gunicode.h" - #ifdef G_OS_WIN32 #include "win_iconv.c" #endif @@ -46,22 +41,160 @@ #undef STRICT #endif +#include "gconvert.h" + +#include "gcharsetprivate.h" +#include "gslist.h" +#include "gstrfuncs.h" +#include "gtestutils.h" +#include "gthread.h" +#include "gunicode.h" +#include "gfileutils.h" + +#ifdef NEED_ICONV_CACHE +#include "glist.h" +#include "ghash.h" +#endif + #include "glibintl.h" #if defined(USE_LIBICONV_GNU) && !defined (_LIBICONV_H) #error GNU libiconv in use but included iconv.h not from libiconv #endif -#if !defined(USE_LIBICONV_GNU) && defined (_LIBICONV_H) +#if !defined(USE_LIBICONV_GNU) && defined (_LIBICONV_H) \ + && !defined (__APPLE_CC__) && !defined (__LP_64__) #error GNU libiconv not in use but included iconv.h is from libiconv #endif -#include "galias.h" -GQuark -g_convert_error_quark (void) -{ - return g_quark_from_static_string ("g_convert_error"); -} +/** + * SECTION:conversions + * @title: Character Set Conversion + * @short_description: convert strings between different character sets + * + * The g_convert() family of function wraps the functionality of iconv(). In + * addition to pure character set conversions, GLib has functions to deal + * with the extra complications of encodings for file names. + * + * + * File Name Encodings + * + * Historically, Unix has not had a defined encoding for file + * names: a file name is valid as long as it does not have path + * separators in it ("/"). However, displaying file names may + * require conversion: from the character set in which they were + * created, to the character set in which the application + * operates. Consider the Spanish file name + * "Presentación.sxi". If the + * application which created it uses ISO-8859-1 for its encoding, + * + * + * Character: P r e s e n t a c i ó n . s x i + * Hex code: 50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69 + * + * + * However, if the application use UTF-8, the actual file name on + * disk would look like this: + * + * + * Character: P r e s e n t a c i ó n . s x i + * Hex code: 50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69 + * + * + * Glib uses UTF-8 for its strings, and GUI toolkits like GTK+ + * that use Glib do the same thing. If you get a file name from + * the file system, for example, from readdir(3) or from g_dir_read_name(), + * and you wish to display the file name to the user, you + * will need to convert it into UTF-8. The + * opposite case is when the user types the name of a file he + * wishes to save: the toolkit will give you that string in + * UTF-8 encoding, and you will need to convert it to the + * character set used for file names before you can create the + * file with open(2) or fopen(3). + * + * + * By default, Glib assumes that file names on disk are in UTF-8 + * encoding. This is a valid assumption for file systems which + * were created relatively recently: most applications use UTF-8 + * encoding for their strings, and that is also what they use for + * the file names they create. However, older file systems may + * still contain file names created in "older" encodings, such as + * ISO-8859-1. In this case, for compatibility reasons, you may + * want to instruct Glib to use that particular encoding for file + * names rather than UTF-8. You can do this by specifying the + * encoding for file names in the G_FILENAME_ENCODING + * environment variable. For example, if your installation uses + * ISO-8859-1 for file names, you can put this in your + * ~/.profile: + * + * + * export G_FILENAME_ENCODING=ISO-8859-1 + * + * + * Glib provides the functions g_filename_to_utf8() and + * g_filename_from_utf8() to perform the necessary conversions. These + * functions convert file names from the encoding specified in + * G_FILENAME_ENCODING to UTF-8 and vice-versa. + * illustrates how + * these functions are used to convert between UTF-8 and the + * encoding for file names in the file system. + * + *
+ * Conversion between File Name Encodings + * + *
+ * + * Checklist for Application Writers + * + * This section is a practical summary of the detailed + * description above. You can use this as a checklist of + * things to do to make sure your applications process file + * name encodings correctly. + * + * + * + * If you get a file name from the file system from a function + * such as readdir(3) or gtk_file_chooser_get_filename(), + * you do not need to do any conversion to pass that + * file name to functions like open(2), rename(2), or + * fopen(3) — those are "raw" file names which the file + * system understands. + * + * + * If you need to display a file name, convert it to UTF-8 first by + * using g_filename_to_utf8(). If conversion fails, display a string like + * "Unknown file name". Do not + * convert this string back into the encoding used for file names if you + * wish to pass it to the file system; use the original file name instead. + * For example, the document window of a word processor could display + * "Unknown file name" in its title bar but still let the user save the + * file, as it would keep the raw file name internally. This can happen + * if the user has not set the G_FILENAME_ENCODING + * environment variable even though he has files whose names are not + * encoded in UTF-8. + * + * + * If your user interface lets the user type a file name for saving or + * renaming, convert it to the encoding used for file names in the file + * system by using g_filename_from_utf8(). Pass the converted file name + * to functions like fopen(3). If conversion fails, ask the user to enter + * a different file name. This can happen if the user types Japanese + * characters when G_FILENAME_ENCODING is set to + * ISO-8859-1, for example. + * + * + * + *
+ */ + +/* We try to terminate strings in unknown charsets with this many zero bytes + * to ensure that multibyte strings really are nul-terminated when we return + * them from g_convert() and friends. + */ +#define NUL_TERMINATOR_LENGTH 4 + +G_DEFINE_QUARK (g_convert_error, g_convert_error) static gboolean try_conversion (const char *to_codeset, @@ -545,7 +678,7 @@ close_converter (GIConv cd) * input sequence. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from one character set to another. @@ -595,9 +728,9 @@ g_convert_with_iconv (const gchar *str, p = str; inbytes_remaining = len; - outbuf_size = len + 1; /* + 1 for nul in case len == 1 */ + outbuf_size = len + NUL_TERMINATOR_LENGTH; - outbytes_remaining = outbuf_size - 1; /* -1 for nul */ + outbytes_remaining = outbuf_size - NUL_TERMINATOR_LENGTH; outp = dest = g_malloc (outbuf_size); while (!done && !have_error) @@ -623,19 +756,18 @@ g_convert_with_iconv (const gchar *str, dest = g_realloc (dest, outbuf_size); outp = dest + used; - outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */ + outbytes_remaining = outbuf_size - used - NUL_TERMINATOR_LENGTH; } break; case EILSEQ: - if (error) - g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, - _("Invalid byte sequence in conversion input")); + g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, + _("Invalid byte sequence in conversion input")); have_error = TRUE; break; default: { int errsv = errno; - + g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, _("Error during conversion: %s"), g_strerror (errsv)); @@ -657,7 +789,7 @@ g_convert_with_iconv (const gchar *str, } } - *outp = '\0'; + memset (outp, 0, NUL_TERMINATOR_LENGTH); if (bytes_read) *bytes_read = p - str; @@ -667,9 +799,8 @@ g_convert_with_iconv (const gchar *str, { if (!have_error) { - if (error) - g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, - _("Partial character sequence at end of input")); + g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, + _("Partial character sequence at end of input")); have_error = TRUE; } } @@ -700,7 +831,7 @@ g_convert_with_iconv (const gchar *str, . * @to_codeset: name of character set into which to convert @str * @from_codeset: character set of @str. - * @bytes_read: location to store the number of bytes in the + * @bytes_read: (out): location to store the number of bytes in the * input string that were successfully converted, or %NULL. * Even if the conversion was successful, this may be * less than @len if there were partial characters @@ -708,9 +839,9 @@ g_convert_with_iconv (const gchar *str, * #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value * stored will the byte offset after the last valid * input sequence. - * @bytes_written: the number of bytes stored in the output buffer (not + * @bytes_written: (out): the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from one character set to another. @@ -779,7 +910,7 @@ g_convert (const gchar *str, * at the end of the input. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from one character set to another, possibly @@ -802,7 +933,7 @@ g_convert_with_fallback (const gchar *str, gssize len, const gchar *to_codeset, const gchar *from_codeset, - gchar *fallback, + const gchar *fallback, gsize *bytes_read, gsize *bytes_written, GError **error) @@ -884,8 +1015,8 @@ g_convert_with_fallback (const gchar *str, */ p = utf8; - outbuf_size = len + 1; /* + 1 for nul in case len == 1 */ - outbytes_remaining = outbuf_size - 1; /* -1 for nul */ + outbuf_size = len + NUL_TERMINATOR_LENGTH; + outbytes_remaining = outbuf_size - NUL_TERMINATOR_LENGTH; outp = dest = g_malloc (outbuf_size); while (!done && !have_error) @@ -909,7 +1040,7 @@ g_convert_with_fallback (const gchar *str, dest = g_realloc (dest, outbuf_size); outp = dest + used; - outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */ + outbytes_remaining = outbuf_size - used - NUL_TERMINATOR_LENGTH; break; } @@ -978,7 +1109,7 @@ g_convert_with_fallback (const gchar *str, /* Cleanup */ - *outp = '\0'; + memset (outp, 0, NUL_TERMINATOR_LENGTH); close_converter (cd); @@ -1060,7 +1191,7 @@ strdup_len (const gchar *string, * input sequence. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string which is in the encoding used for strings by @@ -1101,7 +1232,7 @@ g_locale_to_utf8 (const gchar *opsysstring, * input sequence. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from UTF-8 to the encoding used for strings by @@ -1181,16 +1312,16 @@ filename_charset_cache_free (gpointer data) * Since: 2.6 */ gboolean -g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets) +g_get_filename_charsets (const gchar ***filename_charsets) { - static GStaticPrivate cache_private = G_STATIC_PRIVATE_INIT; - GFilenameCharsetCache *cache = g_static_private_get (&cache_private); + static GPrivate cache_private = G_PRIVATE_INIT (filename_charset_cache_free); + GFilenameCharsetCache *cache = g_private_get (&cache_private); const gchar *charset; if (!cache) { cache = g_new0 (GFilenameCharsetCache, 1); - g_static_private_set (&cache_private, cache, filename_charset_cache_free); + g_private_set (&cache_private, cache); } g_get_charset (&charset); @@ -1246,7 +1377,7 @@ g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets) #else /* G_PLATFORM_WIN32 */ gboolean -g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets) +g_get_filename_charsets (const gchar ***filename_charsets) { static const gchar *charsets[] = { "UTF-8", @@ -1288,16 +1419,6 @@ get_filename_charset (const gchar **filename_charset) return is_utf8; } -/* This is called from g_thread_init(). It's used to - * initialize some static data in a threadsafe way. - */ -void -_g_convert_thread_init (void) -{ - const gchar **dummy; - (void) g_get_filename_charsets (&dummy); -} - /** * g_filename_to_utf8: * @opsysstring: a string in the encoding for filenames @@ -1313,7 +1434,7 @@ _g_convert_thread_init (void) * input sequence. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string which is in the encoding used by GLib for @@ -1332,6 +1453,8 @@ g_filename_to_utf8 (const gchar *opsysstring, { const gchar *charset; + g_return_val_if_fail (opsysstring != NULL, NULL); + if (get_filename_charset (&charset)) return strdup_len (opsysstring, len, bytes_read, bytes_written, error); else @@ -1357,6 +1480,8 @@ g_filename_to_utf8 (const gchar *opsysstring, { const gchar *charset; + g_return_val_if_fail (opsysstring != NULL, NULL); + if (g_get_charset (&charset)) return strdup_len (opsysstring, len, bytes_read, bytes_written, error); else @@ -1381,7 +1506,7 @@ g_filename_to_utf8 (const gchar *opsysstring, * input sequence. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from UTF-8 to the encoding GLib uses for @@ -1709,17 +1834,17 @@ hostname_validate (const char *hostname) /** * g_filename_from_uri: * @uri: a uri describing a filename (escaped, encoded in ASCII). - * @hostname: Location to store hostname for the URI, or %NULL. + * @hostname: (out) (allow-none): Location to store hostname for the URI, or %NULL. * If there is no hostname in the URI, %NULL will be * stored in this location. - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts an escaped ASCII-encoded URI to a local filename in the * encoding used for filenames. * - * Return value: a newly-allocated string holding the resulting - * filename, or %NULL on an error. + * Return value: (type filename): a newly-allocated string holding + * the resulting filename, or %NULL on an error. **/ gchar * g_filename_from_uri (const gchar *uri, @@ -1870,8 +1995,8 @@ g_filename_from_uri (const gchar *uri, * @filename: an absolute filename specified in the GLib file name encoding, * which is the on-disk file name bytes on Unix, and UTF-8 on * Windows - * @hostname: A UTF-8 encoded hostname, or %NULL for none. - * @error: location to store the error occuring, or %NULL to ignore + * @hostname: (allow-none): A UTF-8 encoded hostname, or %NULL for none. + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts an absolute filename to an escaped ASCII-encoded URI, with the path @@ -1950,9 +2075,9 @@ g_filename_to_uri (const gchar *filename, * mime type defined in RFC 2483 into individual URIs, * discarding any comments. The URIs are not validated. * - * Returns: a newly allocated %NULL-terminated list of - * strings holding the individual URIs. The array should - * be freed with g_strfreev(). + * Returns: (transfer full): a newly allocated %NULL-terminated list + * of strings holding the individual URIs. The array should be freed + * with g_strfreev(). * * Since: 2.6 */ @@ -2024,7 +2149,7 @@ g_uri_list_extract_uris (const gchar *uri_list) * for instance there might be problems converting it to UTF-8, and some files * can be translated in the display. * - * If GLib can not make sense of the encoding of @filename, as a last resort it + * If GLib cannot make sense of the encoding of @filename, as a last resort it * replaces unknown characters with U+FFFD, the Unicode replacement character. * You can search the result for the UTF-8 encoding of this character (which is * "\357\277\275" in octal notation) to find out if @filename was in an invalid @@ -2065,7 +2190,7 @@ g_filename_display_basename (const gchar *filename) * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL * even if the filename actually isn't in the GLib file name encoding. * - * If GLib can not make sense of the encoding of @filename, as a last resort it + * If GLib cannot make sense of the encoding of @filename, as a last resort it * replaces unknown characters with U+FFFD, the Unicode replacement character. * You can search the result for the UTF-8 encoding of this character (which is * "\357\277\275" in octal notation) to find out if @filename was in an invalid @@ -2119,6 +2244,3 @@ g_filename_display_name (const gchar *filename) return display_name; } - -#define __G_CONVERT_C__ -#include "galiasdef.c"