X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=glib%2Fgconvert.c;h=1d55fda1342e892ac3a32beae5c5ea923e61a192;hb=13e15733f38a40c6ef6a1baede91cce81c86ebaa;hp=3202800edecd4bff87e0404a3c6e89555164417c;hpb=f0620902b2991577fbf0f69d2564535e7306ce20;p=platform%2Fupstream%2Fglib.git diff --git a/glib/gconvert.c b/glib/gconvert.c index 3202800..1d55fda 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -15,9 +15,7 @@ * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * License along with this library; if not, see . */ #include "config.h" @@ -43,25 +41,21 @@ #include "gconvert.h" -#include "gprintfint.h" +#include "gcharsetprivate.h" #include "gslist.h" #include "gstrfuncs.h" #include "gtestutils.h" #include "gthread.h" -#include "gthreadprivate.h" #include "gunicode.h" - -#ifdef NEED_ICONV_CACHE -#include "glist.h" -#include "ghash.h" -#endif +#include "gfileutils.h" #include "glibintl.h" #if defined(USE_LIBICONV_GNU) && !defined (_LIBICONV_H) #error GNU libiconv in use but included iconv.h not from libiconv #endif -#if !defined(USE_LIBICONV_GNU) && defined (_LIBICONV_H) +#if !defined(USE_LIBICONV_GNU) && defined (_LIBICONV_H) \ + && !defined (__APPLE_CC__) && !defined (__LP_64__) #error GNU libiconv not in use but included iconv.h is from libiconv #endif @@ -69,122 +63,101 @@ /** * SECTION:conversions * @title: Character Set Conversion - * @short_description: Convert strings between different character sets + * @short_description: convert strings between different character sets + * + * The g_convert() family of function wraps the functionality of iconv(). + * In addition to pure character set conversions, GLib has functions to + * deal with the extra complications of encodings for file names. 
* - * The g_convert() family of function wraps the functionality of iconv(). In - * addition to pure character set conversions, GLib has functions to deal - * with the extra complications of encodings for file names. + * ## File Name Encodings * - * - * File Name Encodings - * - * Historically, Unix has not had a defined encoding for file - * names: a file name is valid as long as it does not have path - * separators in it ("/"). However, displaying file names may - * require conversion: from the character set in which they were - * created, to the character set in which the application - * operates. Consider the Spanish file name - * "Presentación.sxi". If the - * application which created it uses ISO-8859-1 for its encoding, - * - * + * Historically, UNIX has not had a defined encoding for file names: + * a file name is valid as long as it does not have path separators + * in it ("/"). However, displaying file names may require conversion: + * from the character set in which they were created, to the character + * set in which the application operates. Consider the Spanish file name + * "Presentación.sxi". If the application which created it uses + * ISO-8859-1 for its encoding, + * |[ * Character: P r e s e n t a c i ó n . s x i * Hex code: 50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69 - * - * + * ]| * However, if the application use UTF-8, the actual file name on * disk would look like this: - * - * + * |[ * Character: P r e s e n t a c i ó n . s x i * Hex code: 50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69 - * - * - * Glib uses UTF-8 for its strings, and GUI toolkits like GTK+ - * that use Glib do the same thing. If you get a file name from - * the file system, for example, from readdir(3) or from g_dir_read_name(), - * and you wish to display the file name to the user, you - * will need to convert it into UTF-8. 
The - * opposite case is when the user types the name of a file he - * wishes to save: the toolkit will give you that string in - * UTF-8 encoding, and you will need to convert it to the - * character set used for file names before you can create the - * file with open(2) or fopen(3). - * - * + * ]| + * Glib uses UTF-8 for its strings, and GUI toolkits like GTK+ that use + * Glib do the same thing. If you get a file name from the file system, + * for example, from readdir() or from g_dir_read_name(), and you wish + * to display the file name to the user, you will need to convert it + * into UTF-8. The opposite case is when the user types the name of a + * file he wishes to save: the toolkit will give you that string in + * UTF-8 encoding, and you will need to convert it to the character + * set used for file names before you can create the file with open() + * or fopen(). + * * By default, Glib assumes that file names on disk are in UTF-8 - * encoding. This is a valid assumption for file systems which - * were created relatively recently: most applications use UTF-8 + * encoding. This is a valid assumption for file systems which + * were created relatively recently: most applications use UTF-8 * encoding for their strings, and that is also what they use for - * the file names they create. However, older file systems may + * the file names they create. However, older file systems may * still contain file names created in "older" encodings, such as - * ISO-8859-1. In this case, for compatibility reasons, you may - * want to instruct Glib to use that particular encoding for file - * names rather than UTF-8. You can do this by specifying the - * encoding for file names in the G_FILENAME_ENCODING - * environment variable. For example, if your installation uses - * ISO-8859-1 for file names, you can put this in your - * ~/.profile: - * - * + * ISO-8859-1. 
In this case, for compatibility reasons, you may want + * to instruct Glib to use that particular encoding for file names + * rather than UTF-8. You can do this by specifying the encoding for + * file names in the [`G_FILENAME_ENCODING`][G_FILENAME_ENCODING] + * environment variable. For example, if your installation uses + * ISO-8859-1 for file names, you can put this in your `~/.profile` + * |[ * export G_FILENAME_ENCODING=ISO-8859-1 - * - * + * ]| * Glib provides the functions g_filename_to_utf8() and - * g_filename_from_utf8() to perform the necessary conversions. These - * functions convert file names from the encoding specified in - * G_FILENAME_ENCODING to UTF-8 and vice-versa. - * illustrates how + * g_filename_from_utf8() to perform the necessary conversions. + * These functions convert file names from the encoding specified + * in `G_FILENAME_ENCODING` to UTF-8 and vice-versa. This + * [diagram][file-name-encodings-diagram] illustrates how * these functions are used to convert between UTF-8 and the * encoding for file names in the file system. - * - *
- * Conversion between File Name Encodings - * - *
- * - * Checklist for Application Writers - * + * + * ## Conversion between file name encodings # {#file-name-encodings-diagram} + * + * ![](file-name-encodings.png) + * + * ## Checklist for Application Writers + * * This section is a practical summary of the detailed - * description above. You can use this as a checklist of + * things to do to make sure your applications process file * name encodings correctly. - * - * - * - * If you get a file name from the file system from a function - * such as readdir(3) or gtk_file_chooser_get_filename(), - * you do not need to do any conversion to pass that - * file name to functions like open(2), rename(2), or - * fopen(3) — those are "raw" file names which the file - * system understands. - * - * - * If you need to display a file name, convert it to UTF-8 first by - * using g_filename_to_utf8(). If conversion fails, display a string like - * "Unknown file name". Do not - * convert this string back into the encoding used for file names if you - * wish to pass it to the file system; use the original file name instead. - * For example, the document window of a word processor could display - * "Unknown file name" in its title bar but still let the user save the - * file, as it would keep the raw file name internally. This can happen - * if the user has not set the G_FILENAME_ENCODING - * environment variable even though he has files whose names are not - * encoded in UTF-8. - * - * - * If your user interface lets the user type a file name for saving or - * renaming, convert it to the encoding used for file names in the file - * system by using g_filename_from_utf8(). Pass the converted file name - * to functions like fopen(3). If conversion fails, ask the user to enter - * a different file name. This can happen if the user types Japanese - * characters when G_FILENAME_ENCODING is set to - * ISO-8859-1, for example. - * - * - * - *
+ * + * 1. If you get a file name from the file system from a function + * such as readdir() or gtk_file_chooser_get_filename(), you do + * not need to do any conversion to pass that file name to + * functions like open(), rename(), or fopen() -- those are "raw" + * file names which the file system understands. + * + * 2. If you need to display a file name, convert it to UTF-8 first + * by using g_filename_to_utf8(). If conversion fails, display a + * string like "Unknown file name". Do not convert this string back + * into the encoding used for file names if you wish to pass it to + * the file system; use the original file name instead. + * + * For example, the document window of a word processor could display + * "Unknown file name" in its title bar but still let the user save + * the file, as it would keep the raw file name internally. This + * can happen if the user has not set the `G_FILENAME_ENCODING` + * environment variable even though he has files whose names are + * not encoded in UTF-8. + * + * 3. If your user interface lets the user type a file name for saving + * or renaming, convert it to the encoding used for file names in + * the file system by using g_filename_from_utf8(). Pass the converted + * file name to functions like fopen(). If conversion fails, ask the + * user to enter a different file name. This can happen if the user + * types Japanese characters when `G_FILENAME_ENCODING` is set to + * `ISO-8859-1`, for example. 
*/ /* We try to terminate strings in unknown charsets with this many zero bytes @@ -193,11 +166,7 @@ */ #define NUL_TERMINATOR_LENGTH 4 -GQuark -g_convert_error_quark (void) -{ - return g_quark_from_static_string ("g_convert_error"); -} +G_DEFINE_QUARK (g_convert_error, g_convert_error) static gboolean try_conversion (const char *to_codeset, @@ -232,9 +201,6 @@ try_to_aliases (const char **to_aliases, return FALSE; } -G_GNUC_INTERNAL extern const char ** -_g_charset_get_aliases (const char *canonical_name); - /** * g_iconv_open: * @to_codeset: destination codeset @@ -247,7 +213,7 @@ _g_charset_get_aliases (const char *canonical_name); * GLib provides g_convert() and g_locale_to_utf8() which are likely * more convenient than the raw iconv wrappers. * - * Return value: a "conversion descriptor", or (GIConv)-1 if + * Returns: a "conversion descriptor", or (GIConv)-1 if * opening the converter failed. **/ GIConv @@ -299,7 +265,7 @@ g_iconv_open (const gchar *to_codeset, * GLib provides g_convert() and g_locale_to_utf8() which are likely * more convenient than the raw iconv wrappers. * - * Return value: count of non-reversible conversions, or -1 on error + * Returns: count of non-reversible conversions, or -1 on error **/ gsize g_iconv (GIConv converter, @@ -326,7 +292,7 @@ g_iconv (GIConv converter, * GLib provides g_convert() and g_locale_to_utf8() which are likely * more convenient than the raw iconv wrappers. 
* - * Return value: -1 on error, 0 on success + * Returns: -1 on error, 0 on success **/ gint g_iconv_close (GIConv converter) @@ -336,296 +302,6 @@ g_iconv_close (GIConv converter) return iconv_close (cd); } - -#ifdef NEED_ICONV_CACHE - -#define ICONV_CACHE_SIZE (16) - -struct _iconv_cache_bucket { - gchar *key; - guint32 refcount; - gboolean used; - GIConv cd; -}; - -static GList *iconv_cache_list; -static GHashTable *iconv_cache; -static GHashTable *iconv_open_hash; -static guint iconv_cache_size = 0; -G_LOCK_DEFINE_STATIC (iconv_cache_lock); - -/* caller *must* hold the iconv_cache_lock */ -static void -iconv_cache_init (void) -{ - static gboolean initialized = FALSE; - - if (initialized) - return; - - iconv_cache_list = NULL; - iconv_cache = g_hash_table_new (g_str_hash, g_str_equal); - iconv_open_hash = g_hash_table_new (g_direct_hash, g_direct_equal); - - initialized = TRUE; -} - - -/* - * iconv_cache_bucket_new: - * @key: cache key - * @cd: iconv descriptor - * - * Creates a new cache bucket, inserts it into the cache and - * increments the cache size. - * - * This assumes ownership of @key. - * - * Returns a pointer to the newly allocated cache bucket. - **/ -static struct _iconv_cache_bucket * -iconv_cache_bucket_new (gchar *key, GIConv cd) -{ - struct _iconv_cache_bucket *bucket; - - bucket = g_new (struct _iconv_cache_bucket, 1); - bucket->key = key; - bucket->refcount = 1; - bucket->used = TRUE; - bucket->cd = cd; - - g_hash_table_insert (iconv_cache, bucket->key, bucket); - - /* FIXME: if we sorted the list so items with few refcounts were - first, then we could expire them faster in iconv_cache_expire_unused () */ - iconv_cache_list = g_list_prepend (iconv_cache_list, bucket); - - iconv_cache_size++; - - return bucket; -} - - -/* - * iconv_cache_bucket_expire: - * @node: cache bucket's node - * @bucket: cache bucket - * - * Expires a single cache bucket @bucket. 
This should only ever be - * called on a bucket that currently has no used iconv descriptors - * open. - * - * @node is not a required argument. If @node is not supplied, we - * search for it ourselves. - **/ -static void -iconv_cache_bucket_expire (GList *node, struct _iconv_cache_bucket *bucket) -{ - g_hash_table_remove (iconv_cache, bucket->key); - - if (node == NULL) - node = g_list_find (iconv_cache_list, bucket); - - g_assert (node != NULL); - - if (node->prev) - { - node->prev->next = node->next; - if (node->next) - node->next->prev = node->prev; - } - else - { - iconv_cache_list = node->next; - if (node->next) - node->next->prev = NULL; - } - - g_list_free_1 (node); - - g_free (bucket->key); - g_iconv_close (bucket->cd); - g_free (bucket); - - iconv_cache_size--; -} - - -/* - * iconv_cache_expire_unused: - * - * Expires as many unused cache buckets as it needs to in order to get - * the total number of buckets < ICONV_CACHE_SIZE. - **/ -static void -iconv_cache_expire_unused (void) -{ - struct _iconv_cache_bucket *bucket; - GList *node, *next; - - node = iconv_cache_list; - while (node && iconv_cache_size >= ICONV_CACHE_SIZE) - { - next = node->next; - - bucket = node->data; - if (bucket->refcount == 0) - iconv_cache_bucket_expire (node, bucket); - - node = next; - } -} - -static GIConv -open_converter (const gchar *to_codeset, - const gchar *from_codeset, - GError **error) -{ - struct _iconv_cache_bucket *bucket; - gchar *key, *dyn_key, auto_key[80]; - GIConv cd; - gsize len_from_codeset, len_to_codeset; - - /* create our key */ - len_from_codeset = strlen (from_codeset); - len_to_codeset = strlen (to_codeset); - if (len_from_codeset + len_to_codeset + 2 < sizeof (auto_key)) - { - key = auto_key; - dyn_key = NULL; - } - else - key = dyn_key = g_malloc (len_from_codeset + len_to_codeset + 2); - memcpy (key, from_codeset, len_from_codeset); - key[len_from_codeset] = ':'; - strcpy (key + len_from_codeset + 1, to_codeset); - - G_LOCK (iconv_cache_lock); - - /* 
make sure the cache has been initialized */ - iconv_cache_init (); - - bucket = g_hash_table_lookup (iconv_cache, key); - if (bucket) - { - g_free (dyn_key); - - if (bucket->used) - { - cd = g_iconv_open (to_codeset, from_codeset); - if (cd == (GIConv) -1) - goto error; - } - else - { - /* Apparently iconv on Solaris <= 7 segfaults if you pass in - * NULL for anything but inbuf; work around that. (NULL outbuf - * or NULL *outbuf is allowed by Unix98.) - */ - gsize inbytes_left = 0; - gchar *outbuf = NULL; - gsize outbytes_left = 0; - - cd = bucket->cd; - bucket->used = TRUE; - - /* reset the descriptor */ - g_iconv (cd, NULL, &inbytes_left, &outbuf, &outbytes_left); - } - - bucket->refcount++; - } - else - { - cd = g_iconv_open (to_codeset, from_codeset); - if (cd == (GIConv) -1) - { - g_free (dyn_key); - goto error; - } - - iconv_cache_expire_unused (); - - bucket = iconv_cache_bucket_new (dyn_key ? dyn_key : g_strdup (key), cd); - } - - g_hash_table_insert (iconv_open_hash, cd, bucket->key); - - G_UNLOCK (iconv_cache_lock); - - return cd; - - error: - - G_UNLOCK (iconv_cache_lock); - - /* Something went wrong. 
*/ - if (error) - { - if (errno == EINVAL) - g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION, - _("Conversion from character set '%s' to '%s' is not supported"), - from_codeset, to_codeset); - else - g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, - _("Could not open converter from '%s' to '%s'"), - from_codeset, to_codeset); - } - - return cd; -} - -static int -close_converter (GIConv converter) -{ - struct _iconv_cache_bucket *bucket; - const gchar *key; - GIConv cd; - - cd = converter; - - if (cd == (GIConv) -1) - return 0; - - G_LOCK (iconv_cache_lock); - - key = g_hash_table_lookup (iconv_open_hash, cd); - if (key) - { - g_hash_table_remove (iconv_open_hash, cd); - - bucket = g_hash_table_lookup (iconv_cache, key); - g_assert (bucket); - - bucket->refcount--; - - if (cd == bucket->cd) - bucket->used = FALSE; - else - g_iconv_close (cd); - - if (!bucket->refcount && iconv_cache_size > ICONV_CACHE_SIZE) - { - /* expire this cache bucket */ - iconv_cache_bucket_expire (NULL, bucket); - } - } - else - { - G_UNLOCK (iconv_cache_lock); - - g_warning ("This iconv context wasn't opened using open_converter"); - - return g_iconv_close (converter); - } - - G_UNLOCK (iconv_cache_lock); - - return 0; -} - -#else /* !NEED_ICONV_CACHE */ - static GIConv open_converter (const gchar *to_codeset, const gchar *from_codeset, @@ -663,13 +339,13 @@ close_converter (GIConv cd) return g_iconv_close (cd); } -#endif /* NEED_ICONV_CACHE */ - /** * g_convert_with_iconv: * @str: the string to convert - * @len: the length of the string, or -1 if the string is - * nul-terminated. + * @len: the length of the string in bytes, or -1 if the string is + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. 
In that case, using -1 + * for the @len parameter is unsafe) * @converter: conversion descriptor from g_iconv_open() * @bytes_read: location to store the number of bytes in the * input string that were successfully converted, or %NULL. @@ -681,27 +357,22 @@ close_converter (GIConv cd) * input sequence. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from one character set to another. * - * Note that you should use g_iconv() for streaming - * conversions - * + * Note that you should use g_iconv() for streaming conversions. * Despite the fact that @bytes_read can return information about partial - * characters, the g_convert_... functions - * are not generally suitable for streaming. If the underlying converter - * being used maintains internal state, then this won't be preserved - * across successive calls to g_convert(), g_convert_with_iconv() or - * g_convert_with_fallback(). (An example of this is the GNU C converter - * for CP1255 which does not emit a base character until it knows that - * the next character is not a mark that could combine with the base - * character.) - * - * . + * characters, the g_convert_... functions are not generally suitable + * for streaming. If the underlying converter maintains internal state, + * then this won't be preserved across successive calls to g_convert(), + * g_convert_with_iconv() or g_convert_with_fallback(). (An example of + * this is the GNU C converter for CP1255 which does not emit a base + * character until it knows that the next character is not a mark that + * could combine with the base character.)
* - * Return value: If the conversion was successful, a newly allocated + * Returns: If the conversion was successful, a newly allocated * nul-terminated string, which must be freed with * g_free(). Otherwise %NULL and @error will be set. **/ @@ -763,20 +434,18 @@ g_convert_with_iconv (const gchar *str, } break; case EILSEQ: - if (error) - g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, - _("Invalid byte sequence in conversion input")); + g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, + _("Invalid byte sequence in conversion input")); have_error = TRUE; break; default: - if (error) - { - int errsv = errno; - - g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, - _("Error during conversion: %s"), - g_strerror (errsv)); - } + { + int errsv = errno; + + g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, + _("Error during conversion: %s"), + g_strerror (errsv)); + } have_error = TRUE; break; } @@ -804,9 +473,8 @@ g_convert_with_iconv (const gchar *str, { if (!have_error) { - if (error) - g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, - _("Partial character sequence at end of input")); + g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, + _("Partial character sequence at end of input")); have_error = TRUE; } } @@ -827,14 +495,10 @@ g_convert_with_iconv (const gchar *str, /** * g_convert: * @str: the string to convert - * @len: the length of the string, or -1 if the string is - * nul-terminated - - Note that some encodings may allow nul bytes to - occur inside strings. In that case, using -1 for - the @len parameter is unsafe. - - . + * @len: the length of the string in bytes, or -1 if the string is + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. 
In that case, using -1 + * for the @len parameter is unsafe) * @to_codeset: name of character set into which to convert @str * @from_codeset: character set of @str. * @bytes_read: (out): location to store the number of bytes in the @@ -847,15 +511,25 @@ g_convert_with_iconv (const gchar *str, * input sequence. * @bytes_written: (out): the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from one character set to another. * - * Note that you should use g_iconv() for streaming - * conversions. + * Note that you should use g_iconv() for streaming conversions. + * Despite the fact that @bytes_read can return information about partial + * characters, the g_convert_... functions are not generally suitable + * for streaming. If the underlying converter maintains internal state, + * then this won't be preserved across successive calls to g_convert(), + * g_convert_with_iconv() or g_convert_with_fallback(). (An example of + * this is the GNU C converter for CP1255 which does not emit a base + * character until it knows that the next character is not a mark that + * could combine with the base character.) * - * Return value: If the conversion was successful, a newly allocated + * Using extensions such as "//TRANSLIT" may not work (or may not work + * well) on many platforms. Consider using g_str_to_ascii() instead. + * + * Returns: If the conversion was successful, a newly allocated * nul-terminated string, which must be freed with * g_free(). Otherwise %NULL and @error will be set. **/ @@ -900,8 +574,10 @@ g_convert (const gchar *str, /** * g_convert_with_fallback: * @str: the string to convert - * @len: the length of the string, or -1 if the string is - * nul-terminated.
+ * @len: the length of the string in bytes, or -1 if the string is + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. In that case, using -1 + * for the @len parameter is unsafe) * @to_codeset: name of character set into which to convert @str * @from_codeset: character set of @str. * @fallback: UTF-8 string to use in place of character not @@ -916,7 +592,7 @@ g_convert (const gchar *str, * at the end of the input. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from one character set to another, possibly @@ -927,10 +603,17 @@ g_convert (const gchar *str, * to @to_codeset in their iconv() functions, * in which case GLib will simply return that approximate conversion. * - * Note that you should use g_iconv() for streaming - * conversions. + * Note that you should use g_iconv() for streaming conversions. + * Despite the fact that @bytes_read can return information about partial + * characters, the g_convert_... functions are not generally suitable + * for streaming. If the underlying converter maintains internal state, + * then this won't be preserved across successive calls to g_convert(), + * g_convert_with_iconv() or g_convert_with_fallback(). (An example of + * this is the GNU C converter for CP1255 which does not emit a base + * character until it knows that the next character is not a mark that + * could combine with the base character.) * - * Return value: If the conversion was successful, a newly allocated + * Returns: If the conversion was successful, a newly allocated * nul-terminated string, which must be freed with * g_free(). Otherwise %NULL and @error will be set.
**/ @@ -1186,7 +869,9 @@ strdup_len (const gchar *string, * @opsysstring: a string in the encoding of the current locale. On Windows * this means the system codepage. * @len: the length of the string, or -1 if the string is - * nul-terminated. + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. In that case, using -1 + * for the @len parameter is unsafe) * @bytes_read: location to store the number of bytes in the * input string that were successfully converted, or %NULL. * Even if the conversion was successful, this may be @@ -1197,15 +882,15 @@ strdup_len (const gchar *string, * input sequence. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string which is in the encoding used for strings by * the C runtime (usually the same as that used by the operating - * system) in the current locale into a - * UTF-8 string. + * system) in the [current locale][setlocale] into a UTF-8 string. * - * Return value: The converted string, or %NULL on an error. + * Returns: A newly-allocated buffer containing the converted string, + * or %NULL on an error, and error will be set. **/ gchar * g_locale_to_utf8 (const gchar *opsysstring, @@ -1227,7 +912,9 @@ g_locale_to_utf8 (const gchar *opsysstring, * g_locale_from_utf8: * @utf8string: a UTF-8 encoded string * @len: the length of the string, or -1 if the string is - * nul-terminated. + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. In that case, using -1 + * for the @len parameter is unsafe) * @bytes_read: location to store the number of bytes in the * input string that were successfully converted, or %NULL. 
* Even if the conversion was successful, this may be @@ -1238,15 +925,16 @@ g_locale_to_utf8 (const gchar *opsysstring, * input sequence. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from UTF-8 to the encoding used for strings by * the C runtime (usually the same as that used by the operating - * system) in the current locale. On - * Windows this means the system codepage. + * system) in the [current locale][setlocale]. On Windows this means + * the system codepage. * - * Return value: The converted string, or %NULL on an error. + * Returns: A newly-allocated buffer containing the converted string, + * or %NULL on an error, and error will be set. **/ gchar * g_locale_from_utf8 (const gchar *utf8string, @@ -1293,41 +981,40 @@ filename_charset_cache_free (gpointer data) * representation of a filename, see g_filename_display_name(). * * On Unix, the character sets are determined by consulting the - * environment variables G_FILENAME_ENCODING and - * G_BROKEN_FILENAMES. On Windows, the character set - * used in the GLib API is always UTF-8 and said environment variables - * have no effect. + * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`. + * On Windows, the character set used in the GLib API is always UTF-8 + * and said environment variables have no effect. * - * G_FILENAME_ENCODING may be set to a comma-separated list - * of character set names. The special token "@locale" is taken to - * mean the character set for the current - * locale. If G_FILENAME_ENCODING is not set, but - * G_BROKEN_FILENAMES is, the character set of the current - * locale is taken as the filename encoding. 
If neither environment variable - * is set, UTF-8 is taken as the filename encoding, but the character - * set of the current locale is also put in the list of encodings. + * `G_FILENAME_ENCODING` may be set to a comma-separated list of + * character set names. The special token "@locale" is taken + * to mean the character set for the [current locale][setlocale]. + * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is, + * the character set of the current locale is taken as the filename + * encoding. If neither environment variable is set, UTF-8 is taken + * as the filename encoding, but the character set of the current locale + * is also put in the list of encodings. * * The returned @charsets belong to GLib and must not be freed. * * Note that on Unix, regardless of the locale character set or - * G_FILENAME_ENCODING value, the actual file names present + * `G_FILENAME_ENCODING` value, the actual file names present * on a system might be in any random encoding or just gibberish. * - * Return value: %TRUE if the filename encoding is UTF-8. + * Returns: %TRUE if the filename encoding is UTF-8. 
* * Since: 2.6 */ gboolean -g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets) +g_get_filename_charsets (const gchar ***filename_charsets) { - static GStaticPrivate cache_private = G_STATIC_PRIVATE_INIT; - GFilenameCharsetCache *cache = g_static_private_get (&cache_private); + static GPrivate cache_private = G_PRIVATE_INIT (filename_charset_cache_free); + GFilenameCharsetCache *cache = g_private_get (&cache_private); const gchar *charset; if (!cache) { cache = g_new0 (GFilenameCharsetCache, 1); - g_static_private_set (&cache_private, cache, filename_charset_cache_free); + g_private_set (&cache_private, cache); } g_get_charset (&charset); @@ -1383,7 +1070,7 @@ g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets) #else /* G_PLATFORM_WIN32 */ gboolean -g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets) +g_get_filename_charsets (const gchar ***filename_charsets) { static const gchar *charsets[] = { "UTF-8", @@ -1425,21 +1112,13 @@ get_filename_charset (const gchar **filename_charset) return is_utf8; } -/* This is called from g_thread_init(). It's used to - * initialize some static data in a threadsafe way. - */ -void -_g_convert_thread_init (void) -{ - const gchar **dummy; - (void) g_get_filename_charsets (&dummy); -} - /** * g_filename_to_utf8: * @opsysstring: a string in the encoding for filenames * @len: the length of the string, or -1 if the string is - * nul-terminated. + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. In that case, using -1 + * for the @len parameter is unsafe) * @bytes_read: location to store the number of bytes in the * input string that were successfully converted, or %NULL. * Even if the conversion was successful, this may be @@ -1450,15 +1129,15 @@ _g_convert_thread_init (void) * input sequence. * @bytes_written: the number of bytes stored in the output buffer (not * including the terminating nul). 
- * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string which is in the encoding used by GLib for * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8 * for filenames; on other platforms, this function indirectly depends on - * the current locale. + * the [current locale][setlocale]. * - * Return value: The converted string, or %NULL on an error. + * Returns: The converted string, or %NULL on an error. **/ gchar* g_filename_to_utf8 (const gchar *opsysstring, @@ -1469,6 +1148,8 @@ g_filename_to_utf8 (const gchar *opsysstring, { const gchar *charset; + g_return_val_if_fail (opsysstring != NULL, NULL); + if (get_filename_charset (&charset)) return strdup_len (opsysstring, len, bytes_read, bytes_written, error); else @@ -1494,6 +1175,8 @@ g_filename_to_utf8 (const gchar *opsysstring, { const gchar *charset; + g_return_val_if_fail (opsysstring != NULL, NULL); + if (g_get_charset (&charset)) return strdup_len (opsysstring, len, bytes_read, bytes_written, error); else @@ -1508,25 +1191,26 @@ g_filename_to_utf8 (const gchar *opsysstring, * @utf8string: a UTF-8 encoded string. * @len: the length of the string, or -1 if the string is * nul-terminated. - * @bytes_read: location to store the number of bytes in the - * input string that were successfully converted, or %NULL. + * @bytes_read: (out) (allow-none): location to store the number of bytes in + * the input string that were successfully converted, or %NULL. * Even if the conversion was successful, this may be * less than @len if there were partial characters * at the end of the input. If the error * #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value * stored will the byte offset after the last valid * input sequence. 
- * @bytes_written: the number of bytes stored in the output buffer (not + * @bytes_written: (out): the number of bytes stored in the output buffer (not * including the terminating nul). - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts a string from UTF-8 to the encoding GLib uses for * filenames. Note that on Windows GLib uses UTF-8 for filenames; * on other platforms, this function indirectly depends on the - * current locale. + * [current locale][setlocale]. * - * Return value: The converted string, or %NULL on an error. + * Returns: (array length=bytes_written) (element-type guint8) (transfer full): + * The converted string, or %NULL on an error. **/ gchar* g_filename_from_utf8 (const gchar *utf8string, @@ -1846,17 +1530,17 @@ hostname_validate (const char *hostname) /** * g_filename_from_uri: * @uri: a uri describing a filename (escaped, encoded in ASCII). - * @hostname: Location to store hostname for the URI, or %NULL. + * @hostname: (out) (allow-none): Location to store hostname for the URI, or %NULL. * If there is no hostname in the URI, %NULL will be * stored in this location. - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts an escaped ASCII-encoded URI to a local filename in the * encoding used for filenames. * - * Return value: a newly-allocated string holding the resulting - * filename, or %NULL on an error. + * Returns: (type filename): a newly-allocated string holding + * the resulting filename, or %NULL on an error. 
**/ gchar * g_filename_from_uri (const gchar *uri, @@ -2008,13 +1692,13 @@ g_filename_from_uri (const gchar *uri, * which is the on-disk file name bytes on Unix, and UTF-8 on * Windows * @hostname: (allow-none): A UTF-8 encoded hostname, or %NULL for none. - * @error: location to store the error occuring, or %NULL to ignore + * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. * * Converts an absolute filename to an escaped ASCII-encoded URI, with the path * component following Section 3.3. of RFC 2396. * - * Return value: a newly-allocated string holding the resulting + * Returns: a newly-allocated string holding the resulting * URI, or %NULL on an error. **/ gchar * @@ -2087,9 +1771,9 @@ g_filename_to_uri (const gchar *filename, * mime type defined in RFC 2483 into individual URIs, * discarding any comments. The URIs are not validated. * - * Returns: a newly allocated %NULL-terminated list of - * strings holding the individual URIs. The array should - * be freed with g_strfreev(). + * Returns: (transfer full): a newly allocated %NULL-terminated list + * of strings holding the individual URIs. The array should be freed + * with g_strfreev(). * * Since: 2.6 */ @@ -2173,7 +1857,7 @@ g_uri_list_extract_uris (const gchar *uri_list) * This function is preferred over g_filename_display_name() if you know the * whole path, as it allows translation. * - * Return value: a newly allocated string containing + * Returns: a newly allocated string containing * a rendition of the basename of the filename in valid UTF-8 * * Since: 2.6 @@ -2212,7 +1896,7 @@ g_filename_display_basename (const gchar *filename) * g_filename_display_basename(), since that allows location-based * translation of filenames. * - * Return value: a newly allocated string containing + * Returns: a newly allocated string containing * a rendition of the filename in valid UTF-8 * * Since: 2.6