X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=glib%2Fgconvert.c;h=1d55fda1342e892ac3a32beae5c5ea923e61a192;hb=14f2376c8a06f0936d734ec6a7e0e836072acf65;hp=bbc15bb143ce5605e461c9378e1f54ac3e4d3351;hpb=4b602940e2c46d9f91acf91f67a6cd0b6ea05e13;p=platform%2Fupstream%2Fglib.git diff --git a/glib/gconvert.c b/glib/gconvert.c index bbc15bb..1d55fda 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -15,9 +15,7 @@ * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "config.h" @@ -43,7 +41,7 @@ #include "gconvert.h" -#include "gcharset.h" +#include "gcharsetprivate.h" #include "gslist.h" #include "gstrfuncs.h" #include "gtestutils.h" @@ -51,11 +49,6 @@ #include "gunicode.h" #include "gfileutils.h" -#ifdef NEED_ICONV_CACHE -#include "glist.h" -#include "ghash.h" -#endif - #include "glibintl.h" #if defined(USE_LIBICONV_GNU) && !defined (_LIBICONV_H) @@ -72,120 +65,99 @@ * @title: Character Set Conversion * @short_description: convert strings between different character sets * - * The g_convert() family of function wraps the functionality of iconv(). In - * addition to pure character set conversions, GLib has functions to deal - * with the extra complications of encodings for file names. + * The g_convert() family of functions wraps the functionality of iconv(). + * In addition to pure character set conversions, GLib has functions to + * deal with the extra complications of encodings for file names. + * + * ## File Name Encodings * - * - * File Name Encodings - * - * Historically, Unix has not had a defined encoding for file - * names: a file name is valid as long as it does not have path - * separators in it ("/").
However, displaying file names may - * require conversion: from the character set in which they were - * created, to the character set in which the application - * operates. Consider the Spanish file name - * "Presentación.sxi". If the - * application which created it uses ISO-8859-1 for its encoding, - * - * + * Historically, UNIX has not had a defined encoding for file names: + * a file name is valid as long as it does not have path separators + * in it ("/"). However, displaying file names may require conversion: + * from the character set in which they were created, to the character + * set in which the application operates. Consider the Spanish file name + * "Presentación.sxi". If the application which created it uses + * ISO-8859-1 for its encoding, + * |[ * Character: P r e s e n t a c i ó n . s x i * Hex code: 50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69 - * - * + * ]| * However, if the application use UTF-8, the actual file name on * disk would look like this: - * - * + * |[ * Character: P r e s e n t a c i ó n . s x i * Hex code: 50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69 - * - * - * Glib uses UTF-8 for its strings, and GUI toolkits like GTK+ - * that use Glib do the same thing. If you get a file name from - * the file system, for example, from readdir(3) or from g_dir_read_name(), - * and you wish to display the file name to the user, you - * will need to convert it into UTF-8. The - * opposite case is when the user types the name of a file he - * wishes to save: the toolkit will give you that string in - * UTF-8 encoding, and you will need to convert it to the - * character set used for file names before you can create the - * file with open(2) or fopen(3). - * - * + * ]| + * Glib uses UTF-8 for its strings, and GUI toolkits like GTK+ that use + * Glib do the same thing. 
If you get a file name from the file system, + * for example, from readdir() or from g_dir_read_name(), and you wish + * to display the file name to the user, you will need to convert it + * into UTF-8. The opposite case is when the user types the name of a + * file he wishes to save: the toolkit will give you that string in + * UTF-8 encoding, and you will need to convert it to the character + * set used for file names before you can create the file with open() + * or fopen(). + * * By default, Glib assumes that file names on disk are in UTF-8 - * encoding. This is a valid assumption for file systems which - * were created relatively recently: most applications use UTF-8 + * encoding. This is a valid assumption for file systems which + * were created relatively recently: most applications use UTF-8 * encoding for their strings, and that is also what they use for - * the file names they create. However, older file systems may + * the file names they create. However, older file systems may * still contain file names created in "older" encodings, such as - * ISO-8859-1. In this case, for compatibility reasons, you may - * want to instruct Glib to use that particular encoding for file - * names rather than UTF-8. You can do this by specifying the - * encoding for file names in the G_FILENAME_ENCODING - * environment variable. For example, if your installation uses - * ISO-8859-1 for file names, you can put this in your - * ~/.profile: - * - * + * ISO-8859-1. In this case, for compatibility reasons, you may want + * to instruct Glib to use that particular encoding for file names + * rather than UTF-8. You can do this by specifying the encoding for + * file names in the [`G_FILENAME_ENCODING`][G_FILENAME_ENCODING] + * environment variable. 
For example, if your installation uses + * ISO-8859-1 for file names, you can put this in your `~/.profile` + * |[ * export G_FILENAME_ENCODING=ISO-8859-1 - * - * + * ]| * Glib provides the functions g_filename_to_utf8() and - * g_filename_from_utf8() to perform the necessary conversions. These - * functions convert file names from the encoding specified in - * G_FILENAME_ENCODING to UTF-8 and vice-versa. - * illustrates how + * g_filename_from_utf8() to perform the necessary conversions. + * These functions convert file names from the encoding specified + * in `G_FILENAME_ENCODING` to UTF-8 and vice-versa. This + * [diagram][file-name-encodings-diagram] illustrates how * these functions are used to convert between UTF-8 and the * encoding for file names in the file system. - * - *
- * Conversion between File Name Encodings - * - *
- * - * Checklist for Application Writers - * + * + * ## Conversion between file name encodings # {#file-name-encodings-diagram} + * + * ![](file-name-encodings.png) + * + * ## Checklist for Application Writers + * * This section is a practical summary of the detailed - * description above. You can use this as a checklist of + * things to do to make sure your applications process file * name encodings correctly. - * - * - * - * If you get a file name from the file system from a function - * such as readdir(3) or gtk_file_chooser_get_filename(), - * you do not need to do any conversion to pass that - * file name to functions like open(2), rename(2), or - * fopen(3) — those are "raw" file names which the file - * system understands. - * - * - * If you need to display a file name, convert it to UTF-8 first by - * using g_filename_to_utf8(). If conversion fails, display a string like - * "Unknown file name". Do not - * convert this string back into the encoding used for file names if you - * wish to pass it to the file system; use the original file name instead. - * For example, the document window of a word processor could display - * "Unknown file name" in its title bar but still let the user save the - * file, as it would keep the raw file name internally. This can happen - * if the user has not set the G_FILENAME_ENCODING - * environment variable even though he has files whose names are not - * encoded in UTF-8. - * - * - * If your user interface lets the user type a file name for saving or - * renaming, convert it to the encoding used for file names in the file - * system by using g_filename_from_utf8(). Pass the converted file name - * to functions like fopen(3). If conversion fails, ask the user to enter - * a different file name. This can happen if the user types Japanese - * characters when G_FILENAME_ENCODING is set to - * ISO-8859-1, for example. - * - * - * - *
+ * + * 1. If you get a file name from the file system from a function + * such as readdir() or gtk_file_chooser_get_filename(), you do + * not need to do any conversion to pass that file name to + * functions like open(), rename(), or fopen() -- those are "raw" + * file names which the file system understands. + * + * 2. If you need to display a file name, convert it to UTF-8 first + * by using g_filename_to_utf8(). If conversion fails, display a + * string like "Unknown file name". Do not convert this string back + * into the encoding used for file names if you wish to pass it to + * the file system; use the original file name instead. + * + * For example, the document window of a word processor could display + * "Unknown file name" in its title bar but still let the user save + * the file, as it would keep the raw file name internally. This + * can happen if the user has not set the `G_FILENAME_ENCODING` + * environment variable even though he has files whose names are + * not encoded in UTF-8. + * + * 3. If your user interface lets the user type a file name for saving + * or renaming, convert it to the encoding used for file names in + * the file system by using g_filename_from_utf8(). Pass the converted + * file name to functions like fopen(). If conversion fails, ask the + * user to enter a different file name. This can happen if the user + * types Japanese characters when `G_FILENAME_ENCODING` is set to + * `ISO-8859-1`, for example. */ /* We try to terminate strings in unknown charsets with this many zero bytes @@ -229,9 +201,6 @@ try_to_aliases (const char **to_aliases, return FALSE; } -G_GNUC_INTERNAL extern const char ** -_g_charset_get_aliases (const char *canonical_name); - /** * g_iconv_open: * @to_codeset: destination codeset @@ -244,7 +213,7 @@ _g_charset_get_aliases (const char *canonical_name); * GLib provides g_convert() and g_locale_to_utf8() which are likely * more convenient than the raw iconv wrappers. 
* - * Return value: a "conversion descriptor", or (GIConv)-1 if + * Returns: a "conversion descriptor", or (GIConv)-1 if * opening the converter failed. **/ GIConv @@ -296,7 +265,7 @@ g_iconv_open (const gchar *to_codeset, * GLib provides g_convert() and g_locale_to_utf8() which are likely * more convenient than the raw iconv wrappers. * - * Return value: count of non-reversible conversions, or -1 on error + * Returns: count of non-reversible conversions, or -1 on error **/ gsize g_iconv (GIConv converter, @@ -323,7 +292,7 @@ g_iconv (GIConv converter, * GLib provides g_convert() and g_locale_to_utf8() which are likely * more convenient than the raw iconv wrappers. * - * Return value: -1 on error, 0 on success + * Returns: -1 on error, 0 on success **/ gint g_iconv_close (GIConv converter) @@ -333,296 +302,6 @@ g_iconv_close (GIConv converter) return iconv_close (cd); } - -#ifdef NEED_ICONV_CACHE - -#define ICONV_CACHE_SIZE (16) - -struct _iconv_cache_bucket { - gchar *key; - guint32 refcount; - gboolean used; - GIConv cd; -}; - -static GList *iconv_cache_list; -static GHashTable *iconv_cache; -static GHashTable *iconv_open_hash; -static guint iconv_cache_size = 0; -G_LOCK_DEFINE_STATIC (iconv_cache_lock); - -/* caller *must* hold the iconv_cache_lock */ -static void -iconv_cache_init (void) -{ - static gboolean initialized = FALSE; - - if (initialized) - return; - - iconv_cache_list = NULL; - iconv_cache = g_hash_table_new (g_str_hash, g_str_equal); - iconv_open_hash = g_hash_table_new (g_direct_hash, g_direct_equal); - - initialized = TRUE; -} - - -/* - * iconv_cache_bucket_new: - * @key: cache key - * @cd: iconv descriptor - * - * Creates a new cache bucket, inserts it into the cache and - * increments the cache size. - * - * This assumes ownership of @key. - * - * Returns a pointer to the newly allocated cache bucket. 
- **/ -static struct _iconv_cache_bucket * -iconv_cache_bucket_new (gchar *key, GIConv cd) -{ - struct _iconv_cache_bucket *bucket; - - bucket = g_new (struct _iconv_cache_bucket, 1); - bucket->key = key; - bucket->refcount = 1; - bucket->used = TRUE; - bucket->cd = cd; - - g_hash_table_insert (iconv_cache, bucket->key, bucket); - - /* FIXME: if we sorted the list so items with few refcounts were - first, then we could expire them faster in iconv_cache_expire_unused () */ - iconv_cache_list = g_list_prepend (iconv_cache_list, bucket); - - iconv_cache_size++; - - return bucket; -} - - -/* - * iconv_cache_bucket_expire: - * @node: cache bucket's node - * @bucket: cache bucket - * - * Expires a single cache bucket @bucket. This should only ever be - * called on a bucket that currently has no used iconv descriptors - * open. - * - * @node is not a required argument. If @node is not supplied, we - * search for it ourselves. - **/ -static void -iconv_cache_bucket_expire (GList *node, struct _iconv_cache_bucket *bucket) -{ - g_hash_table_remove (iconv_cache, bucket->key); - - if (node == NULL) - node = g_list_find (iconv_cache_list, bucket); - - g_assert (node != NULL); - - if (node->prev) - { - node->prev->next = node->next; - if (node->next) - node->next->prev = node->prev; - } - else - { - iconv_cache_list = node->next; - if (node->next) - node->next->prev = NULL; - } - - g_list_free_1 (node); - - g_free (bucket->key); - g_iconv_close (bucket->cd); - g_free (bucket); - - iconv_cache_size--; -} - - -/* - * iconv_cache_expire_unused: - * - * Expires as many unused cache buckets as it needs to in order to get - * the total number of buckets < ICONV_CACHE_SIZE. 
- **/ -static void -iconv_cache_expire_unused (void) -{ - struct _iconv_cache_bucket *bucket; - GList *node, *next; - - node = iconv_cache_list; - while (node && iconv_cache_size >= ICONV_CACHE_SIZE) - { - next = node->next; - - bucket = node->data; - if (bucket->refcount == 0) - iconv_cache_bucket_expire (node, bucket); - - node = next; - } -} - -static GIConv -open_converter (const gchar *to_codeset, - const gchar *from_codeset, - GError **error) -{ - struct _iconv_cache_bucket *bucket; - gchar *key, *dyn_key, auto_key[80]; - GIConv cd; - gsize len_from_codeset, len_to_codeset; - - /* create our key */ - len_from_codeset = strlen (from_codeset); - len_to_codeset = strlen (to_codeset); - if (len_from_codeset + len_to_codeset + 2 < sizeof (auto_key)) - { - key = auto_key; - dyn_key = NULL; - } - else - key = dyn_key = g_malloc (len_from_codeset + len_to_codeset + 2); - memcpy (key, from_codeset, len_from_codeset); - key[len_from_codeset] = ':'; - strcpy (key + len_from_codeset + 1, to_codeset); - - G_LOCK (iconv_cache_lock); - - /* make sure the cache has been initialized */ - iconv_cache_init (); - - bucket = g_hash_table_lookup (iconv_cache, key); - if (bucket) - { - g_free (dyn_key); - - if (bucket->used) - { - cd = g_iconv_open (to_codeset, from_codeset); - if (cd == (GIConv) -1) - goto error; - } - else - { - /* Apparently iconv on Solaris <= 7 segfaults if you pass in - * NULL for anything but inbuf; work around that. (NULL outbuf - * or NULL *outbuf is allowed by Unix98.) - */ - gsize inbytes_left = 0; - gchar *outbuf = NULL; - gsize outbytes_left = 0; - - cd = bucket->cd; - bucket->used = TRUE; - - /* reset the descriptor */ - g_iconv (cd, NULL, &inbytes_left, &outbuf, &outbytes_left); - } - - bucket->refcount++; - } - else - { - cd = g_iconv_open (to_codeset, from_codeset); - if (cd == (GIConv) -1) - { - g_free (dyn_key); - goto error; - } - - iconv_cache_expire_unused (); - - bucket = iconv_cache_bucket_new (dyn_key ? 
dyn_key : g_strdup (key), cd); - } - - g_hash_table_insert (iconv_open_hash, cd, bucket->key); - - G_UNLOCK (iconv_cache_lock); - - return cd; - - error: - - G_UNLOCK (iconv_cache_lock); - - /* Something went wrong. */ - if (error) - { - if (errno == EINVAL) - g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION, - _("Conversion from character set '%s' to '%s' is not supported"), - from_codeset, to_codeset); - else - g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, - _("Could not open converter from '%s' to '%s'"), - from_codeset, to_codeset); - } - - return cd; -} - -static int -close_converter (GIConv converter) -{ - struct _iconv_cache_bucket *bucket; - const gchar *key; - GIConv cd; - - cd = converter; - - if (cd == (GIConv) -1) - return 0; - - G_LOCK (iconv_cache_lock); - - key = g_hash_table_lookup (iconv_open_hash, cd); - if (key) - { - g_hash_table_remove (iconv_open_hash, cd); - - bucket = g_hash_table_lookup (iconv_cache, key); - g_assert (bucket); - - bucket->refcount--; - - if (cd == bucket->cd) - bucket->used = FALSE; - else - g_iconv_close (cd); - - if (!bucket->refcount && iconv_cache_size > ICONV_CACHE_SIZE) - { - /* expire this cache bucket */ - iconv_cache_bucket_expire (NULL, bucket); - } - } - else - { - G_UNLOCK (iconv_cache_lock); - - g_warning ("This iconv context wasn't opened using open_converter"); - - return g_iconv_close (converter); - } - - G_UNLOCK (iconv_cache_lock); - - return 0; -} - -#else /* !NEED_ICONV_CACHE */ - static GIConv open_converter (const gchar *to_codeset, const gchar *from_codeset, @@ -660,13 +339,13 @@ close_converter (GIConv cd) return g_iconv_close (cd); } -#endif /* NEED_ICONV_CACHE */ - /** * g_convert_with_iconv: * @str: the string to convert - * @len: the length of the string, or -1 if the string is - * nul-terminated. + * @len: the length of the string in bytes, or -1 if the string is + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. 
In that case, using -1 + * for the @len parameter is unsafe) * @converter: conversion descriptor from g_iconv_open() * @bytes_read: location to store the number of bytes in the * input string that were successfully converted, or %NULL. @@ -683,22 +362,17 @@ close_converter (GIConv cd) * * Converts a string from one character set to another. * - * Note that you should use g_iconv() for streaming - * conversions - * + * Note that you should use g_iconv() for streaming conversions. * Despite the fact that @byes_read can return information about partial - * characters, the g_convert_... functions - * are not generally suitable for streaming. If the underlying converter - * being used maintains internal state, then this won't be preserved - * across successive calls to g_convert(), g_convert_with_iconv() or - * g_convert_with_fallback(). (An example of this is the GNU C converter - * for CP1255 which does not emit a base character until it knows that - * the next character is not a mark that could combine with the base - * character.) - * - * . + * characters, the g_convert_... functions are not generally suitable + * for streaming. If the underlying converter maintains internal state, + * then this won't be preserved across successive calls to g_convert(), + * g_convert_with_iconv() or g_convert_with_fallback(). (An example of + * this is the GNU C converter for CP1255 which does not emit a base + * character until it knows that the next character is not a mark that + * could combine with the base character.) * - * Return value: If the conversion was successful, a newly allocated + * Returns: If the conversion was successful, a newly allocated * nul-terminated string, which must be freed with * g_free(). Otherwise %NULL and @error will be set. 
**/ @@ -821,14 +495,10 @@ g_convert_with_iconv (const gchar *str, /** * g_convert: * @str: the string to convert - * @len: the length of the string, or -1 if the string is - * nul-terminated - - Note that some encodings may allow nul bytes to - occur inside strings. In that case, using -1 for - the @len parameter is unsafe. - - . + * @len: the length of the string in bytes, or -1 if the string is + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. In that case, using -1 + * for the @len parameter is unsafe) * @to_codeset: name of character set into which to convert @str * @from_codeset: character set of @str. * @bytes_read: (out): location to store the number of bytes in the @@ -846,10 +516,20 @@ g_convert_with_iconv (const gchar *str, * * Converts a string from one character set to another. * - * Note that you should use g_iconv() for streaming - * conversions. + * Note that you should use g_iconv() for streaming conversions. + * Despite the fact that @bytes_read can return information about partial + * characters, the g_convert_... functions are not generally suitable + * for streaming. If the underlying converter maintains internal state, + * then this won't be preserved across successive calls to g_convert(), + * g_convert_with_iconv() or g_convert_with_fallback(). (An example of + * this is the GNU C converter for CP1255 which does not emit a base + * character until it knows that the next character is not a mark that + * could combine with the base character.) * - * Return value: If the conversion was successful, a newly allocated + * Using extensions such as "//TRANSLIT" may not work (or may not work + * well) on many platforms. Consider using g_str_to_ascii() instead. + * + * Returns: If the conversion was successful, a newly allocated * nul-terminated string, which must be freed with * g_free(). Otherwise %NULL and @error will be set.
**/ @@ -894,8 +574,10 @@ g_convert (const gchar *str, /** * g_convert_with_fallback: * @str: the string to convert - * @len: the length of the string, or -1 if the string is - * nul-terminated. + * @len: the length of the string in bytes, or -1 if the string is + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. In that case, using -1 + * for the @len parameter is unsafe) * @to_codeset: name of character set into which to convert @str * @from_codeset: character set of @str. * @fallback: UTF-8 string to use in place of character not @@ -921,10 +603,17 @@ g_convert (const gchar *str, * to @to_codeset in their iconv() functions, * in which case GLib will simply return that approximate conversion. * - * Note that you should use g_iconv() for streaming - * conversions. + * Note that you should use g_iconv() for streaming conversions. + * Despite the fact that @bytes_read can return information about partial + * characters, the g_convert_... functions are not generally suitable + * for streaming. If the underlying converter maintains internal state, + * then this won't be preserved across successive calls to g_convert(), + * g_convert_with_iconv() or g_convert_with_fallback(). (An example of + * this is the GNU C converter for CP1255 which does not emit a base + * character until it knows that the next character is not a mark that + * could combine with the base character.) * - * Return value: If the conversion was successful, a newly allocated + * Returns: If the conversion was successful, a newly allocated * nul-terminated string, which must be freed with * g_free(). Otherwise %NULL and @error will be set. **/ @@ -1180,7 +869,9 @@ strdup_len (const gchar *string, * @opsysstring: a string in the encoding of the current locale. On Windows * this means the system codepage. * @len: the length of the string, or -1 if the string is - * nul-terminated.
+ * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. In that case, using -1 + * for the @len parameter is unsafe) * @bytes_read: location to store the number of bytes in the * input string that were successfully converted, or %NULL. * Even if the conversion was successful, this may be @@ -1196,10 +887,10 @@ strdup_len (const gchar *string, * * Converts a string which is in the encoding used for strings by * the C runtime (usually the same as that used by the operating - * system) in the current locale into a - * UTF-8 string. + * system) in the [current locale][setlocale] into a UTF-8 string. * - * Return value: The converted string, or %NULL on an error. + * Returns: A newly-allocated buffer containing the converted string, + * or %NULL on an error, and error will be set. **/ gchar * g_locale_to_utf8 (const gchar *opsysstring, @@ -1221,7 +912,9 @@ g_locale_to_utf8 (const gchar *opsysstring, * g_locale_from_utf8: * @utf8string: a UTF-8 encoded string * @len: the length of the string, or -1 if the string is - * nul-terminated. + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. In that case, using -1 + * for the @len parameter is unsafe) * @bytes_read: location to store the number of bytes in the * input string that were successfully converted, or %NULL. * Even if the conversion was successful, this may be @@ -1237,10 +930,11 @@ g_locale_to_utf8 (const gchar *opsysstring, * * Converts a string from UTF-8 to the encoding used for strings by * the C runtime (usually the same as that used by the operating - * system) in the current locale. On - * Windows this means the system codepage. + * system) in the [current locale][setlocale]. On Windows this means + * the system codepage. * - * Return value: The converted string, or %NULL on an error. + * Returns: A newly-allocated buffer containing the converted string, + * or %NULL on an error, and error will be set. 
**/ gchar * g_locale_from_utf8 (const gchar *utf8string, @@ -1287,27 +981,26 @@ filename_charset_cache_free (gpointer data) * representation of a filename, see g_filename_display_name(). * * On Unix, the character sets are determined by consulting the - * environment variables G_FILENAME_ENCODING and - * G_BROKEN_FILENAMES. On Windows, the character set - * used in the GLib API is always UTF-8 and said environment variables - * have no effect. + * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`. + * On Windows, the character set used in the GLib API is always UTF-8 + * and said environment variables have no effect. * - * G_FILENAME_ENCODING may be set to a comma-separated list - * of character set names. The special token "@locale" is taken to - * mean the character set for the current - * locale. If G_FILENAME_ENCODING is not set, but - * G_BROKEN_FILENAMES is, the character set of the current - * locale is taken as the filename encoding. If neither environment variable - * is set, UTF-8 is taken as the filename encoding, but the character - * set of the current locale is also put in the list of encodings. + * `G_FILENAME_ENCODING` may be set to a comma-separated list of + * character set names. The special token "@locale" is taken + * to mean the character set for the [current locale][setlocale]. + * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is, + * the character set of the current locale is taken as the filename + * encoding. If neither environment variable is set, UTF-8 is taken + * as the filename encoding, but the character set of the current locale + * is also put in the list of encodings. * * The returned @charsets belong to GLib and must not be freed. * * Note that on Unix, regardless of the locale character set or - * G_FILENAME_ENCODING value, the actual file names present + * `G_FILENAME_ENCODING` value, the actual file names present * on a system might be in any random encoding or just gibberish. 
* - * Return value: %TRUE if the filename encoding is UTF-8. + * Returns: %TRUE if the filename encoding is UTF-8. * * Since: 2.6 */ @@ -1423,7 +1116,9 @@ get_filename_charset (const gchar **filename_charset) * g_filename_to_utf8: * @opsysstring: a string in the encoding for filenames * @len: the length of the string, or -1 if the string is - * nul-terminated. + * nul-terminated (Note that some encodings may allow nul + * bytes to occur inside strings. In that case, using -1 + * for the @len parameter is unsafe) * @bytes_read: location to store the number of bytes in the * input string that were successfully converted, or %NULL. * Even if the conversion was successful, this may be @@ -1440,9 +1135,9 @@ get_filename_charset (const gchar **filename_charset) * Converts a string which is in the encoding used by GLib for * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8 * for filenames; on other platforms, this function indirectly depends on - * the current locale. + * the [current locale][setlocale]. * - * Return value: The converted string, or %NULL on an error. + * Returns: The converted string, or %NULL on an error. **/ gchar* g_filename_to_utf8 (const gchar *opsysstring, @@ -1496,15 +1191,15 @@ g_filename_to_utf8 (const gchar *opsysstring, * @utf8string: a UTF-8 encoded string. * @len: the length of the string, or -1 if the string is * nul-terminated. - * @bytes_read: location to store the number of bytes in the - * input string that were successfully converted, or %NULL. + * @bytes_read: (out) (allow-none): location to store the number of bytes in + * the input string that were successfully converted, or %NULL. * Even if the conversion was successful, this may be * less than @len if there were partial characters * at the end of the input. If the error * #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value * stored will the byte offset after the last valid * input sequence. 
- * @bytes_written: the number of bytes stored in the output buffer (not + * @bytes_written: (out): the number of bytes stored in the output buffer (not * including the terminating nul). * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError may occur. @@ -1512,9 +1207,10 @@ g_filename_to_utf8 (const gchar *opsysstring, * Converts a string from UTF-8 to the encoding GLib uses for * filenames. Note that on Windows GLib uses UTF-8 for filenames; * on other platforms, this function indirectly depends on the - * current locale. + * [current locale][setlocale]. * - * Return value: The converted string, or %NULL on an error. + * Returns: (array length=bytes_written) (element-type guint8) (transfer full): + * The converted string, or %NULL on an error. **/ gchar* g_filename_from_utf8 (const gchar *utf8string, @@ -1834,7 +1530,7 @@ hostname_validate (const char *hostname) /** * g_filename_from_uri: * @uri: a uri describing a filename (escaped, encoded in ASCII). - * @hostname: (allow-none): Location to store hostname for the URI, or %NULL. + * @hostname: (out) (allow-none): Location to store hostname for the URI, or %NULL. * If there is no hostname in the URI, %NULL will be * stored in this location. * @error: location to store the error occurring, or %NULL to ignore @@ -1843,8 +1539,8 @@ hostname_validate (const char *hostname) * Converts an escaped ASCII-encoded URI to a local filename in the * encoding used for filenames. * - * Return value: a newly-allocated string holding the resulting - * filename, or %NULL on an error. + * Returns: (type filename): a newly-allocated string holding + * the resulting filename, or %NULL on an error. **/ gchar * g_filename_from_uri (const gchar *uri, @@ -2002,7 +1698,7 @@ g_filename_from_uri (const gchar *uri, * Converts an absolute filename to an escaped ASCII-encoded URI, with the path * component following Section 3.3. of RFC 2396. 
* - * Return value: a newly-allocated string holding the resulting + * Returns: a newly-allocated string holding the resulting * URI, or %NULL on an error. **/ gchar * @@ -2161,7 +1857,7 @@ g_uri_list_extract_uris (const gchar *uri_list) * This function is preferred over g_filename_display_name() if you know the * whole path, as it allows translation. * - * Return value: a newly allocated string containing + * Returns: a newly allocated string containing * a rendition of the basename of the filename in valid UTF-8 * * Since: 2.6 @@ -2200,7 +1896,7 @@ g_filename_display_basename (const gchar *filename) * g_filename_display_basename(), since that allows location-based * translation of filenames. * - * Return value: a newly allocated string containing + * Returns: a newly allocated string containing * a rendition of the filename in valid UTF-8 * * Since: 2.6