1 /* gcharset.c - Charset information
3 * Copyright (C) 2011 Red Hat, Inc.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 #include "gcharsetprivate.h"
27 #include "gmessages.h"
28 #include "gstrfuncs.h"
30 #include "gthreadprivate.h"
35 #include "libcharset/libcharset.h"
/* Lock object named "aliases".
 * NOTE(review): the G_LOCK/G_UNLOCK calls that use it are not visible in this
 * listing - confirm the table below is only built while it is held. */
40 G_LOCK_DEFINE_STATIC (aliases);
/* Fragment that builds the alias table (presumably the body of
 * get_alias_hash(), which _g_charset_get_aliases() calls).  The table maps a
 * canonical charset name to a NULL-terminated array of alias strings.  It is
 * created without key or value destroy functions. */
45 static GHashTable *alias_hash = NULL;
52 alias_hash = g_hash_table_new (g_str_hash, g_str_equal);
/* The alias data is walked as a run of NUL-terminated strings that stops at an
 * empty string; each iteration steps over two of them. */
54 aliases = _g_locale_get_charset_aliases ();
55 while (*aliases != '\0')
57 const char *canonical;
59 const char **alias_array;
63 aliases += strlen (aliases) + 1;
65 aliases += strlen (aliases) + 1;
/* Fetch the array already stored for this canonical name, if any. */
67 alias_array = g_hash_table_lookup (alias_hash, canonical);
/* Scan the existing array up to its NULL terminator.
 * NOTE(review): the guard for a missing array and the increment of count are
 * not visible in this listing. */
70 while (alias_array[count])
/* Grow by one slot: room for the new alias plus the NULL terminator. */
74 alias_array = g_renew (const char *, alias_array, count + 2);
75 alias_array[count] = alias;
76 alias_array[count + 1] = NULL;
/* The key is the canonical pointer itself (const cast away), not a copy; the
 * array pointer is stored again because g_renew may have moved it. */
78 g_hash_table_insert (alias_hash, (char *)canonical, alias_array);
87 /* As an abuse of the alias table, the following routine gets
88 * the charsets that are aliases for the canonical name.
/* Returns whatever the alias table stores under @canonical_name, i.e. the
 * result of g_hash_table_lookup(), which is NULL when the name has no entry.
 * The table is obtained through get_alias_hash() on every call. */
91 _g_charset_get_aliases (const char *canonical_name)
93 GHashTable *alias_hash = get_alias_hash ();
95 return g_hash_table_lookup (alias_hash, canonical_name);
/* Works out the charset name for @raw_data.  Visible order of precedence:
 * the CHARSET environment variable first, then the value returned by
 * _g_locale_charset_unalias (@raw_data).  Each candidate is considered only
 * when it is a non-empty string, and is tested for the substring "UTF-8".
 * NOTE(review): the second parameter, the stores through it and the return
 * statements are not visible in this listing; g_get_charset() passes an
 * output pointer and keeps the return value as its is_utf8 flag. */
99 g_utf8_get_charset_internal (const char *raw_data,
102 const char *charset = g_getenv ("CHARSET");
104 if (charset && *charset)
108 if (charset && strstr (charset, "UTF-8"))
114 /* The libcharset code tries to be thread-safe without
115 * a lock, but has a memory leak and a missing memory
116 * barrier, so we lock for it
119 charset = _g_locale_charset_unalias (raw_data);
122 if (charset && *charset)
126 if (charset && strstr (charset, "UTF-8"))
132 /* Assume this for compatibility at present. */
/* Cache record used by g_get_charset(), which keeps one in a GPrivate slot.
 * The member declarations are not visible in this listing; the code in this
 * file reads and writes members named raw, charset and is_utf8. */
138 typedef struct _GCharsetCache GCharsetCache;
140 struct _GCharsetCache {
/* Destroy notify registered for the GPrivate slot in g_get_charset().
 * @data is the GCharsetCache being torn down; its charset string is released.
 * NOTE(review): frees of the other members and of the cache itself are not
 * visible in this listing - confirm nothing is leaked. */
147 charset_cache_free (gpointer data)
149 GCharsetCache *cache = data;
151 g_free (cache->charset);
157 * @charset: (out) (optional) (transfer none): return location for character set
160 * Obtains the character set for the [current locale][setlocale]; you
161 * might use this character set as an argument to g_convert(), to convert
162 * from the current locale's encoding to some other encoding. (Frequently
163 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
165 * On Windows the character set returned by this function is the
166 * so-called system default ANSI code-page. That is the character set
167 * used by the "narrow" versions of C library and Win32 functions that
168 * handle file names. It might be different from the character set
169 * used by the C library's current locale.
171 * On Linux, the character set is found by consulting nl_langinfo() if
172 * available. If not, the environment variables `LC_ALL`, `LC_CTYPE`, `LANG`
173 * and `CHARSET` are queried in order.
175 * The return value is %TRUE if the locale's encoding is UTF-8, in that
176 * case you can perhaps avoid calling g_convert().
178 * The string returned in @charset is not allocated, and should not be
181 * Returns: %TRUE if the returned charset is UTF-8
/* Implementation note: results are kept in a cache stored in a GPrivate slot
 * and are recomputed only when the raw locale charset string differs from the
 * one recorded earlier. */
184 g_get_charset (const char **charset)
186 static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
187 GCharsetCache *cache = g_private_get (&cache_private);
/* Allocate a zero-filled cache and store it in the slot.
 * NOTE(review): the guarding condition is not visible in this listing -
 * presumably this runs only when the slot is still empty. */
191 cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
194 raw = _g_locale_charset_raw ();
/* Refresh unless a raw string is cached and compares equal to the current
 * one. */
197 if (!(cache->raw && strcmp (cache->raw, raw) == 0))
199 const gchar *new_charset;
/* Drop the previous charset string, then store private copies of the new raw
 * and resolved names.
 * NOTE(review): the release of the previous cache->raw is not visible in this
 * listing - confirm it is freed before being overwritten. */
202 g_free (cache->charset);
203 cache->raw = g_strdup (raw);
204 cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
205 cache->charset = g_strdup (new_charset);
/* The caller receives the cached string itself, not a copy. */
209 *charset = cache->charset;
211 return cache->is_utf8;
217 * Gets the character set for the current locale.
219 * Returns: a newly allocated string containing the name
220 * of the character set. This string must be freed with g_free().
/* Fetches the charset through g_get_charset() (its boolean result is ignored)
 * and returns a g_strdup() copy, so the caller owns the returned string. */
225 const gchar *charset;
227 g_get_charset (&charset);
229 return g_strdup (charset);
234 /* read an alias file for the locales */
/* Reads @file and records first-column to second-column pairs in
 * @alias_table.  Keys and values are g_strdup() copies.  A key that is already
 * present is left untouched, so the first definition of an alias wins. */
236 read_aliases (const gchar *file,
237 GHashTable *alias_table)
/* NOTE(review): the check of fp and the matching fclose() are not visible in
 * this listing - confirm both exist. */
242 fp = fopen (file,"r");
/* fgets() stores at most 255 characters per call, so a longer line arrives in
 * several pieces.
 * NOTE(review): the declaration of buf is not visible - confirm it holds at
 * least 256 bytes. */
245 while (fgets (buf, 256, fp))
251 /* Line is a comment */
252 if ((buf[0] == '#') || (buf[0] == '\0'))
255 /* Reads first column */
/* The first column ends at a tab, a space or a colon; q is then moved past
 * any further tabs and spaces. */
256 for (p = buf, q = NULL; *p; p++) {
257 if ((*p == '\t') || (*p == ' ') || (*p == ':')) {
260 while ((*q == '\t') || (*q == ' ')) {
266 /* The line only had one column */
267 if (!q || *q == '\0')
270 /* Read second column */
/* Only tab and space end the second column; a colon does not. */
271 for (p = q; *p; p++) {
272 if ((*p == '\t') || (*p == ' ')) {
278 /* Add to alias table if necessary */
/* buf is used as the key and q as the value, which implies the elided code
 * terminates each column in place - NOTE(review): confirm. */
279 if (!g_hash_table_lookup (alias_table, buf)) {
280 g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q));
/* Resolves @lang through the locale alias table, following chains of aliases
 * until a name has no entry or maps to itself. */
289 unalias_lang (char *lang)
292 static GHashTable *alias_table = NULL;
/* One-time initialisation: the table is filled from the system alias file and
 * published through g_once_init_leave(). */
296 if (g_once_init_enter (&alias_table))
298 GHashTable *table = g_hash_table_new (g_str_hash, g_str_equal);
299 read_aliases ("/usr/share/locale/locale.alias", table);
300 g_once_init_leave (&alias_table, table);
/* Keep following the chain while the lookup yields a different name. */
304 while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0))
/* NOTE(review): the level counter and its limit are not visible in this
 * listing; said_before presumably limits the warning to one emission. */
309 static gboolean said_before = FALSE;
311 g_warning ("Too many alias levels for a locale, "
312 "may indicate a loop");
321 /* Mask for components of locale spec. The ordering here is from
322 * least significant to most significant
326 COMPONENT_CODESET = 1 << 0, /* the part located via '.' in explode_locale() */
327 COMPONENT_TERRITORY = 1 << 1, /* the part located via '_' in explode_locale() */
328 COMPONENT_MODIFIER = 1 << 2 /* the part located via '@' in explode_locale() */
331 /* Break an X/Open style locale specification into components
/* The result is a bit mask of the COMPONENT_* parts found in @locale (see the
 * caller, append_locale_variants()).  Each part that is found is stored
 * through its output pointer as a newly allocated string.  Territory, codeset
 * and modifier are copied starting at their delimiter, so they keep the
 * leading '_', '.' or '@'. */
334 explode_locale (const gchar *locale,
340 const gchar *uscore_pos;
342 const gchar *dot_pos;
/* Each delimiter is searched for only after the previous one that was found,
 * which enforces the language, territory, codeset, modifier order. */
346 uscore_pos = strchr (locale, '_');
347 dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.');
348 at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@');
/* NOTE(review): the conditions selecting the branches below are not visible
 * in this listing; each presumably tests the matching *_pos pointer. */
352 mask |= COMPONENT_MODIFIER;
353 *modifier = g_strdup (at_pos);
/* Without a modifier, at_pos marks the end of the string so the length
 * computations below still work. */
356 at_pos = locale + strlen (locale);
360 mask |= COMPONENT_CODESET;
361 *codeset = g_strndup (dot_pos, at_pos - dot_pos);
368 mask |= COMPONENT_TERRITORY;
369 *territory = g_strndup (uscore_pos, dot_pos - uscore_pos);
/* Without a territory the language runs up to dot_pos. */
372 uscore_pos = dot_pos;
374 *language = g_strndup (locale, uscore_pos - locale);
380 * Compute all interesting variants for a given locale name -
381 * by stripping off different components of the value.
383 * For simplicity, we assume that the locale is in
384 * X/Open format: language[_territory][.codeset][@modifier]
386 * TODO: Extend this to handle the CEN format (see the GNU libc docs)
387 * as well. We could just copy the code from glibc wholesale
388 * but it is big, ugly, and complicated, so I'm reluctant
389 * to do so when this should handle 99% of the time...
/* Splits the locale with explode_locale() and appends to @array one newly
 * allocated string for each candidate value whose bits are all present in the
 * returned mask.  Every string is the language followed by the selected parts
 * in the order territory, codeset, modifier.
 * NOTE(review): the comment inside the loop says the combinations run from
 * least to most attractive, while g_get_locale_variants() documents a result
 * sorted from most to least desirable.  The statement deriving i from j is
 * not visible in this listing - confirm which description is right.
 * NOTE(review): the frees of the component strings under the three trailing
 * mask tests are not visible either. */
392 append_locale_variants (GPtrArray *array,
395 gchar *language = NULL;
396 gchar *territory = NULL;
397 gchar *codeset = NULL;
398 gchar *modifier = NULL;
403 g_return_if_fail (locale != NULL);
405 mask = explode_locale (locale, &language, &territory, &codeset, &modifier);
407 /* Iterate through all possible combinations, from least attractive
408 * to most attractive.
410 for (j = 0; j <= mask; ++j)
414 if ((i & ~mask) == 0)
416 gchar *val = g_strconcat (language,
417 (i & COMPONENT_TERRITORY) ? territory : "",
418 (i & COMPONENT_CODESET) ? codeset : "",
419 (i & COMPONENT_MODIFIER) ? modifier : "",
421 g_ptr_array_add (array, val);
426 if (mask & COMPONENT_CODESET)
428 if (mask & COMPONENT_TERRITORY)
430 if (mask & COMPONENT_MODIFIER)
435 * g_get_locale_variants:
436 * @locale: a locale identifier
438 * Returns a list of derived variants of @locale, which can be used to
439 * e.g. construct locale-dependent filenames or search paths. The returned
440 * list is sorted from most desirable to least desirable.
441 * This function handles territory, charset and extra locale modifiers.
443 * For example, if @locale is "fr_BE", then the returned list
446 * If you need the list of variants for the current locale,
447 * use g_get_language_names().
449 * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly
450 * allocated array of newly allocated strings with the locale variants. Free with
/* Implementation note: the variants are collected in a GPtrArray whose
 * element storage becomes the returned string vector. */
456 g_get_locale_variants (const gchar *locale)
/* A NULL @locale is rejected and NULL is returned. */
460 g_return_val_if_fail (locale != NULL, NULL);
/* 8 is only the initial capacity reserved for the array. */
462 array = g_ptr_array_sized_new (8);
463 append_locale_variants (array, locale);
/* NULL terminator so the storage can be used as a string vector. */
464 g_ptr_array_add (array, NULL);
/* FALSE releases only the GPtrArray wrapper and hands back the element
 * storage, which the caller now owns. */
466 return (gchar **) g_ptr_array_free (array, FALSE);
469 /* The following is (partly) taken from the gettext package.
470 Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. */
/* Looks for the locale setting that applies to @category_name.  The
 * environment variables LANGUAGE, LC_ALL, @category_name and LANG are queried
 * in that order, each tested for being set and non-empty.  Under
 * G_PLATFORM_WIN32 the value of g_win32_getlocale() is additionally turned
 * into an interned string.
 * NOTE(review): the return statements, the release of the Windows locale
 * string and the final fallback are not visible in this listing. */
473 guess_category_value (const gchar *category_name)
477 /* The highest priority value is the 'LANGUAGE' environment
478 variable. This is a GNU extension. */
479 retval = g_getenv ("LANGUAGE");
480 if ((retval != NULL) && (retval[0] != '\0'))
483 /* 'LANGUAGE' is not set. So we have to proceed with the POSIX
484 methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'. On some
485 systems this can be done by the 'setlocale' function itself. */
487 /* Setting of LC_ALL overwrites all other. */
488 retval = g_getenv ("LC_ALL");
489 if ((retval != NULL) && (retval[0] != '\0'))
492 /* Next comes the name of the desired category. */
493 retval = g_getenv (category_name);
494 if ((retval != NULL) && (retval[0] != '\0'))
497 /* Last possibility is the LANG environment variable. */
498 retval = g_getenv ("LANG");
499 if ((retval != NULL) && (retval[0] != '\0'))
502 #ifdef G_PLATFORM_WIN32
503 /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and
504 * LANG, which we already did above. Oh well. The main point of
505 * calling g_win32_getlocale() is to get the thread's locale as used
506 * by Windows and the Microsoft C runtime (in the "English_United
507 * States" format) translated into the Unixish format.
510 char *locale = g_win32_getlocale ();
511 retval = g_intern_string (locale);
/* Value type of the cache hash used by g_get_language_names_with_category().
 * language_names is the NULL-terminated list handed out to callers.  The code
 * in this file also uses a member named languages, whose declaration is not
 * visible in this listing; it holds a copy of the raw setting the list was
 * built from. */
520 typedef struct _GLanguageNamesCache GLanguageNamesCache;
522 struct _GLanguageNamesCache {
524 gchar **language_names;
/* Value destroy function of the cache hash created in
 * g_get_language_names_with_category().  It releases the stored raw string and
 * the whole string vector.
 * NOTE(review): the free of the cache record itself is not visible in this
 * listing - confirm it is released. */
528 language_names_cache_free (gpointer data)
530 GLanguageNamesCache *cache = data;
531 g_free (cache->languages);
532 g_strfreev (cache->language_names);
537 * g_get_language_names:
539 * Computes a list of applicable locale names, which can be used to
540 * e.g. construct locale-dependent filenames or search paths. The returned
541 * list is sorted from most desirable to least desirable and always contains
542 * the default locale "C".
544 * For example, if LANGUAGE=de:en_US, then the returned list is
545 * "de", "en_US", "en", "C".
547 * This function consults the environment variables `LANGUAGE`, `LC_ALL`,
548 * `LC_MESSAGES` and `LANG` to find the list of locales specified by the
551 * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib
552 * that must not be modified or freed.
/* Thin wrapper: the work is done by g_get_language_names_with_category()
 * with the fixed category "LC_MESSAGES". */
556 const gchar * const *
557 g_get_language_names (void)
559 return g_get_language_names_with_category ("LC_MESSAGES");
563 * g_get_language_names_with_category:
564 * @category_name: a locale category name
566 * Computes a list of applicable locale names with a locale category name,
567 * which can be used to construct the fallback locale-dependent filenames
568 * or search paths. The returned list is sorted from most desirable to
569 * least desirable and always contains the default locale "C".
571 * This function consults the environment variables `LANGUAGE`, `LC_ALL`,
572 * @category_name, and `LANG` to find the list of locales specified by the
575 * g_get_language_names() returns g_get_language_names_with_category("LC_MESSAGES").
577 * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by
578 * the thread g_get_language_names_with_category was called from.
579 * It must not be modified or freed. It must be copied if planned to be used in another thread.
/* Implementation note: results are cached in a hash table kept in a GPrivate
 * slot, keyed by category name.  An entry is rebuilt only when the raw
 * setting returned by guess_category_value() differs from the one it was
 * built from. */
583 const gchar * const *
584 g_get_language_names_with_category (const gchar *category_name)
/* The slot's destroy notify unrefs the hash table. */
586 static GPrivate cache_private = G_PRIVATE_INIT ((void (*)(gpointer)) g_hash_table_unref);
587 GHashTable *cache = g_private_get (&cache_private);
588 const gchar *languages;
589 GLanguageNamesCache *name_cache;
591 g_return_val_if_fail (category_name != NULL, NULL);
/* Create the table; it owns its keys (g_free) and its values
 * (language_names_cache_free).
 * NOTE(review): the guarding condition is not visible in this listing -
 * presumably this runs only when the slot is still empty. */
595 cache = g_hash_table_new_full (g_str_hash, g_str_equal,
596 g_free, language_names_cache_free);
597 g_private_set (&cache_private, cache);
600 languages = guess_category_value (category_name);
/* Rebuild unless an entry exists whose recorded setting equals the current
 * one. */
604 name_cache = (GLanguageNamesCache *) g_hash_table_lookup (cache, category_name);
605 if (!(name_cache && name_cache->languages &&
606 strcmp (name_cache->languages, languages) == 0))
/* Drop the stale entry; the table's destroy functions release it. */
611 g_hash_table_remove (cache, category_name);
613 array = g_ptr_array_sized_new (8);
/* The setting is a colon-separated list; each element is unaliased and then
 * expanded into its variants.
 * NOTE(review): the release of alist is not visible in this listing. */
615 alist = g_strsplit (languages, ":", 0);
616 for (a = alist; *a; a++)
617 append_locale_variants (array, unalias_lang (*a));
/* "C" is always appended last, followed by the NULL terminator. */
619 g_ptr_array_add (array, g_strdup ("C"));
620 g_ptr_array_add (array, NULL);
/* The new entry keeps a copy of the setting it was built from and takes over
 * the element storage of the array. */
622 name_cache = g_new0 (GLanguageNamesCache, 1);
623 name_cache->languages = g_strdup (languages);
624 name_cache->language_names = (gchar **) g_ptr_array_free (array, FALSE);
625 g_hash_table_insert (cache, g_strdup (category_name), name_cache);
/* The returned vector stays owned by the cache entry. */
628 return (const gchar * const *) name_cache->language_names;