static const char unicode_0x394[] = { 0xce, 0x94, 0 };
size_t width;
+ /* We pass "" instead of "C" because some libc's have
+ * non-ASCII default locale for setlocale("") call
+ * (this allows users of such libc to have Unicoded
+ * system without having to mess with env).
+ *
+ * We set LC_CTYPE because (a) we may be called with $LC_CTYPE
+ * value in LANG, not with $LC_ALL, (b) internationalized
+ * LC_NUMERIC and LC_TIME are more PITA than benefit
+ * (for one, some utilities have hard time with comma
+ * used as a fractional separator).
+ */
//TODO: avoid repeated calls by caching last string?
- setlocale(LC_ALL, (LANG && LANG[0]) ? LANG : "C");
+ setlocale(LC_CTYPE, LANG ? LANG : "");
/* In unicode, this is a one character string */
-// can use unicode_strlen(string) too, but otherwise unicode_strlen() is unused
- width = mbstowcs(NULL, unicode_0x394, INT_MAX);
+ width = unicode_strlen(unicode_0x394);
unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF);
}
void FAST_FUNC init_unicode(void)
{
- if (unicode_status == UNICODE_UNKNOWN)
- reinit_unicode(getenv("LANG"));
+ /* Some people set only $LC_CTYPE, not $LC_ALL, because they want
+ * only Unicode to be activated on their system, not the whole
+ * shebang of wrong decimal points, strange date formats and so on.
+ */
+ if (unicode_status == UNICODE_UNKNOWN) {
+ char *s = getenv("LC_ALL");
+ if (!s) s = getenv("LC_CTYPE");
+ if (!s) s = getenv("LANG");
+ reinit_unicode(s);
+ }
}
#else
void FAST_FUNC init_unicode(void)
{
- if (unicode_status == UNICODE_UNKNOWN)
- reinit_unicode(getenv("LANG"));
+ if (unicode_status == UNICODE_UNKNOWN) {
+ char *s = getenv("LC_ALL");
+ if (!s) s = getenv("LC_CTYPE");
+ if (!s) s = getenv("LANG");
+ reinit_unicode(s);
+ }
}
# endif
/* The rest is mostly same for libc and for "homegrown" support */
-#if 0 // UNUSED
size_t FAST_FUNC unicode_strlen(const char *string)
{
size_t width = mbstowcs(NULL, string, INT_MAX);
return strlen(string);
return width;
}
-#endif
size_t FAST_FUNC unicode_strwidth(const char *string)
{