From 65ebb05984db179833ff252f547043f32184d893 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 15 Feb 2014 13:23:36 -0700 Subject: [PATCH] Improve fallback during locale initialization If Perl encounters a problem during startup trying to initialize the locales from the environment it has immediately reverted to the "C" locale. This commit generalizes that so it tries each of the applicable environment variables in order of priority until it works, or it gives up and uses the "C" locale. For example, if LC_ALL is set to something that is invalid, but LANG is valid, LANG will be used. This was motivated by trying to get the Windows system default locale used in preference to "C" if all else fails. --- locale.c | 208 +++++++++++++++++++++++++++++++++++++++++------------ pod/perldelta.pod | 11 +++ pod/perllocale.pod | 29 +++++--- t/run/locale.t | 43 ++++++++++- 4 files changed, 234 insertions(+), 57 deletions(-) diff --git a/locale.c b/locale.c index 73fe055..aaa2c07 100644 --- a/locale.c +++ b/locale.c @@ -444,15 +444,29 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #endif /* USE_LOCALE_NUMERIC */ #ifdef __GLIBC__ char * const language = PerlEnv_getenv("LANGUAGE"); +#else + const char * const language = NULL; #endif + /* NULL uses the existing already set up locale */ const char * const setlocale_init = (PerlEnv_getenv("PERL_SKIP_LOCALE_INIT")) ? NULL : ""; + const char* trial_locales[5] = { setlocale_init }; /* 5 = 1 each for "", + LC_ALL, LANG, "", C + */ + unsigned int trial_locales_count = 1; char * const lc_all = PerlEnv_getenv("LC_ALL"); char * const lang = PerlEnv_getenv("LANG"); bool setlocale_failure = FALSE; + unsigned int i; + char *p; + const bool locwarn = (printwarn > 1 || + (printwarn && + (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p)))); bool done = FALSE; + const char *description; + const char *system_default_locale = NULL; #ifndef LOCALE_ENVIRON_REQUIRED @@ -508,42 +522,85 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #endif /* !LOCALE_ENVIRON_REQUIRED */ + /* We try each locale in the list until we get one that works, or exhaust + * the list */ + for (i= 0; i < trial_locales_count; i++) { + const char * trial_locale = trial_locales[i]; + + if (i > 0) { + + /* XXX This is to preserve old behavior for LOCALE_ENVIRON_REQUIRED + * when i==0, but I (khw) don't think that behavior makes much + * sense */ + setlocale_failure = FALSE; + +#ifdef WIN32 + + /* On Windows machines, an entry of "" after the 0th means to use + * the system default locale, which we now proceed to get. */ + if (strEQ(trial_locale, "")) { + unsigned int j; + + /* Note that this may change the locale, but we are going to do + * that anyway just below */ + system_default_locale = setlocale(LC_ALL, ""); + + /* Skip if invalid or it's already on the list of locales to + * try */ + if (! system_default_locale) { + goto next_iteration; + } + for (j = 0; j < trial_locales_count; j++) { + if (strEQ(system_default_locale, trial_locales[j])) { + goto next_iteration; + } + } + + trial_locale = system_default_locale; + } +#endif + } + #ifdef LC_ALL - if (! my_setlocale(LC_ALL, setlocale_init)) + if (! my_setlocale(LC_ALL, trial_locale)) setlocale_failure = TRUE; #endif /* LC_ALL */ if (!setlocale_failure) { #ifdef USE_LOCALE_CTYPE Safefree(curctype); - if (! (curctype = my_setlocale(LC_CTYPE, setlocale_init))) + if (! (curctype = my_setlocale(LC_CTYPE, trial_locale))) setlocale_failure = TRUE; else curctype = savepv(curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE Safefree(curcoll); - if (! (curcoll = my_setlocale(LC_COLLATE, setlocale_init))) + if (! (curcoll = my_setlocale(LC_COLLATE, trial_locale))) setlocale_failure = TRUE; else curcoll = savepv(curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC Safefree(curnum); - if (! (curnum = my_setlocale(LC_NUMERIC, setlocale_init))) + if (! (curnum = my_setlocale(LC_NUMERIC, trial_locale))) setlocale_failure = TRUE; else curnum = savepv(curnum); #endif /* USE_LOCALE_NUMERIC */ + + if (! setlocale_failure) { /* Success */ + break; + } } - if (setlocale_failure) { - char *p; - const bool locwarn = (printwarn > 1 || - (printwarn && - (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p)))); + /* Here, something failed; will need to try a fallback. */ + ok = 0; + + if (i == 0) { + unsigned int j; - if (locwarn) { + if (locwarn) { /* Output failure info only on the first one */ #ifdef LC_ALL PerlIO_printf(Perl_error_log, @@ -612,43 +669,77 @@ Perl_init_i18nl10n(pTHX_ int printwarn) " are supported and installed on your system.\n"); } -#ifdef LC_ALL + /* Calculate what fallback locales to try. We have avoided this + * until we have to, becuase failure is quite unlikely. This will + * usually change the upper bound of the loop we are in. + * + * Since the system's default way of setting the locale has not + * found one that works, We use Perl's defined ordering: LC_ALL, + * LANG, and the C locale. We don't try the same locale twice, so + * don't add to the list if already there. (On POSIX systems, the + * LC_ALL element will likely be a repeat of the 0th element "", + * but there's no harm done by doing it explicitly */ + if (lc_all) { + for (j = 0; j < trial_locales_count; j++) { + if (strEQ(lc_all, trial_locales[j])) { + goto done_lc_all; + } + } + trial_locales[trial_locales_count++] = lc_all; + } + done_lc_all: - if (setlocale(LC_ALL, "C")) { - if (locwarn) - PerlIO_printf(Perl_error_log, - "perl: warning: Falling back to the standard locale (\"C\").\n"); - ok = 0; - } - else { - if (locwarn) - PerlIO_printf(Perl_error_log, - "perl: warning: Failed to fall back to the standard locale (\"C\").\n"); - ok = -1; - } + if (lang) { + for (j = 0; j < trial_locales_count; j++) { + if (strEQ(lang, trial_locales[j])) { + goto done_lang; + } + } + trial_locales[trial_locales_count++] = lang; + } + done_lang: + +#if defined(WIN32) && defined(LC_ALL) + /* For Windows, we also try the system default locale before "C". + * (If there exists a Windows without LC_ALL we skip this because + * it gets too complicated. For those, the "C" is the next + * fallback possibility). The "" is the same as the 0th element of + * the array, but the code at the loop above knows to treat it + * differently when not the 0th */ + trial_locales[trial_locales_count++] = ""; +#endif + + for (j = 0; j < trial_locales_count; j++) { + if (strEQ("C", trial_locales[j])) { + goto done_C; + } + } + trial_locales[trial_locales_count++] = "C"; -#else /* ! LC_ALL */ + done_C: ; + } /* end of first time through the loop */ - if (0 -#ifdef USE_LOCALE_CTYPE - || !(curctype || setlocale(LC_CTYPE, "C")) -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE - || !(curcoll || setlocale(LC_COLLATE, "C")) -#endif /* USE_LOCALE_COLLATE */ -#ifdef USE_LOCALE_NUMERIC - || !(curnum || setlocale(LC_NUMERIC, "C")) -#endif /* USE_LOCALE_NUMERIC */ - ) - { - if (locwarn) - PerlIO_printf(Perl_error_log, - "perl: warning: Cannot fall back to the standard locale (\"C\").\n"); - ok = -1; - } +#ifdef WIN32 + next_iteration: ; +#endif + + } /* end of looping through the trial locales */ + + if (ok < 1) { /* If we tried to fallback */ + const char* msg; + if (! setlocale_failure) { /* fallback succeeded */ + msg = "Falling back to"; + } + else { /* fallback failed */ -#endif /* ! LC_ALL */ + /* We dropped off the end of the loop, so have to decrement i to + * get back to the value the last time through */ + i--; + ok = -1; + msg = "Failed to fall back to"; + + /* To continue, we should use whatever values we've got */ #ifdef USE_LOCALE_CTYPE Safefree(curctype); curctype = savepv(setlocale(LC_CTYPE, NULL)); @@ -661,8 +752,35 @@ Perl_init_i18nl10n(pTHX_ int printwarn) Safefree(curnum); curnum = savepv(setlocale(LC_NUMERIC, NULL)); #endif /* USE_LOCALE_NUMERIC */ - } - else { + } + + if (locwarn) { + const char * description; + const char * name = ""; + if (strEQ(trial_locales[i], "C")) { + description = "the standard locale"; + name = "C"; + } + else if (strEQ(trial_locales[i], "")) { + description = "the system default locale"; + if (system_default_locale) { + name = system_default_locale; + } + } + else { + description = "a fallback locale"; + name = trial_locales[i]; + } + if (name && strNE(name, "")) { + PerlIO_printf(Perl_error_log, + "perl: warning: %s %s (\"%s\").\n", msg, description, name); + } + else { + PerlIO_printf(Perl_error_log, + "perl: warning: %s %s.\n", msg, description); + } + } + } /* End of tried to fallback */ #ifdef USE_LOCALE_CTYPE new_ctype(curctype); @@ -676,8 +794,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn) new_numeric(curnum); #endif /* USE_LOCALE_NUMERIC */ - } - #if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE) { /* Set PL_utf8locale to TRUE if using PerlIO _and_ diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 490a260..65c3a92 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -71,6 +71,17 @@ signatures are enabled. See L for details. +=head2 More locale initialization fallback options + +If there was an error with locales during Perl start-up, it immediately +gave up and tried to use the C<"C"> locale. Now it first tries using +other locales given by the environment variables, as detailed in +L. For example, if C and C are +both set, and using the C locale fails, Perl will now try the +C locale, and only if that fails, will it fall back to C<"C">. On +Windows machines, Perl will try, ahead of using C<"C">, the system +default locale if all the locales given by environment variables fail. + =head1 Security XXX Any security-related notices go here. In particular, any security diff --git a/pod/perllocale.pod b/pod/perllocale.pod index be41f0a..47fcb0a 100644 --- a/pod/perllocale.pod +++ b/pod/perllocale.pod @@ -188,8 +188,10 @@ C> is always initialized to the C locale (the C locale is mentioned under L). If there is no valid environment, the current locale is whatever the -system default has been set to. It is likely, but not necessarily, the -"C" locale. +system default has been set to. On POSIX systems, it is likely, but +not necessarily, the "C" locale. On Windows, the default is set via the +computer's SRegional and Language Options>> (or its +current equivalent). The operations that are affected by locale are: @@ -361,8 +363,9 @@ C function: # LC_CTYPE now in locale "French, Canada, codeset ISO 8859-1" setlocale(LC_CTYPE, ""); - # LC_CTYPE now reset to default defined by LC_ALL/LC_CTYPE/LANG - # environment variables. See below for documentation. + # LC_CTYPE now reset to the default defined by the + # LC_ALL/LC_CTYPE/LANG environment variables, or to the system + # default. See below for documentation. # restore the old locale setlocale(LC_CTYPE, $old_locale); @@ -484,11 +487,13 @@ You may encounter the following warning message at Perl startup: This means that your locale settings had LC_ALL set to "En_US" and LANG exists but has no value. Perl tried to believe you but could not. Instead, Perl gave up and fell back to the "C" locale, the default locale -that is supposed to work no matter what. This usually means your locale -settings were wrong, they mention locales your system has never heard -of, or the locale installation in your system has problems (for example, -some system files are broken or missing). There are quick and temporary -fixes to these problems, as well as more thorough and lasting fixes. +that is supposed to work no matter what. (On Windows, it first tries +falling back to the system default locale.) This usually means your +locale settings were wrong, they mention locales your system has never +heard of, or the locale installation in your system has problems (for +example, some system files are broken or missing). There are quick and +temporary fixes to these problems, as well as more thorough and lasting +fixes. =head2 Testing for broken locales @@ -1117,6 +1122,12 @@ The following environment variables are not specific to Perl: They are part of the standardized (ISO C, XPG4, POSIX 1.c) C method for controlling an application's opinion on data. Windows is non-POSIX, but Perl arranges for the following to work as described anyway. +If the locale given by an environment variable is not valid, Perl tries +the next lower one in priority. If none are valid, on Windows, the +system default locale is then tried. If all else fails, the C<"C"> +locale is used. If even that doesn't work, something is badly broken, +but Perl tries to forge ahead with whatever the locale settinga might +be. =over 12 diff --git a/t/run/locale.t b/t/run/locale.t index 6e98526..b678fed 100644 --- a/t/run/locale.t +++ b/t/run/locale.t @@ -186,6 +186,45 @@ EOF } for ($different) { + local $ENV{LC_ALL} = "invalid"; + local $ENV{LC_NUMERIC} = "invalid"; + local $ENV{LANG} = $_; + + # Can't turn off the warnings, so send them to /dev/null + fresh_perl_is(<<'EOF', "$difference", { stderr => "devnull" }, + use locale; + use POSIX qw(locale_h); + setlocale(LC_NUMERIC, ""); + my $in = 4.2; + printf("%g", $in); +EOF + "LANG is used if LC_ALL, LC_NUMERIC are invalid"); + } + + SKIP: { + if ($^O eq 'MSWin32') { + skip("Win32 uses system default locale in preference to \"C\"", 1); + } + else { + for ($different) { + local $ENV{LC_ALL} = "invalid"; + local $ENV{LC_NUMERIC} = "invalid"; + local $ENV{LANG} = "invalid"; + + # Can't turn off the warnings, so send them to /dev/null + fresh_perl_is(<<'EOF', 4.2, { stderr => "devnull" }, + use locale; + use POSIX qw(locale_h); + setlocale(LC_NUMERIC, ""); + my $in = 4.2; + printf("%g", $in); +EOF + 'C locale is used if LC_ALL, LC_NUMERIC, LANG are invalid'); + } + } + } + + for ($different) { local $ENV{LC_NUMERIC} = $_; local $ENV{LC_ALL}; # so it never overrides LC_NUMERIC fresh_perl_is(<<"EOF", @@ -201,7 +240,7 @@ EOF } unless ($comma) { - skip("no locale available where LC_NUMERIC is a comma", 2); + skip("no locale available where LC_NUMERIC is a comma", 3); } else { @@ -270,4 +309,4 @@ EOF } # SKIP -sub last { 16 } +sub last { 18 } -- 2.7.4