From 11fc5dc3ddedf5268b87d32af351473f9e08d806 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Mon, 9 Jul 2001 16:14:35 +0000 Subject: [PATCH] More encoding mapping magic. p4raw-id: //depot/perl@11239 --- lib/open.pm | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/lib/open.pm b/lib/open.pm index 085e770..c6978bb 100644 --- a/lib/open.pm +++ b/lib/open.pm @@ -14,11 +14,12 @@ sub _get_locale_encoding { unless ($@) { $locale_encoding = langinfo(CODESET); } + my $country_language; if (not $locale_encoding && in_locale()) { - if ($ENV{LC_ALL} =~ /^[^.]+\.([^.]+)$/) { - $locale_encoding = $1; - } elsif ($ENV{LANG} =~ /^[^.]+\.([^.]+)$/) { - $locale_encoding = $1; + if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) { + ($country_language, $locale_encoding) = ($1, $2); + } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) { + ($country_language, $locale_encoding) = ($1, $2); } } else { # Could do heuristics based on the country and language @@ -28,6 +29,19 @@ sub _get_locale_encoding { # (the Estonian database would be excellent!) # --jhi } + if (defined $locale_encoding && + $locale_encoding eq 'euc' && + defined $country_language) { + if ($country_language =~ /^ja_JP|japan(?:ese)$/i) { + $locale_encoding = 'eucjp'; + } elsif ($country_language =~ /^ko_KR|korea(?:n)$/i) { + $locale_encoding = 'euckr'; + } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)$/i) { + $locale_encoding = 'euctw'; + } + croak "Locale encoding 'euc' too ambiguous" + if $locale_encoding eq 'euc'; + } } } @@ -50,7 +64,11 @@ sub import { unless defined $locale_encoding; croak "Cannot figure out an encoding to use" unless defined $locale_encoding; - $layer = "encoding($locale_encoding)"; + if ($locale_encoding =~ /^utf-?8$/i) { + $layer = "utf8"; + } else { + $layer = "encoding($locale_encoding)"; + } } unless(PerlIO::Layer::->find($layer)) { carp("Unknown discipline layer '$layer'"); -- 2.7.4