CPP: Implement MappingFileProvider.
author philip.liard@gmail.com <philip.liard@gmail.com@ee073f10-1060-11df-b6a4-87a95322a99c>
Thu, 19 Jul 2012 12:49:04 +0000 (12:49 +0000)
committer philip.liard@gmail.com <philip.liard@gmail.com@ee073f10-1060-11df-b6a4-87a95322a99c>
Thu, 19 Jul 2012 12:49:04 +0000 (12:49 +0000)
Patch contributed by pmezard.

git-svn-id: http://libphonenumber.googlecode.com/svn/trunk@505 ee073f10-1060-11df-b6a4-87a95322a99c

cpp/CMakeLists.txt
cpp/src/phonenumbers/geocoding/mapping_file_provider.cc [new file with mode: 0644]
cpp/src/phonenumbers/geocoding/mapping_file_provider.h [new file with mode: 0644]
cpp/test/phonenumbers/geocoding/mapping_file_provider_test.cc [new file with mode: 0644]

index 00e9b06..6bfed5e 100644 (file)
@@ -182,6 +182,7 @@ set (
   "src/phonenumbers/geocoding/area_code_map.cc"
   "src/phonenumbers/geocoding/default_map_storage.cc"
   "src/phonenumbers/geocoding/geocoding_data.cc"
+  "src/phonenumbers/geocoding/mapping_file_provider.cc"
   "src/phonenumbers/logger.cc"
   "src/phonenumbers/metadata.h"          # Generated by build tools.
   "src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers.
@@ -354,6 +355,7 @@ set (TEST_SOURCES
   "test/phonenumbers/geocoding/area_code_map_test.cc"
   "test/phonenumbers/geocoding/geocoding_data_test.cc"
   "test/phonenumbers/geocoding/geocoding_test_data.cc"
+  "test/phonenumbers/geocoding/mapping_file_provider_test.cc"
   "test/phonenumbers/logger_test.cc"
   "test/phonenumbers/phonenumberutil_test.cc"
   "test/phonenumbers/regexp_adapter_test.cc"
diff --git a/cpp/src/phonenumbers/geocoding/mapping_file_provider.cc b/cpp/src/phonenumbers/geocoding/mapping_file_provider.cc
new file mode 100644 (file)
index 0000000..82cb1a8
--- /dev/null
@@ -0,0 +1,175 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Patrick Mezard
+
+#include "phonenumbers/geocoding/mapping_file_provider.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstring>
+#include <sstream>
+#include <string>
+
+#include "phonenumbers/geocoding/geocoding_data.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::string;
+
+namespace {
+
+// Associates a full locale name with the locale name that is looked up in its
+// place.
+struct NormalizedLocale {
+  const char* locale;             // Full locale, as built from the request.
+  const char* normalized_locale;  // Name to search for instead of locale.
+};
+
+// Full locales that are searched for under another name.
+const NormalizedLocale kNormalizedLocales[] = {
+  {"zh_TW", "zh_Hant"},
+  {"zh_HK", "zh_Hant"},
+  {"zh_MO", "zh_Hant"},
+};
+
+// Returns the normalized name registered for full_locale in
+// kNormalizedLocales, or NULL when full_locale has no entry there.
+const char* GetNormalizedLocale(const string& full_locale) {
+  const NormalizedLocale* const table_end =
+      kNormalizedLocales +
+      sizeof(kNormalizedLocales) / sizeof(kNormalizedLocales[0]);
+  for (const NormalizedLocale* entry = kNormalizedLocales; entry != table_end;
+       ++entry) {
+    if (full_locale == entry->locale) {
+      return entry->normalized_locale;
+    }
+  }
+  return NULL;
+}
+
+// Appends "_" followed by part to *full_locale. An empty part leaves
+// *full_locale untouched.
+void AppendLocalePart(const string& part, string* full_locale) {
+  if (part.empty()) {
+    return;
+  }
+  *full_locale += "_";
+  *full_locale += part;
+}
+
+// Builds "language[_script][_region]" into *full_locale; empty parts are left
+// out together with their separator. Any previous content of *full_locale is
+// replaced.
+void ConstructFullLocale(const string& language, const string& script,
+                         const string& region, string* full_locale) {
+  *full_locale = language;
+  AppendLocalePart(script, full_locale);
+  AppendLocalePart(region, full_locale);
+}
+
+// Ordering predicate over C strings: true when strcmp() places s1 strictly
+// before s2, false when they are equal or s1 comes after s2.
+bool IsLowerThan(const char* s1, const char* s2) {
+  const int order = strcmp(s1, s2);
+  return order < 0;
+}
+
+// Returns true if language is one of languages->available_languages. The
+// lookup is a binary search ordered by IsLowerThan, so that array has to be
+// sorted accordingly.
+bool HasLanguage(const CountryLanguages* languages, const string& language) {
+  const char* const wanted = language.c_str();
+  const char** const first = languages->available_languages;
+  const char** const last = first + languages->available_languages_size;
+  const char** const candidate =
+      std::lower_bound(first, last, wanted, IsLowerThan);
+  if (candidate == last) {
+    return false;
+  }
+  return strcmp(wanted, *candidate) == 0;
+}
+
+}  // namespace
+
+// Stores the calling-code array pointer, its length and the languages getter
+// exactly as given; the array itself is not copied.
+MappingFileProvider::MappingFileProvider(
+    const int* country_calling_codes, int country_calling_codes_size,
+    country_languages_getter get_country_languages)
+  : country_calling_codes_(country_calling_codes),
+    country_calling_codes_size_(country_calling_codes_size),
+    get_country_languages_(get_country_languages) {
+}
+
+// Stores in *filename the name of the mapping file to use for
+// country_calling_code and the requested language/script/region, formatted as
+// "<country_calling_code>_<language code>", and returns *filename.
+// *filename is left empty when language is empty, when country_calling_code
+// is not part of country_calling_codes_, or when none of the languages
+// available for that calling code matches the request.
+const string& MappingFileProvider::GetFileName(int country_calling_code,
+                                               const string& language,
+                                               const string& script,
+                                               const string& region,
+                                               string* filename) const {
+  filename->clear();
+  if (language.empty()) {
+    return *filename;
+  }
+  // Binary search: country_calling_codes_ is expected to be sorted.
+  const int* const country_calling_codes_end = country_calling_codes_ +
+      country_calling_codes_size_;
+  const int* const it =
+      std::lower_bound(country_calling_codes_,
+                       country_calling_codes_end,
+                       country_calling_code);
+  if (it == country_calling_codes_end || *it != country_calling_code) {
+    return *filename;
+  }
+  // The getter is addressed by the position of the calling code in the array.
+  const CountryLanguages* const langs =
+      get_country_languages_(static_cast<int>(it - country_calling_codes_));
+  // Nothing to choose from if the getter has no entry or the entry is empty.
+  if (langs == NULL || langs->available_languages_size <= 0) {
+    return *filename;
+  }
+  string language_code;
+  FindBestMatchingLanguageCode(langs, language, script, region,
+                               &language_code);
+  if (!language_code.empty()) {
+    std::stringstream filename_buf;
+    filename_buf << country_calling_code << "_" << language_code;
+    *filename = filename_buf.str();
+  }
+  return *filename;
+}
+
+// Stores in *best_match the code, among those available in languages, that
+// best matches the requested language/script/region; *best_match is cleared
+// when nothing matches. Candidates are tried in this order:
+//   1. the normalized form of the full locale, when one is defined;
+//   2. the full locale itself, i.e. "language[_script][_region]";
+//   3. when both script and region are given, "language_script" and then
+//      "language_region";
+//   4. the bare language, when script or region was given (otherwise it is
+//      the full locale and has already been tried in step 2).
+void MappingFileProvider::FindBestMatchingLanguageCode(
+    const CountryLanguages* languages, const string& language,
+    const string& script, const string& region, string* best_match) const {
+  string full_locale;
+  ConstructFullLocale(language, script, region, &full_locale);
+  const char* const normalized_locale = GetNormalizedLocale(full_locale);
+  if (normalized_locale != NULL) {
+    string normalized_locale_str(normalized_locale);
+    if (HasLanguage(languages, normalized_locale_str)) {
+      best_match->swap(normalized_locale_str);
+      return;
+    }
+  }
+
+  if (HasLanguage(languages, full_locale)) {
+    best_match->swap(full_locale);
+    return;
+  }
+
+  const bool has_script = !script.empty();
+  const bool has_region = !region.empty();
+
+  // The partial locales only differ from the full locale when both optional
+  // parts are present; probing them otherwise would either repeat a lookup
+  // that has just failed or search for a malformed "language_" key.
+  if (has_script && has_region) {
+    string lang_with_script(language);
+    lang_with_script.append("_");
+    lang_with_script.append(script);
+    if (HasLanguage(languages, lang_with_script)) {
+      best_match->swap(lang_with_script);
+      return;
+    }
+
+    string lang_with_region(language);
+    lang_with_region.append("_");
+    lang_with_region.append(region);
+    if (HasLanguage(languages, lang_with_region)) {
+      best_match->swap(lang_with_region);
+      return;
+    }
+  }
+
+  // Last resort: the language on its own.
+  if ((has_script || has_region) && HasLanguage(languages, language)) {
+    *best_match = language;
+    return;
+  }
+  best_match->clear();
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n
diff --git a/cpp/src/phonenumbers/geocoding/mapping_file_provider.h b/cpp/src/phonenumbers/geocoding/mapping_file_provider.h
new file mode 100644 (file)
index 0000000..1234a5e
--- /dev/null
@@ -0,0 +1,75 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: Patrick Mezard
+
+#ifndef I18N_PHONENUMBERS_GEOCODING_MAPPING_FILE_PROVIDER_H_
+#define I18N_PHONENUMBERS_GEOCODING_MAPPING_FILE_PROVIDER_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::string;
+
+struct CountryLanguages;
+
+// A utility which knows the data files that are available for the geocoder to
+// use. The data files contain mappings from phone number prefixes to text
+// descriptions, and are organized by country calling code and by the language
+// that the text descriptions are in.
+class MappingFileProvider {
+ public:
+  // Returns the languages available for the calling code stored at position
+  // index of the country_calling_codes array given to the constructor.
+  typedef const CountryLanguages* (*country_languages_getter)(int index);
+
+  // Initializes a MappingFileProvider with country_calling_codes, a sorted
+  // list of country_calling_codes_size calling codes, and a function
+  // get_country_languages(int index) returning the CountryLanguages
+  // information related to the country code at index in
+  // country_calling_codes. The list is not copied, so it has to remain valid
+  // for as long as the provider is used.
+  MappingFileProvider(const int* country_calling_codes,
+                      int country_calling_codes_size,
+                      country_languages_getter get_country_languages);
+
+  // Returns the name of the file that contains the mapping data for the
+  // country_calling_code in the language specified, or an empty string if no
+  // such file can be found. language is a two-letter lowercase ISO language
+  // code as defined by ISO 639-1. script is a four-letter titlecase (the first
+  // letter is uppercase and the rest of the letters are lowercase) ISO script
+  // code as defined in ISO 15924. region is a two-letter uppercase ISO country
+  // code as defined by ISO 3166-1. The name is written to *filename, and the
+  // returned reference refers to *filename.
+  const string& GetFileName(int country_calling_code, const string& language,
+                            const string& script, const string& region,
+                            string* filename) const;
+
+ private:
+  // Stores in *best_match the language code available in languages that best
+  // matches language, script and region, or clears *best_match when none of
+  // the available codes matches.
+  void FindBestMatchingLanguageCode(const CountryLanguages* languages,
+                                    const string& language,
+                                    const string& script,
+                                    const string& region,
+                                    string* best_match) const;
+
+  // Sorted array of the supported country calling codes (not owned).
+  const int* const country_calling_codes_;
+  // Number of elements in country_calling_codes_.
+  const int country_calling_codes_size_;
+  // Maps an index in country_calling_codes_ to its available languages.
+  const country_languages_getter get_country_languages_;
+
+  DISALLOW_COPY_AND_ASSIGN(MappingFileProvider);
+};
+
+}  // namespace phonenumbers
+}  // namespace i18n
+
+#endif  // I18N_PHONENUMBERS_GEOCODING_MAPPING_FILE_PROVIDER_H_
diff --git a/cpp/test/phonenumbers/geocoding/mapping_file_provider_test.cc b/cpp/test/phonenumbers/geocoding/mapping_file_provider_test.cc
new file mode 100644 (file)
index 0000000..81057af
--- /dev/null
@@ -0,0 +1,91 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Patrick Mezard
+
+#include "phonenumbers/geocoding/mapping_file_provider.h"
+
+#include <gtest/gtest.h>  // NOLINT(build/include_order)
+
+#include "phonenumbers/geocoding/geocoding_data.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::string;
+
+namespace {
+
+// Defines, for calling code `code`, the array country_languages_<code>
+// initialized with `languagelist` and the CountryLanguages object
+// country_<code> referring to that array and its element count. The expansion
+// intentionally omits the final semicolon: it is supplied by the invocation,
+// "COUNTRY_LANGUAGES(...);", which therefore no longer leaves a stray empty
+// declaration behind.
+#define COUNTRY_LANGUAGES(code, languagelist)                             \
+  const char* country_languages_##code[] = languagelist;                  \
+  const CountryLanguages country_##code = {                               \
+    country_languages_##code,                                             \
+    sizeof(country_languages_##code) / sizeof(*country_languages_##code), \
+  }
+
+// Array literals cannot be passed as regular macro arguments: their
+// separating commas are interpreted as macro argument separators. The
+// following dummy variadic macro wraps the array commas, so that the literal
+// appears as a single argument to an outer macro call.
+#define ARRAY_WRAPPER(...) __VA_ARGS__
+
+// Calling codes handed to the provider under test, in ascending order.
+const int country_calling_codes[] = {1, 41, 65, 86};
+
+// Number of entries in country_calling_codes.
+const int country_calling_codes_size =
+    sizeof(country_calling_codes) / sizeof(country_calling_codes[0]);
+
+// Languages available for each calling code. Every list is kept in
+// lexicographic order because the provider looks languages up with a binary
+// search.
+COUNTRY_LANGUAGES(1,  ARRAY_WRAPPER({"en"}));
+COUNTRY_LANGUAGES(41, ARRAY_WRAPPER({"de", "fr", "it", "rm"}));
+COUNTRY_LANGUAGES(65, ARRAY_WRAPPER({"en", "ms", "ta", "zh_Hans"}));
+COUNTRY_LANGUAGES(86, ARRAY_WRAPPER({"en", "zh", "zh_Hant"}));
+
+// CountryLanguages entries, listed in the same order as the calling codes in
+// country_calling_codes.
+const CountryLanguages* country_languages[] = {
+  &country_1,
+  &country_41,
+  &country_65,
+  &country_86,
+};
+
+// Getter given to the provider under test: returns the country_languages
+// entry stored at index. The index is not range-checked.
+const CountryLanguages* test_get_country_languages(int index) {
+  const CountryLanguages* const entry = country_languages[index];
+  return entry;
+}
+
+}  // namespace
+
+// Checks the file names returned by GetFileName() for the fixture above:
+// exact matches, fallbacks to a less specific locale, locale normalization
+// (zh_TW/zh_HK -> zh_Hant), and the cases that must yield an empty name.
+TEST(MappingFileProviderTest, TestGetFileName) {
+  MappingFileProvider provider(country_calling_codes,
+                               country_calling_codes_size,
+                               test_get_country_languages);
+
+  string filename;
+  EXPECT_EQ("1_en", provider.GetFileName(1, "en", "", "", &filename));
+  EXPECT_EQ("1_en", provider.GetFileName(1, "en", "", "US", &filename));
+  EXPECT_EQ("1_en", provider.GetFileName(1, "en", "", "GB", &filename));
+  EXPECT_EQ("41_de", provider.GetFileName(41, "de", "", "CH", &filename));
+  // 44 is not one of the known calling codes.
+  EXPECT_EQ("", provider.GetFileName(44, "en", "", "GB", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "", "", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "Hans", "", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "", "CN", &filename));
+  // An empty language never matches.
+  EXPECT_EQ("", provider.GetFileName(86, "", "", "CN", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "Hans", "CN", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "Hans", "SG", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "", "SG", &filename));
+  EXPECT_EQ("86_zh_Hant", provider.GetFileName(86, "zh", "", "TW", &filename));
+  EXPECT_EQ("86_zh_Hant", provider.GetFileName(86, "zh", "", "HK", &filename));
+  EXPECT_EQ("86_zh_Hant", provider.GetFileName(86, "zh", "Hant", "TW",
+                                               &filename));
+
+  // Calling code 65 offers "zh_Hans" but no plain "zh".
+  EXPECT_EQ("65_ta", provider.GetFileName(65, "ta", "", "", &filename));
+  EXPECT_EQ("65_zh_Hans", provider.GetFileName(65, "zh", "Hans", "",
+                                               &filename));
+  EXPECT_EQ("65_zh_Hans", provider.GetFileName(65, "zh", "Hans", "SG",
+                                               &filename));
+  // Known calling code, but none of its languages matches the request.
+  EXPECT_EQ("", provider.GetFileName(65, "zh", "", "", &filename));
+  EXPECT_EQ("", provider.GetFileName(65, "zh", "", "CN", &filename));
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n