1 // Copyright (C) 2012 The Libphonenumber Authors
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 // Author: Patrick Mezard
17 #include "cpp-build/generate_geocoding_data.h"
35 #include "base/basictypes.h"
38 namespace phonenumbers {
// Pairs the address of a resource pointer with the function used to release
// that resource. Elsewhere in this file it is instantiated for DIR (with
// closedir) and FILE (with fclose).
// NOTE(review): the constructor body, the destructor and the function that
// encloses the release call below are not visible in this view; presumably the
// resource is released when the object is destroyed — confirm against the
// full file.
46 template <typename ResourceType> class AutoCloser {
// Signature shared by the release callbacks (matches fclose and closedir).
48 typedef int (*ReleaseFunction) (ResourceType* resource);
// Stores the address of the caller's resource pointer together with the
// release callback.
50 AutoCloser(ResourceType** resource, ReleaseFunction release_function)
51 : resource_(resource),
52 release_function_(release_function)
// Accessor for the guarded resource (body not visible here; presumably
// returns *resource_).
59 ResourceType* get_resource() const {
// Invokes the release callback on the resource currently pointed to.
65 release_function_(*resource_);
// Address of the caller-owned pointer to the resource.
71 ResourceType** resource_;
// Callback used to release the resource.
72 ReleaseFunction release_function_;
// Directory entry described by a name and a kind.
// NOTE(review): the class header, the constructor's initializer list and the
// member declarations are not visible in this view.
// n is the entry name, k its kind.
82 DirEntry(const char* n, DirEntryKinds k)
// Read-only accessors for the name_ and kind_ members.
87 const std::string& name() const { return name_; }
88 DirEntryKinds kind() const { return kind_; }
95 // Lists directory entries in path. "." and ".." are excluded. Returns true on success.
// path:    directory to enumerate.
// entries: output vector receiving one DirEntry per accepted entry.
// NOTE(review): the loop header and the bodies of the error/skip branches are
// not visible in this view.
97 bool ListDirectory(const string& path, vector<DirEntry>* entries) {
// Open a directory stream on path.
99 DIR* dir = opendir(path.c_str());
// Associate the stream with closedir through AutoCloser.
103 AutoCloser<DIR> dir_closer(&dir, closedir);
104 struct dirent entry, *dir_result;
105 struct stat entry_stat;
// readdir_r fills entry and sets dir_result; its status code is kept in res.
// NOTE(review): readdir_r is deprecated in current glibc — consider readdir().
107 const int res = readdir_r(dir, &entry, &dir_result);
// A NULL result pointer is how readdir_r reports the end of the stream.
111 if (dir_result == NULL) {
// The "." and ".." pseudo-entries are excluded from the listing.
114 if (strcmp(entry.d_name, ".") == 0 || strcmp(entry.d_name, "..") == 0) {
// stat() needs the entry's path under "path", not only its bare name.
117 const string entry_path = path + "/" + entry.d_name;
// stat() returns non-zero on failure.
118 if (stat(entry_path.c_str(), &entry_stat)) {
// Classification starts as kFile and is revised from the stat() mode bits.
121 DirEntryKinds kind = kFile;
122 if (S_ISDIR(entry_stat.st_mode)) {
// Taken for entries that are neither directories nor regular files
// (presumably they are skipped — branch body not visible here).
124 } else if (!S_ISREG(entry_stat.st_mode)) {
// Record the bare entry name together with its kind.
127 entries->push_back(DirEntry(entry.d_name, kind));
// Returns true if s ends with suffix. An empty suffix matches every string.
bool EndsWith(const string& s, const string& suffix) {
  // A suffix longer than s can never match. This guard also keeps the
  // std::equal call below from walking past the beginning of s.
  if (suffix.length() > s.length()) {
    return false;
  }
  // Compare back-to-front so both strings are aligned on their last character.
  return std::equal(suffix.rbegin(), suffix.rend(), s.rbegin());
}
139 // Converts string to integer, returns true on success.
// s: text to convert; n: output location for the parsed value.
// NOTE(review): only the declaration of the scratch stream is visible in this
// view; the statements that extract into *n and compute the return value are
// not shown.
140 bool StrToInt(const string& s, int32* n) {
// Scratch stream, presumably used to perform the conversion.
141 std::stringstream stream;
147 // Converts integer to string, returns true on success.
// n: value to convert; s: output location for the textual form.
// NOTE(review): only the declaration of the scratch stream is visible in this
// view; the statements that fill *s and compute the return value are not
// shown.
148 bool IntToStr(int32 n, string* s) {
// Scratch stream, presumably used to perform the conversion.
149 std::stringstream stream;
155 // Parses the prefix descriptions file at path. Clears the output map
156 // "prefixes" and fills it with the phone number prefix to description mapping.
157 // Returns true on success.
// path:     file to read; each data line has the form "<prefix>|<description>".
// prefixes: output map from numeric prefix to description text.
// NOTE(review): several statements (early returns, the declaration of
// prefix_code, handling of lines without a separator) are not visible in this
// view.
158 bool ParsePrefixes(const string& path, map<int32, string>* prefixes) {
// Open the file for reading in text mode.
160 FILE* input = fopen(path.c_str(), "r");
// Associate the stream with fclose through AutoCloser.
164 AutoCloser<FILE> input_closer(&input, fclose);
// Size of the line buffer handed to fgets below.
165 const int kMaxLineLength = 2*1024;
166 vector<char> buffer(kMaxLineLength);
167 vector<char>::iterator begin, end, sep;
// NOTE(review): description is not referenced by any line visible here.
168 string prefix, description;
// fgets reads at most buffer.size() - 1 characters and NUL-terminates them.
170 while (fgets(&buffer[0], buffer.size(), input)) {
171 begin = buffer.begin();
// Locate the terminating NUL written by fgets.
172 end = std::find(begin, buffer.end(), '\0');
// Presumably end has been moved onto the last character read by a line that
// is not visible here; a missing '\n' before EOF indicates an over-long line.
177 if (*end != '\n' && !feof(input)) {
178 // A line without LF can only happen at the end of file.
182 // Trim and check for comments.
// NOTE(review): std::isspace has undefined behaviour for negative char values
// (non-ASCII bytes where char is signed); casting to unsigned char avoids it.
183 for (; begin != end && std::isspace(*begin); ++begin) {}
184 for (; end != begin && std::isspace(*(end - 1)); --end) {}
// Blank lines and lines whose first non-blank character is '#'.
185 if (begin == end || *begin == '#') {
// '|' separates the numeric prefix from its description.
189 sep = std::find(begin, end, '|');
193 prefix = string(begin, sep);
// The text before '|' must convert to an integer.
194 if (!StrToInt(prefix, &prefix_code)) {
// Everything after the separator, already right-trimmed, is the description.
// operator[] overwrites any earlier entry for the same prefix.
197 (*prefixes)[prefix_code] = string(sep + 1, end);
// fgets also returns NULL on read errors, so tell them apart from EOF.
199 return ferror(input) == 0;
202 // Builds a C string literal from s. The output is enclosed in double-quotes and
203 // care is taken to escape input quotes and non-ASCII or control characters.
208 // "Op""\xc3""\xa9""ra"
// s: raw bytes to render as a C string literal.
// NOTE(review): the declarations of c and prev_is_hex, and the statements that
// emit quotes and plain characters, are not visible in this view.
209 string MakeStringLiteral(const string& s) {
210 std::stringstream buffer;
// Integers inserted into buffer are printed in hexadecimal, padded with '0'.
212 buffer << std::hex << std::setfill('0');
214 for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
// Printable ASCII range; c is presumably the current character *it.
216 if (c >= 32 && c < 127) {
// prev_is_hex presumably records what was emitted last, so that a hex escape
// and the following text can be split into adjacent literals as in the
// example "Op""\xc3""\xa9""ra" — confirm against the full file.
217 if (prev_is_hex == 2) {
226 if (prev_is_hex != 0) {
// Emit the byte as a two-digit \x escape. The conditional maps negative char
// values to 128..255 and yields an int, so it is printed as a number.
229 buffer << "\\x" << std::setw(2) << (c < 0 ? c + 256 : c);
237 void WriteStringLiteral(const string& s, FILE* output) {
238 string literal = MakeStringLiteral(s);
239 fprintf(output, "%s", literal.c_str());
// Comment header text that WriteLicense copies verbatim into the output.
// NOTE(review): some pieces of this literal (and its terminating ';') are not
// visible in this view.
242 const char kLicense[] =
243 "// Copyright (C) 2012 The Libphonenumber Authors\n"
245 "// Licensed under the Apache License, Version 2.0 (the \"License\");\n"
246 "// you may not use this file except in compliance with the License.\n"
247 "// You may obtain a copy of the License at\n"
249 "// http://www.apache.org/licenses/LICENSE-2.0\n"
251 "// Unless required by applicable law or agreed to in writing, software\n"
252 "// distributed under the License is distributed on an \"AS IS\" BASIS,\n"
253 "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or "
255 "// See the License for the specific language governing permissions and\n"
256 "// limitations under the License.\n"
258 "// This file is generated automatically, do not edit it manually.\n"
261 void WriteLicense(FILE* output) {
262 fprintf(output, "%s", kLicense);
// Names of the two namespaces that WriteNSHeader opens and WriteNSFooter
// closes in the generated output.
265 const char kI18NNS[] = "i18n";
266 const char kPhoneNumbersNS[] = "phonenumbers";
268 void WriteNSHeader(FILE* output) {
269 fprintf(output, "namespace %s {\n", kI18NNS);
270 fprintf(output, "namespace %s {\n", kPhoneNumbersNS);
273 void WriteNSFooter(FILE* output) {
274 fprintf(output, "} // namespace %s\n", kPhoneNumbersNS);
275 fprintf(output, "} // namespace %s\n", kI18NNS);
// Writes the #include lines of the generated .cc file: first the header that
// matches base_name, then basictypes.h, each followed by an empty line.
void WriteCppHeader(const string& base_name, FILE* output) {
  const char* const header_stem = base_name.c_str();
  fprintf(output, "#include \"phonenumbers/geocoding/%s.h\"\n", header_stem);
  fprintf(output, "\n");
  fprintf(output, "#include \"phonenumbers/base/basictypes.h\"\n");
  fprintf(output, "\n");
}
// Writes two initializer fields to output: the array called name, then an
// expression computing its element count.
void WriteArrayAndSize(const string& name, FILE* output) {
  const char* const array_name = name.c_str();
  fprintf(output, " %s,\n", array_name);
  fprintf(output, " sizeof(%s)/sizeof(*%s),\n", array_name, array_name);
}
291 // Writes a PrefixDescriptions variable named "name", with its prefixes field
292 // set to "prefixes_name" variable, its descriptions to "desc_name" and its
293 // possible_lengths to "possible_lengths_name":
295 // const PrefixDescriptions ${name} = {
297 // sizeof(${prefix_name})/sizeof(*${prefix_name}),
299 // ${possible_lengths_name},
300 // sizeof(${possible_lengths_name})/sizeof(*${possible_lengths_name}),
303 void WritePrefixDescriptionsDefinition(
304 const string& name, const string& prefixes_name, const string& desc_name,
305 const string& possible_lengths_name, FILE* output) {
306 fprintf(output, "const PrefixDescriptions %s = {\n", name.c_str());
307 WriteArrayAndSize(prefixes_name, output);
308 fprintf(output, " %s,\n", desc_name.c_str());
309 WriteArrayAndSize(possible_lengths_name, output);
310 fprintf(output, "};\n");
313 // Writes prefixes, descriptions and possible_lengths arrays built from the
314 // phone number prefix to description mapping "prefixes". Binds these arrays
315 // in a single PrefixDescriptions variable named "var_name".
317 // const int32 ${var_name}_prefixes[] = {
322 // const char* ${var_name}_descriptions[] = {
327 // const int32 ${var_name}_possible_lengths[] = {
331 // const PrefixDescriptions ${var_name} = {
// var_name: base name of the generated variables.
// prefixes: numeric prefix -> description; std::map iterates in key order, so
//           the prefixes and descriptions arrays are emitted in matching order.
// output:   stream receiving the generated code.
// NOTE(review): the key type here is map<int, string> while ParsePrefixes and
// WriteSource use map<int32, string>; these agree only where int32 is int.
// NOTE(review): the lines closing each generated array are not visible in
// this view.
335 void WritePrefixDescriptions(const string& var_name, const map<int, string>&
336 prefixes, FILE* output) {
// Distinct digit counts of the prefixes; std::set keeps them unique and sorted.
337 set<int> possible_lengths;
// First array: the numeric prefixes.
338 const string prefixes_name = var_name + "_prefixes";
339 fprintf(output, "const int32 %s[] = {\n", prefixes_name.c_str());
340 for (map<int, string>::const_iterator it = prefixes.begin();
341 it != prefixes.end(); ++it) {
342 fprintf(output, " %d,\n", it->first);
// log10(x) + 1 truncated to int is the number of decimal digits of x.
// NOTE(review): log10 is only defined for values > 0 — confirm that prefixes
// can never be 0 or negative.
343 possible_lengths.insert(static_cast<int>(log10(it->first) + 1));
// Second array: the descriptions, written as escaped string literals.
349 const string desc_name = var_name + "_descriptions";
350 fprintf(output, "const char* %s[] = {\n", desc_name.c_str());
351 for (map<int, string>::const_iterator it = prefixes.begin();
352 it != prefixes.end(); ++it) {
353 fprintf(output, " ");
354 WriteStringLiteral(it->second, output);
355 fprintf(output, ",\n");
// Third array: the digit counts collected above, all on one output line.
361 const string possible_lengths_name = var_name + "_possible_lengths";
362 fprintf(output, "const int32 %s[] = {\n ", possible_lengths_name.c_str());
363 for (set<int>::const_iterator it = possible_lengths.begin();
364 it != possible_lengths.end(); ++it) {
365 fprintf(output, " %d,", *it);
// Finally bind the three arrays into one PrefixDescriptions constant.
372 WritePrefixDescriptionsDefinition(var_name, prefixes_name, desc_name,
373 possible_lengths_name, output);
374 fprintf(output, "\n");
377 // Writes a pair of arrays mapping prefix language code pairs to
378 // PrefixDescriptions instances. "prefix_var_names" maps language code pairs
379 // to prefix variable names.
381 // const char* prefix_language_code_pairs[] = {
386 // const PrefixDescriptions* prefixes_descriptions[] = {
// prefix_var_names: language code pair -> name of a generated
//                   PrefixDescriptions variable.
// Both loops walk the same std::map, so entry i of the first generated array
// corresponds to entry i of the second.
// NOTE(review): the rest of the signature and the lines closing each generated
// array are not visible in this view.
391 void WritePrefixesDescriptions(const map<string, string>& prefix_var_names,
// First array: the map keys as string literals.
393 fprintf(output, "const char* prefix_language_code_pairs[] = {\n");
394 for (map<string, string>::const_iterator it = prefix_var_names.begin();
395 it != prefix_var_names.end(); ++it) {
396 fprintf(output, " \"%s\",\n", it->first.c_str());
// Second array: addresses of the variables named by the map values.
401 "const PrefixDescriptions* prefixes_descriptions[] = {\n");
402 for (map<string, string>::const_iterator it = prefix_var_names.begin();
403 it != prefix_var_names.end(); ++it) {
404 fprintf(output, " &%s,\n", it->second.c_str());
411 // For each entry in "languages" mapping a country calling code to a set
412 // of available languages, writes a sorted array of languages, then wraps it
413 // into a CountryLanguages instance. Finally, writes a pair of arrays mapping
414 // country calling codes to CountryLanguages instances.
416 // const char* country_1[] = {
421 // const CountryLanguages country_1_languages = {
423 // sizeof(country_1)/sizeof(*country_1),
428 // const CountryLanguages* countries_languages[] = {
429 // &country_1_languages,
433 // const int country_calling_codes[] = {
// languages: country calling code -> set of language names.
// NOTE(review): the rest of the signature, the declaration of country_code,
// the failure branch and the lines closing each generated array are not
// visible in this view.
438 bool WriteCountryLanguages(const map<int32, set<string> >& languages,
// Names of the generated CountryLanguages variables, in iteration order.
440 vector<string> country_languages_vars;
// Country calling codes as decimal text; filled in step with the vector above.
441 vector<string> countries;
442 for (map<int32, set<string> >::const_iterator it = languages.begin();
443 it != languages.end(); ++it) {
// The calling code is needed as text to build the variable names.
445 if (!IntToStr(it->first, &country_code)) {
// Per-country array of language names; std::set iterates in sorted order.
448 const string country_var = "country_" + country_code;
449 fprintf(output, "const char* %s[] = {\n", country_var.c_str());
450 for (set<string>::const_iterator it_lang = it->second.begin();
451 it_lang != it->second.end(); ++it_lang) {
452 fprintf(output, " \"%s\",\n", it_lang->c_str());
// CountryLanguages constant wrapping the array just written and its size.
458 const string country_languages_var = country_var + "_languages";
459 fprintf(output, "const CountryLanguages %s = {\n",
460 country_languages_var.c_str());
461 WriteArrayAndSize(country_var, output);
// Remember both names for the two index arrays written after the loop.
465 country_languages_vars.push_back(country_languages_var);
466 countries.push_back(country_code);
// Index array of pointers to every CountryLanguages constant.
471 "const CountryLanguages* countries_languages[] = {\n");
472 for (vector<string>::const_iterator
473 it_languages_var = country_languages_vars.begin();
474 it_languages_var != country_languages_vars.end(); ++it_languages_var) {
475 fprintf(output, " &%s,\n", it_languages_var->c_str());
// Index array of the calling codes, in the same order as the pointers above.
480 "const int country_calling_codes[] = {\n");
481 for (vector<string>::const_iterator it_country = countries.begin();
482 it_country != countries.end(); ++it_country) {
483 fprintf(output, " %s,\n", it_country->c_str());
// Returns a copy of input where all occurrences of pattern are replaced with
// value. If pattern is empty, input is returned unchanged.
// Matches are searched left to right and never overlap; text inserted from
// value is not searched again.
string ReplaceAll(const string& input, const string& pattern,
                  const string& value) {
  // An empty pattern matches at every position and would never advance.
  if (pattern.empty()) {
    return input;
  }
  string replaced;
  replaced.reserve(input.size());
  // Offset in input of the first character not copied to replaced yet.
  string::size_type cursor = 0;
  for (;;) {
    const string::size_type match = input.find(pattern, cursor);
    if (match == string::npos) {
      // No further match: keep the remaining tail as is.
      replaced.append(input, cursor, string::npos);
      break;
    }
    // Copy the text before the match, then the replacement, and resume the
    // search right after the matched pattern.
    replaced.append(input, cursor, match - cursor);
    replaced.append(value);
    cursor = match + pattern.length();
  }
  return replaced;
}
515 // Writes data accessor definitions, prefixed with "accessor_prefix".
// The accessor bodies are kept in a text template in which every "$prefix$"
// marker is replaced by accessor_prefix before the result is written to
// output.
// NOTE(review): the declaration of templ and the closing lines of each
// generated function are not visible in this view.
516 void WriteAccessorsDefinitions(const string& accessor_prefix, FILE* output) {
// Accessor for the country_calling_codes array.
518 "const int* get$prefix$_country_calling_codes() {\n"
519 " return country_calling_codes;\n"
// Element count of country_calling_codes.
522 "int get$prefix$_country_calling_codes_size() {\n"
523 " return sizeof(country_calling_codes)\n"
524 " /sizeof(*country_calling_codes);\n"
// Indexed accessor into countries_languages; the generated code does no
// bounds check on index.
527 "const CountryLanguages* get$prefix$_country_languages(int index) {\n"
528 " return countries_languages[index];\n"
// Accessor for the prefix_language_code_pairs array.
531 "const char** get$prefix$_prefix_language_code_pairs() {\n"
532 " return prefix_language_code_pairs;\n"
// Element count of prefix_language_code_pairs.
535 "int get$prefix$_prefix_language_code_pairs_size() {\n"
536 " return sizeof(prefix_language_code_pairs)\n"
537 " /sizeof(*prefix_language_code_pairs);\n"
// Indexed accessor into prefixes_descriptions; no bounds check either.
540 "const PrefixDescriptions* get$prefix$_prefix_descriptions(int index) {\n"
541 " return prefixes_descriptions[index];\n"
// Substitute the marker, then emit the whole text in one call.
543 string defs = ReplaceAll(templ, "$prefix$", accessor_prefix);
544 fprintf(output, "%s", defs.c_str());
547 // Writes geocoding data .cc file. "data_path" is the path of geocoding textual
548 // data directory. "base_name" is the base name of the .h/.cc pair, like
// data_path:       root directory holding one sub-directory per language.
// accessor_prefix: text inserted into the names of the generated accessors.
// output:          stream receiving the generated source.
// Returns true only if no write error was recorded on output.
// NOTE(review): the bodies of the error/skip branches, the declaration of
// country_code and the line opening the namespace closed near the end are not
// visible in this view.
550 bool WriteSource(const string& data_path, const string& base_name,
551 const string& accessor_prefix, FILE* output) {
// Preamble: license text, #include lines, then the namespace openers.
552 WriteLicense(output);
553 WriteCppHeader(base_name, output);
554 WriteNSHeader(output);
559 // Enumerate language/script directories.
// "<country code>_<directory name>" -> name of the generated variable.
560 map<string, string> prefix_vars;
// Country calling code -> names of the directories providing a file for it.
561 map<int32, set<string> > country_languages;
562 vector<DirEntry> entries;
563 if (!ListDirectory(data_path, &entries)) {
// NOTE(review): unlike the file-entries message below, this message has no
// trailing "\n".
564 fprintf(stderr, "failed to read directory entries");
567 for (vector<DirEntry>::const_iterator it = entries.begin();
568 it != entries.end(); ++it) {
// Only sub-directories of data_path are of interest here.
569 if (it->kind() != kDirectory) {
572 // Enumerate country calling code files.
573 const string dir_path = data_path + "/" + it->name();
574 vector<DirEntry> files;
575 if (!ListDirectory(dir_path, &files)) {
576 fprintf(stderr, "failed to read file entries\n");
579 for (vector<DirEntry>::const_iterator it_files = files.begin();
580 it_files != files.end(); ++it_files) {
581 const string fname = it_files->name();
// Data files are recognised by their ".txt" extension.
582 if (!EndsWith(fname, ".txt")) {
// Drop the 4-character ".txt" extension; the rest must be an integer.
586 const string country_code_str = fname.substr(0, fname.length() - 4);
587 if (!StrToInt(country_code_str, &country_code)) {
590 const string path = dir_path + "/" + fname;
592 map<int32, string> prefixes;
593 if (!ParsePrefixes(path, &prefixes)) {
// One generated PrefixDescriptions variable per (country code, directory).
597 const string prefix_var = "prefix_" + country_code_str + "_" + it->name();
598 WritePrefixDescriptions(prefix_var, prefixes, output);
599 prefix_vars[country_code_str + "_" + it->name()] = prefix_var;
600 country_languages[country_code].insert(it->name());
// Index arrays over everything generated above.
603 WritePrefixesDescriptions(prefix_vars, output);
604 if (!WriteCountryLanguages(country_languages, output)) {
// Closes a namespace presumably opened on a line not visible here.
607 fprintf(output, "} // namespace\n");
608 fprintf(output, "\n");
609 WriteAccessorsDefinitions(accessor_prefix, output);
610 WriteNSFooter(output);
// The fprintf calls above are unchecked; ferror reports any failure at once.
611 return ferror(output) == 0;
// Prints "error: <message>" followed by a one-line usage summary on stderr.
// Returns a non-zero value so that callers can return it directly as a
// failing exit status.
int PrintHelp(const string& message) {
  fprintf(stderr, "error: %s\n", message.c_str());
  // End the usage line with a newline so that whatever is printed next (for
  // instance the shell prompt) starts on a line of its own.
  fprintf(stderr, "generate_geocoding_data DATADIR CCPATH\n");
  return 1;
}
// Entry point of the generator. argv[1] is the geocoding data root directory,
// argv[2] the path of the source file to write, and argv[3], when used, the
// accessor prefix.
// NOTE(review): the argc checks guarding each branch and the final return
// statements are not visible in this view.
620 int Main(int argc, const char* argv[]) {
622 return PrintHelp("geocoding data root directory expected");
625 return PrintHelp("output source path expected");
// The accessor prefix defaults to the empty string.
627 string accessor_prefix = "";
629 accessor_prefix = argv[3];
631 const string root_path(argv[1]);
632 string source_path(argv[2]);
// Normalise backslashes so the path can be split on '/' only.
633 std::replace(source_path.begin(), source_path.end(), '\\', '/');
// base_name is the output file name without directory and without extension.
634 string base_name = source_path;
635 if (base_name.rfind('/') != string::npos) {
636 base_name = base_name.substr(base_name.rfind('/') + 1);
// When there is no '.', rfind returns npos and substr keeps the whole name.
638 base_name = base_name.substr(0, base_name.rfind('.'));
// Open the output file for writing; "w" truncates an existing file.
640 FILE* source_fp = fopen(source_path.c_str(), "w");
642 fprintf(stderr, "failed to open %s\n", source_path.c_str());
// Associate the stream with fclose through AutoCloser.
645 AutoCloser<FILE> source_closer(&source_fp, fclose);
646 if (!WriteSource(root_path, base_name, accessor_prefix,
653 } // namespace phonenumbers