1 // Copyright (C) 2012 The Libphonenumber Authors
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 // Author: Patrick Mezard
17 #include "cpp-build/generate_geocoding_data.h"
36 #include "base/basictypes.h"
39 namespace phonenumbers {
// Scope guard for C-style resources (FILE*, DIR*, ...). It watches the
// caller's handle variable through a pointer and, on destruction or on an
// explicit Close(), passes a non-NULL handle to the release function and then
// resets the caller's variable to NULL so the handle is released at most once.
template <typename ResourceType> class AutoCloser {
 public:
  // Signature shared by fclose(), closedir() and similar release functions.
  typedef int (*ReleaseFunction) (ResourceType* resource);

  // "resource" points at the handle variable to guard; that variable must
  // outlive this object. "release_function" is called with the handle value.
  AutoCloser(ResourceType** resource, ReleaseFunction release_function)
      : resource_(resource),
        release_function_(release_function)
  {}

  ~AutoCloser() {
    Close();
  }

  // Returns the guarded handle, or NULL once it has been released.
  ResourceType* get_resource() const {
    return *resource_;
  }

  // Releases the handle if it is still set. Safe to call more than once. The
  // release function's return value is ignored.
  void Close() {
    if (*resource_) {
      release_function_(*resource_);
      *resource_ = NULL;
    }
  }

 private:
  ResourceType** resource_;
  ReleaseFunction release_function_;
};
// Kinds of directory entries reported by ListDirectory().
enum DirEntryKinds {
  kFile = 0,
  kDirectory = 1
};

// Value type pairing a directory entry's name with its kind.
class DirEntry {
 public:
  // "n" is copied, so the caller's buffer does not need to stay alive.
  DirEntry(const char* n, DirEntryKinds k)
      : name_(n),
        kind_(k)
  {}

  const std::string& name() const { return name_; }
  DirEntryKinds kind() const { return kind_; }

 private:
  std::string name_;
  DirEntryKinds kind_;
};
96 // Lists directory entries in path. "." and ".." are excluded. Returns true on
98 bool ListDirectory(const string& path, vector<DirEntry>* entries) {
100 DIR* dir = opendir(path.c_str());
104 AutoCloser<DIR> dir_closer(&dir, closedir);
105 struct dirent *entry;
106 struct stat entry_stat;
108 while ((entry = readdir(dir))) {
109 if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
112 const string entry_path = path + "/" + entry->d_name;
113 if (stat(entry_path.c_str(), &entry_stat)) {
116 DirEntryKinds kind = kFile;
117 if (S_ISDIR(entry_stat.st_mode)) {
119 } else if (!S_ISREG(entry_stat.st_mode)) {
122 entries->push_back(DirEntry(entry->d_name, kind));
// Returns true if s ends with suffix. An empty suffix matches any string.
bool EndsWith(const std::string& s, const std::string& suffix) {
  if (suffix.length() > s.length()) {
    return false;
  }
  // Compare the last suffix.length() characters of s with suffix.
  return s.compare(s.length() - suffix.length(), suffix.length(), suffix) == 0;
}
139 // Converts string to integer, returns true on success.
140 bool StrToInt(const string& s, int32* n) {
141 std::stringstream stream;
147 // Converts integer to string, returns true on success.
148 bool IntToStr(int32 n, string* s) {
149 std::stringstream stream;
155 // Parses the prefix descriptions file at path, clears and fills the output
156 // prefixes phone number prefix to description mapping.
157 // Returns true on success.
158 bool ParsePrefixes(const string& path, map<int32, string>* prefixes) {
160 FILE* input = fopen(path.c_str(), "r");
164 AutoCloser<FILE> input_closer(&input, fclose);
165 const int kMaxLineLength = 2*1024;
166 vector<char> buffer(kMaxLineLength);
167 vector<char>::iterator begin, end, sep;
168 string prefix, description;
170 while (fgets(&buffer[0], buffer.size(), input)) {
171 begin = buffer.begin();
172 end = std::find(begin, buffer.end(), '\0');
177 if (*end != '\n' && !feof(input)) {
178 // A line without LF can only happen at the end of file.
182 // Trim and check for comments.
183 for (; begin != end && std::isspace(*begin); ++begin) {}
184 for (; end != begin && std::isspace(*(end - 1)); --end) {}
185 if (begin == end || *begin == '#') {
189 sep = std::find(begin, end, '|');
193 prefix = string(begin, sep);
194 if (!StrToInt(prefix, &prefix_code)) {
197 (*prefixes)[prefix_code] = string(sep + 1, end);
199 return ferror(input) == 0;
// Builds a C string literal from s. The output is enclosed in double-quotes.
// Quotes and backslashes are backslash-escaped, and non-ASCII or control
// characters are written as two-digit \x escapes.
//
// A hex escape is always separated from its neighbours by closing and
// reopening the literal (""), so a following character that happens to be a
// hex digit is not absorbed into the escape sequence.
//
// An input string:
//   Op\xc3\xa9ra
// becomes:
//   "Op""\xc3""\xa9""ra"
std::string MakeStringLiteral(const std::string& s) {
  std::stringstream buffer;
  // 0: nothing written yet, 1: last output was a plain character,
  // 2: last output was a hex escape.
  int prev_is_hex = 0;
  buffer << std::hex << std::setfill('0');
  buffer << "\"";
  for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
    const char c = *it;
    if (c >= 32 && c < 127) {
      if (prev_is_hex == 2) {
        buffer << "\"\"";
      }
      // Without these escapes a '"' would end the literal early and a '\\'
      // would start an unintended escape sequence.
      if (c == '"' || c == '\\' || c == '\'') {
        buffer << "\\";
      }
      buffer << c;
      prev_is_hex = 1;
    } else {
      if (prev_is_hex != 0) {
        buffer << "\"\"";
      }
      // Go through unsigned char so bytes >= 0x80 print as 80..ff whether or
      // not char is signed.
      buffer << "\\x" << std::setw(2)
             << static_cast<int>(static_cast<unsigned char>(c));
      prev_is_hex = 2;
    }
  }
  buffer << "\"";
  return buffer.str();
}
237 void WriteStringLiteral(const string& s, FILE* output) {
238 string literal = MakeStringLiteral(s);
239 fprintf(output, "%s", literal.c_str());
// License banner and "generated file" notice written at the top of the
// generated source (see WriteLicense).
const char kLicense[] =
  "// Copyright (C) 2012 The Libphonenumber Authors\n"
  "//\n"
  "// Licensed under the Apache License, Version 2.0 (the \"License\");\n"
  "// you may not use this file except in compliance with the License.\n"
  "// You may obtain a copy of the License at\n"
  "//\n"
  "// http://www.apache.org/licenses/LICENSE-2.0\n"
  "//\n"
  "// Unless required by applicable law or agreed to in writing, software\n"
  "// distributed under the License is distributed on an \"AS IS\" BASIS,\n"
  "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or "
  "implied.\n"
  "// See the License for the specific language governing permissions and\n"
  "// limitations under the License.\n"
  "//\n"
  "// This file is generated automatically, do not edit it manually.\n"
  "\n";
261 void WriteLicense(FILE* output) {
262 fprintf(output, "%s", kLicense);
// Names of the nested namespaces that wrap the generated code, outermost
// first.
const char kI18NNS[] = "i18n";
const char kPhoneNumbersNS[] = "phonenumbers";
268 void WriteNSHeader(FILE* output) {
269 fprintf(output, "namespace %s {\n", kI18NNS);
270 fprintf(output, "namespace %s {\n", kPhoneNumbersNS);
273 void WriteNSFooter(FILE* output) {
274 fprintf(output, "} // namespace %s\n", kPhoneNumbersNS);
275 fprintf(output, "} // namespace %s\n", kI18NNS);
// Writes the #include section of the generated source: the header matching
// "base_name" first, then basictypes.h, each followed by an empty line.
void WriteCppHeader(const std::string& base_name, FILE* output) {
  fprintf(output, "#include \"phonenumbers/geocoding/%s.h\"\n",
          base_name.c_str());
  fprintf(output, "\n");
  fprintf(output, "#include \"phonenumbers/base/basictypes.h\"\n");
  fprintf(output, "\n");
}
// Writes two initializer entries for the array variable "name": the array
// itself and its element count, written as a sizeof expression.
void WriteArrayAndSize(const std::string& name, FILE* output) {
  const char* const array_name = name.c_str();
  fprintf(output, " %s,\n", array_name);
  fprintf(output, " sizeof(%s)/sizeof(*%s),\n", array_name, array_name);
}
291 // Writes a PrefixDescriptions variable named "name", with its prefixes field
292 // set to "prefixes_name" variable, its descriptions to "desc_name" and its
293 // possible_lengths to "possible_lengths_name":
295 // const PrefixDescriptions ${name} = {
297 // sizeof(${prefix_name})/sizeof(*${prefix_name}),
299 // ${possible_lengths_name},
300 // sizeof(${possible_lengths_name})/sizeof(*${possible_lengths_name}),
303 void WritePrefixDescriptionsDefinition(
304 const string& name, const string& prefixes_name, const string& desc_name,
305 const string& possible_lengths_name, FILE* output) {
306 fprintf(output, "const PrefixDescriptions %s = {\n", name.c_str());
307 WriteArrayAndSize(prefixes_name, output);
308 fprintf(output, " %s,\n", desc_name.c_str());
309 WriteArrayAndSize(possible_lengths_name, output);
310 fprintf(output, "};\n");
313 // Writes prefixes, descriptions and possible_lengths arrays built from the
314 // phone number prefix to description mapping "prefixes". Binds these arrays
315 // in a single PrefixDescriptions variable named "var_name".
317 // const int32 ${var_name}_prefixes[] = {
322 // const char* ${var_name}_descriptions[] = {
327 // const int32 ${var_name}_possible_lengths[] = {
331 // const PrefixDescriptions ${var_name} = {
335 void WritePrefixDescriptions(const string& var_name, const map<int, string>&
336 prefixes, FILE* output) {
337 set<int> possible_lengths;
338 const string prefixes_name = var_name + "_prefixes";
339 fprintf(output, "const int32 %s[] = {\n", prefixes_name.c_str());
340 for (map<int, string>::const_iterator it = prefixes.begin();
341 it != prefixes.end(); ++it) {
342 fprintf(output, " %d,\n", it->first);
343 possible_lengths.insert(static_cast<int>(log10(it->first) + 1));
349 const string desc_name = var_name + "_descriptions";
350 fprintf(output, "const char* %s[] = {\n", desc_name.c_str());
351 for (map<int, string>::const_iterator it = prefixes.begin();
352 it != prefixes.end(); ++it) {
353 fprintf(output, " ");
354 WriteStringLiteral(it->second, output);
355 fprintf(output, ",\n");
361 const string possible_lengths_name = var_name + "_possible_lengths";
362 fprintf(output, "const int32 %s[] = {\n ", possible_lengths_name.c_str());
363 for (set<int>::const_iterator it = possible_lengths.begin();
364 it != possible_lengths.end(); ++it) {
365 fprintf(output, " %d,", *it);
372 WritePrefixDescriptionsDefinition(var_name, prefixes_name, desc_name,
373 possible_lengths_name, output);
374 fprintf(output, "\n");
// Writes a pair of arrays mapping prefix language code pairs to
// PrefixDescriptions instances. "prefix_var_names" maps language code pairs
// to prefix variable names.
//
// const char* prefix_language_code_pairs[] = {
//   ...one quoted key per line, in map order...
// };
//
// const PrefixDescriptions* prefixes_descriptions[] = {
//   ...one "&variable" per line, in the same order...
// };
//
// When the map is empty the second array gets a single 0 entry instead.
void WritePrefixesDescriptions(
    const std::map<std::string, std::string>& prefix_var_names,
    FILE* output) {
  typedef std::map<std::string, std::string>::const_iterator NameIterator;

  fprintf(output, "const char* prefix_language_code_pairs[] = {\n");
  for (NameIterator it = prefix_var_names.begin();
       it != prefix_var_names.end(); ++it) {
    fprintf(output, " \"%s\",\n", it->first.c_str());
  }
  fprintf(output,
          "};\n"
          "\n"
          "const PrefixDescriptions* prefixes_descriptions[] = {\n");
  if (!prefix_var_names.empty()) {
    for (NameIterator it = prefix_var_names.begin();
         it != prefix_var_names.end(); ++it) {
      fprintf(output, " &%s,\n", it->second.c_str());
    }
  } else {
    fprintf(output, "%s", "0");
  }
  fprintf(output,
          "};\n"
          "\n");
}
415 // For each entry in "languages" mapping a country calling code to a set
416 // of available languages, writes a sorted array of languages, then wraps it
417 // into a CountryLanguages instance. Finally, writes a pair of arrays mapping
418 // country calling codes to CountryLanguages instances.
420 // const char* country_1[] = {
425 // const CountryLanguages country_1_languages = {
427 // sizeof(country_1)/sizeof(*country_1),
432 // const CountryLanguages* country_languages[] = {
433 // &country_1_languages,
437 // const int country_calling_codes[] = {
442 bool WriteCountryLanguages(const map<int32, set<string> >& languages,
444 vector<string> country_languages_vars;
445 vector<string> countries;
446 for (map<int32, set<string> >::const_iterator it = languages.begin();
447 it != languages.end(); ++it) {
449 if (!IntToStr(it->first, &country_code)) {
452 const string country_var = "country_" + country_code;
453 fprintf(output, "const char* %s[] = {\n", country_var.c_str());
454 for (set<string>::const_iterator it_lang = it->second.begin();
455 it_lang != it->second.end(); ++it_lang) {
456 fprintf(output, " \"%s\",\n", it_lang->c_str());
462 const string country_languages_var = country_var + "_languages";
463 fprintf(output, "const CountryLanguages %s = {\n",
464 country_languages_var.c_str());
465 WriteArrayAndSize(country_var, output);
469 country_languages_vars.push_back(country_languages_var);
470 countries.push_back(country_code);
473 fprintf(output, "\n" "const CountryLanguages* countries_languages[] = {\n");
474 if( country_languages_vars.size() > 0) {
475 for (vector<string>::const_iterator it_languages_var = country_languages_vars.begin();
476 it_languages_var != country_languages_vars.end(); ++it_languages_var) {
477 fprintf(output, " &%s,\n", it_languages_var->c_str());
480 fprintf(output, "%s", "0");
485 "const int country_calling_codes[] = {\n");
486 for (vector<string>::const_iterator it_country = countries.begin();
487 it_country != countries.end(); ++it_country) {
488 fprintf(output, " %s,\n", it_country->c_str());
// Returns a copy of input where all occurrences of pattern are replaced with
// value. If pattern is empty, input is returned unchanged. Matches are found
// left to right and do not overlap; inserted text is never searched again.
std::string ReplaceAll(const std::string& input, const std::string& pattern,
                       const std::string& value) {
  if (pattern.empty()) {
    return input;
  }
  std::string replaced;
  std::string::size_type start = 0;  // First input offset not yet copied.
  while (true) {
    const std::string::size_type pos = input.find(pattern, start);
    if (pos == std::string::npos) {
      // No further match: copy the tail and stop.
      replaced.append(input, start, std::string::npos);
      break;
    }
    replaced.append(input, start, pos - start);
    replaced.append(value);
    start = pos + pattern.length();
  }
  return replaced;
}
520 // Writes data accessor definitions, prefixed with "accessor_prefix".
521 void WriteAccessorsDefinitions(const string& accessor_prefix, FILE* output) {
523 "const int* get$prefix$_country_calling_codes() {\n"
524 " return country_calling_codes;\n"
527 "int get$prefix$_country_calling_codes_size() {\n"
528 " return sizeof(country_calling_codes)\n"
529 " /sizeof(*country_calling_codes);\n"
532 "const CountryLanguages* get$prefix$_country_languages(int index) {\n"
533 " return countries_languages[index];\n"
536 "const char** get$prefix$_prefix_language_code_pairs() {\n"
537 " return prefix_language_code_pairs;\n"
540 "int get$prefix$_prefix_language_code_pairs_size() {\n"
541 " return sizeof(prefix_language_code_pairs)\n"
542 " /sizeof(*prefix_language_code_pairs);\n"
545 "const PrefixDescriptions* get$prefix$_prefix_descriptions(int index) {\n"
546 " return prefixes_descriptions[index];\n"
548 string defs = ReplaceAll(templ, "$prefix$", accessor_prefix);
549 fprintf(output, "%s", defs.c_str());
552 // Writes geocoding data .cc file. "data_path" is the path of geocoding textual
553 // data directory. "base_name" is the base name of the .h/.cc pair, like
555 bool WriteSource(const string& data_path, const string& base_name,
556 const string& accessor_prefix, FILE* output) {
557 WriteLicense(output);
558 WriteCppHeader(base_name, output);
559 WriteNSHeader(output);
564 // Enumerate language/script directories.
565 map<string, string> prefix_vars;
566 map<int32, set<string> > country_languages;
567 vector<DirEntry> entries;
568 if (!ListDirectory(data_path, &entries)) {
569 fprintf(stderr, "failed to read directory entries");
572 for (vector<DirEntry>::const_iterator it = entries.begin();
573 it != entries.end(); ++it) {
574 if (it->kind() != kDirectory) {
577 // Enumerate country calling code files.
578 const string dir_path = data_path + "/" + it->name();
579 vector<DirEntry> files;
580 if (!ListDirectory(dir_path, &files)) {
581 fprintf(stderr, "failed to read file entries\n");
584 for (vector<DirEntry>::const_iterator it_files = files.begin();
585 it_files != files.end(); ++it_files) {
586 const string fname = it_files->name();
587 if (!EndsWith(fname, ".txt")) {
591 const string country_code_str = fname.substr(0, fname.length() - 4);
592 if (!StrToInt(country_code_str, &country_code)) {
595 const string path = dir_path + "/" + fname;
597 map<int32, string> prefixes;
598 if (!ParsePrefixes(path, &prefixes)) {
602 const string prefix_var = "prefix_" + country_code_str + "_" + it->name();
603 WritePrefixDescriptions(prefix_var, prefixes, output);
604 prefix_vars[country_code_str + "_" + it->name()] = prefix_var;
605 country_languages[country_code].insert(it->name());
608 WritePrefixesDescriptions(prefix_vars, output);
609 if (!WriteCountryLanguages(country_languages, output)) {
612 fprintf(output, "} // namespace\n");
613 fprintf(output, "\n");
614 WriteAccessorsDefinitions(accessor_prefix, output);
615 WriteNSFooter(output);
616 return ferror(output) == 0;
// Prints "message" as an error followed by the command-line usage on stderr.
// Always returns 1 so callers can use the result as the process exit status.
int PrintHelp(const std::string& message) {
  fprintf(stderr, "error: %s\n", message.c_str());
  fprintf(stderr, "generate_geocoding_data DATADIR CCPATH [ACCESSOR_PREFIX]\n");
  return 1;
}
625 int Main(int argc, const char* argv[]) {
627 return PrintHelp("geocoding data root directory expected");
630 return PrintHelp("output source path expected");
632 string accessor_prefix = "";
634 accessor_prefix = argv[3];
636 const string root_path(argv[1]);
637 string source_path(argv[2]);
638 std::replace(source_path.begin(), source_path.end(), '\\', '/');
639 string base_name = source_path;
640 if (base_name.rfind('/') != string::npos) {
641 base_name = base_name.substr(base_name.rfind('/') + 1);
643 base_name = base_name.substr(0, base_name.rfind('.'));
645 FILE* source_fp = fopen(source_path.c_str(), "w");
647 fprintf(stderr, "failed to open %s\n", source_path.c_str());
650 AutoCloser<FILE> source_closer(&source_fp, fclose);
651 if (!WriteSource(root_path, base_name, accessor_prefix,
658 } // namespace phonenumbers