2 * Copyright (C) 2009 The Libphonenumber Authors
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package com.google.i18n.phonenumbers;
19 import com.google.i18n.phonenumbers.Phonemetadata.NumberFormat;
20 import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
21 import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadataCollection;
22 import com.google.i18n.phonenumbers.Phonemetadata.PhoneNumberDesc;
24 import org.w3c.dom.Document;
25 import org.w3c.dom.Element;
26 import org.w3c.dom.NodeList;
29 import java.util.ArrayList;
30 import java.util.List;
32 import java.util.TreeMap;
33 import java.util.logging.Level;
34 import java.util.logging.Logger;
35 import java.util.regex.Pattern;
36 import java.util.regex.PatternSyntaxException;
38 import javax.xml.parsers.DocumentBuilder;
39 import javax.xml.parsers.DocumentBuilderFactory;
42 * Library to build phone number metadata from the XML format.
44 * @author Shaopeng Jia
46 public class BuildMetadataFromXml {
// Logger used to report problems found in the XML metadata right before an exception is thrown.
private static final Logger LOGGER = Logger.getLogger(BuildMetadataFromXml.class.getName());

// String constants used to fetch the XML nodes and attributes.
// Each value is the literal element or attribute name as it is looked up in the XML document.
private static final String CARRIER_CODE_FORMATTING_RULE = "carrierCodeFormattingRule";
private static final String CARRIER_SPECIFIC = "carrierSpecific";
private static final String COUNTRY_CODE = "countryCode";
private static final String EMERGENCY = "emergency";
private static final String EXAMPLE_NUMBER = "exampleNumber";
private static final String FIXED_LINE = "fixedLine";
private static final String FORMAT = "format";
private static final String GENERAL_DESC = "generalDesc";
private static final String INTERNATIONAL_PREFIX = "internationalPrefix";
private static final String INTL_FORMAT = "intlFormat";
private static final String LEADING_DIGITS = "leadingDigits";
private static final String LEADING_ZERO_POSSIBLE = "leadingZeroPossible";
private static final String MAIN_COUNTRY_FOR_CODE = "mainCountryForCode";
private static final String MOBILE = "mobile";
private static final String MOBILE_NUMBER_PORTABLE_REGION = "mobileNumberPortableRegion";
private static final String NATIONAL_NUMBER_PATTERN = "nationalNumberPattern";
private static final String NATIONAL_PREFIX = "nationalPrefix";
private static final String NATIONAL_PREFIX_FORMATTING_RULE = "nationalPrefixFormattingRule";
private static final String NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING =
    "nationalPrefixOptionalWhenFormatting";
private static final String NATIONAL_PREFIX_FOR_PARSING = "nationalPrefixForParsing";
private static final String NATIONAL_PREFIX_TRANSFORM_RULE = "nationalPrefixTransformRule";
private static final String NO_INTERNATIONAL_DIALLING = "noInternationalDialling";
private static final String NUMBER_FORMAT = "numberFormat";
private static final String PAGER = "pager";
private static final String PATTERN = "pattern";
private static final String PERSONAL_NUMBER = "personalNumber";
private static final String POSSIBLE_NUMBER_PATTERN = "possibleNumberPattern";
private static final String PREFERRED_EXTN_PREFIX = "preferredExtnPrefix";
private static final String PREFERRED_INTERNATIONAL_PREFIX = "preferredInternationalPrefix";
private static final String PREMIUM_RATE = "premiumRate";
private static final String SHARED_COST = "sharedCost";
private static final String SHORT_CODE = "shortCode";
private static final String STANDARD_RATE = "standardRate";
private static final String TOLL_FREE = "tollFree";
private static final String UAN = "uan";
private static final String VOICEMAIL = "voicemail";
private static final String VOIP = "voip";
89 // Build the PhoneMetadataCollection from the input XML file.
90 public static PhoneMetadataCollection buildPhoneMetadataCollection(String inputXmlFile,
91 boolean liteBuild) throws Exception {
92 DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
93 DocumentBuilder builder = builderFactory.newDocumentBuilder();
94 File xmlFile = new File(inputXmlFile);
95 Document document = builder.parse(xmlFile);
96 document.getDocumentElement().normalize();
97 Element rootElement = document.getDocumentElement();
98 NodeList territory = rootElement.getElementsByTagName("territory");
99 PhoneMetadataCollection.Builder metadataCollection = PhoneMetadataCollection.newBuilder();
100 int numOfTerritories = territory.getLength();
101 // TODO: Look for other uses of these constants and possibly pull them out into
102 // a separate constants file.
103 boolean isShortNumberMetadata = inputXmlFile.contains("ShortNumberMetadata");
104 boolean isAlternateFormatsMetadata = inputXmlFile.contains("PhoneNumberAlternateFormats");
105 for (int i = 0; i < numOfTerritories; i++) {
106 Element territoryElement = (Element) territory.item(i);
107 String regionCode = "";
108 // For the main metadata file this should always be set, but for other supplementary data
109 // files the country calling code may be all that is needed.
110 if (territoryElement.hasAttribute("id")) {
111 regionCode = territoryElement.getAttribute("id");
113 PhoneMetadata metadata = loadCountryMetadata(regionCode, territoryElement, liteBuild,
114 isShortNumberMetadata, isAlternateFormatsMetadata);
115 metadataCollection.addMetadata(metadata);
117 return metadataCollection.build();
120 // Build a mapping from a country calling code to the region codes which denote the country/region
121 // represented by that country code. In the case of multiple countries sharing a calling code,
122 // such as the NANPA countries, the one indicated with "isMainCountryForCode" in the metadata
124 public static Map<Integer, List<String>> buildCountryCodeToRegionCodeMap(
125 PhoneMetadataCollection metadataCollection) {
126 Map<Integer, List<String>> countryCodeToRegionCodeMap =
127 new TreeMap<Integer, List<String>>();
128 for (PhoneMetadata metadata : metadataCollection.getMetadataList()) {
129 String regionCode = metadata.getId();
130 int countryCode = metadata.getCountryCode();
131 if (countryCodeToRegionCodeMap.containsKey(countryCode)) {
132 if (metadata.getMainCountryForCode()) {
133 countryCodeToRegionCodeMap.get(countryCode).add(0, regionCode);
135 countryCodeToRegionCodeMap.get(countryCode).add(regionCode);
138 // For most countries, there will be only one region code for the country calling code.
139 List<String> listWithRegionCode = new ArrayList<String>(1);
140 if (!regionCode.isEmpty()) { // For alternate formats, there are no region codes at all.
141 listWithRegionCode.add(regionCode);
143 countryCodeToRegionCodeMap.put(countryCode, listWithRegionCode);
146 return countryCodeToRegionCodeMap;
149 private static String validateRE(String regex) {
150 return validateRE(regex, false);
153 // @VisibleForTesting
154 static String validateRE(String regex, boolean removeWhitespace) {
155 // Removes all the whitespace and newline from the regexp. Not using pattern compile options to
156 // make it work across programming languages.
157 String compressedRegex = removeWhitespace ? regex.replaceAll("\\s", "") : regex;
158 Pattern.compile(compressedRegex);
159 // We don't ever expect to see | followed by a ) in our metadata - this would be an indication
160 // of a bug. If one wants to make something optional, we prefer ? to using an empty group.
161 int errorIndex = compressedRegex.indexOf("|)");
162 if (errorIndex >= 0) {
163 LOGGER.log(Level.SEVERE,
164 "Error with original regex: " + regex + "\n| should not be followed directly " +
165 "by ) in phone number regular expressions.");
166 throw new PatternSyntaxException("| followed by )", compressedRegex, errorIndex);
168 // return the regex if it is of correct syntax, i.e. compile did not fail with a
169 // PatternSyntaxException.
170 return compressedRegex;
174 * Returns the national prefix of the provided country element.
176 // @VisibleForTesting
177 static String getNationalPrefix(Element element) {
178 return element.hasAttribute(NATIONAL_PREFIX) ? element.getAttribute(NATIONAL_PREFIX) : "";
181 // @VisibleForTesting
182 static PhoneMetadata.Builder loadTerritoryTagMetadata(String regionCode, Element element,
183 String nationalPrefix) {
184 PhoneMetadata.Builder metadata = PhoneMetadata.newBuilder();
185 metadata.setId(regionCode);
186 if (element.hasAttribute(COUNTRY_CODE)) {
187 metadata.setCountryCode(Integer.parseInt(element.getAttribute(COUNTRY_CODE)));
189 if (element.hasAttribute(LEADING_DIGITS)) {
190 metadata.setLeadingDigits(validateRE(element.getAttribute(LEADING_DIGITS)));
192 metadata.setInternationalPrefix(validateRE(element.getAttribute(INTERNATIONAL_PREFIX)));
193 if (element.hasAttribute(PREFERRED_INTERNATIONAL_PREFIX)) {
194 String preferredInternationalPrefix = element.getAttribute(PREFERRED_INTERNATIONAL_PREFIX);
195 metadata.setPreferredInternationalPrefix(preferredInternationalPrefix);
197 if (element.hasAttribute(NATIONAL_PREFIX_FOR_PARSING)) {
198 metadata.setNationalPrefixForParsing(
199 validateRE(element.getAttribute(NATIONAL_PREFIX_FOR_PARSING), true));
200 if (element.hasAttribute(NATIONAL_PREFIX_TRANSFORM_RULE)) {
201 metadata.setNationalPrefixTransformRule(
202 validateRE(element.getAttribute(NATIONAL_PREFIX_TRANSFORM_RULE)));
205 if (!nationalPrefix.isEmpty()) {
206 metadata.setNationalPrefix(nationalPrefix);
207 if (!metadata.hasNationalPrefixForParsing()) {
208 metadata.setNationalPrefixForParsing(nationalPrefix);
211 if (element.hasAttribute(PREFERRED_EXTN_PREFIX)) {
212 metadata.setPreferredExtnPrefix(element.getAttribute(PREFERRED_EXTN_PREFIX));
214 if (element.hasAttribute(MAIN_COUNTRY_FOR_CODE)) {
215 metadata.setMainCountryForCode(true);
217 if (element.hasAttribute(LEADING_ZERO_POSSIBLE)) {
218 metadata.setLeadingZeroPossible(true);
220 if (element.hasAttribute(MOBILE_NUMBER_PORTABLE_REGION)) {
221 metadata.setMobileNumberPortableRegion(true);
227 * Extracts the pattern for international format. If there is no intlFormat, default to using the
228 * national format. If the intlFormat is set to "NA" the intlFormat should be ignored.
230 * @throws RuntimeException if multiple intlFormats have been encountered.
231 * @return whether an international number format is defined.
233 // @VisibleForTesting
234 static boolean loadInternationalFormat(PhoneMetadata.Builder metadata,
235 Element numberFormatElement,
236 NumberFormat nationalFormat) {
237 NumberFormat.Builder intlFormat = NumberFormat.newBuilder();
238 setLeadingDigitsPatterns(numberFormatElement, intlFormat);
239 intlFormat.setPattern(numberFormatElement.getAttribute(PATTERN));
240 NodeList intlFormatPattern = numberFormatElement.getElementsByTagName(INTL_FORMAT);
241 boolean hasExplicitIntlFormatDefined = false;
243 if (intlFormatPattern.getLength() > 1) {
244 LOGGER.log(Level.SEVERE,
245 "A maximum of one intlFormat pattern for a numberFormat element should be " +
247 throw new RuntimeException("Invalid number of intlFormat patterns for country: " +
249 } else if (intlFormatPattern.getLength() == 0) {
250 // Default to use the same as the national pattern if none is defined.
251 intlFormat.mergeFrom(nationalFormat);
253 String intlFormatPatternValue = intlFormatPattern.item(0).getFirstChild().getNodeValue();
254 if (!intlFormatPatternValue.equals("NA")) {
255 intlFormat.setFormat(intlFormatPatternValue);
257 hasExplicitIntlFormatDefined = true;
260 if (intlFormat.hasFormat()) {
261 metadata.addIntlNumberFormat(intlFormat);
263 return hasExplicitIntlFormatDefined;
267 * Extracts the pattern for the national format.
269 * @throws RuntimeException if multiple or no formats have been encountered.
271 // @VisibleForTesting
272 static void loadNationalFormat(PhoneMetadata.Builder metadata, Element numberFormatElement,
273 NumberFormat.Builder format) {
274 setLeadingDigitsPatterns(numberFormatElement, format);
275 format.setPattern(validateRE(numberFormatElement.getAttribute(PATTERN)));
277 NodeList formatPattern = numberFormatElement.getElementsByTagName(FORMAT);
278 if (formatPattern.getLength() != 1) {
279 LOGGER.log(Level.SEVERE,
280 "Only one format pattern for a numberFormat element should be defined.");
281 throw new RuntimeException("Invalid number of format patterns for country: " +
284 format.setFormat(formatPattern.item(0).getFirstChild().getNodeValue());
288 * Extracts the available formats from the provided DOM element. If it does not contain any
289 * nationalPrefixFormattingRule, the one passed-in is retained. The nationalPrefix,
290 * nationalPrefixFormattingRule and nationalPrefixOptionalWhenFormatting values are provided from
291 * the parent (territory) element.
293 // @VisibleForTesting
294 static void loadAvailableFormats(PhoneMetadata.Builder metadata,
295 Element element, String nationalPrefix,
296 String nationalPrefixFormattingRule,
297 boolean nationalPrefixOptionalWhenFormatting) {
298 String carrierCodeFormattingRule = "";
299 if (element.hasAttribute(CARRIER_CODE_FORMATTING_RULE)) {
300 carrierCodeFormattingRule = validateRE(
301 getDomesticCarrierCodeFormattingRuleFromElement(element, nationalPrefix));
303 NodeList numberFormatElements = element.getElementsByTagName(NUMBER_FORMAT);
304 boolean hasExplicitIntlFormatDefined = false;
306 int numOfFormatElements = numberFormatElements.getLength();
307 if (numOfFormatElements > 0) {
308 for (int i = 0; i < numOfFormatElements; i++) {
309 Element numberFormatElement = (Element) numberFormatElements.item(i);
310 NumberFormat.Builder format = NumberFormat.newBuilder();
312 if (numberFormatElement.hasAttribute(NATIONAL_PREFIX_FORMATTING_RULE)) {
313 format.setNationalPrefixFormattingRule(
314 getNationalPrefixFormattingRuleFromElement(numberFormatElement, nationalPrefix));
316 format.setNationalPrefixFormattingRule(nationalPrefixFormattingRule);
319 if (format.hasNationalPrefixFormattingRule()) {
320 if (numberFormatElement.hasAttribute(NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING)) {
321 format.setNationalPrefixOptionalWhenFormatting(
322 Boolean.valueOf(numberFormatElement.getAttribute(
323 NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING)));
325 format.setNationalPrefixOptionalWhenFormatting(nationalPrefixOptionalWhenFormatting);
328 if (numberFormatElement.hasAttribute(CARRIER_CODE_FORMATTING_RULE)) {
329 format.setDomesticCarrierCodeFormattingRule(validateRE(
330 getDomesticCarrierCodeFormattingRuleFromElement(numberFormatElement,
333 format.setDomesticCarrierCodeFormattingRule(carrierCodeFormattingRule);
335 loadNationalFormat(metadata, numberFormatElement, format);
336 metadata.addNumberFormat(format);
338 if (loadInternationalFormat(metadata, numberFormatElement, format.build())) {
339 hasExplicitIntlFormatDefined = true;
342 // Only a small number of regions need to specify the intlFormats in the xml. For the majority
343 // of countries the intlNumberFormat metadata is an exact copy of the national NumberFormat
344 // metadata. To minimize the size of the metadata file, we only keep intlNumberFormats that
345 // actually differ in some way to the national formats.
346 if (!hasExplicitIntlFormatDefined) {
347 metadata.clearIntlNumberFormat();
352 // @VisibleForTesting
353 static void setLeadingDigitsPatterns(Element numberFormatElement, NumberFormat.Builder format) {
354 NodeList leadingDigitsPatternNodes = numberFormatElement.getElementsByTagName(LEADING_DIGITS);
355 int numOfLeadingDigitsPatterns = leadingDigitsPatternNodes.getLength();
356 if (numOfLeadingDigitsPatterns > 0) {
357 for (int i = 0; i < numOfLeadingDigitsPatterns; i++) {
358 format.addLeadingDigitsPattern(
359 validateRE((leadingDigitsPatternNodes.item(i)).getFirstChild().getNodeValue(), true));
364 // @VisibleForTesting
365 static String getNationalPrefixFormattingRuleFromElement(Element element,
366 String nationalPrefix) {
367 String nationalPrefixFormattingRule = element.getAttribute(NATIONAL_PREFIX_FORMATTING_RULE);
368 // Replace $NP with national prefix and $FG with the first group ($1).
369 nationalPrefixFormattingRule =
370 nationalPrefixFormattingRule.replaceFirst("\\$NP", nationalPrefix)
371 .replaceFirst("\\$FG", "\\$1");
372 return nationalPrefixFormattingRule;
375 // @VisibleForTesting
376 static String getDomesticCarrierCodeFormattingRuleFromElement(Element element,
377 String nationalPrefix) {
378 String carrierCodeFormattingRule = element.getAttribute(CARRIER_CODE_FORMATTING_RULE);
379 // Replace $FG with the first group ($1) and $NP with the national prefix.
380 carrierCodeFormattingRule = carrierCodeFormattingRule.replaceFirst("\\$FG", "\\$1")
381 .replaceFirst("\\$NP", nationalPrefix);
382 return carrierCodeFormattingRule;
385 // @VisibleForTesting
386 static boolean isValidNumberType(String numberType) {
387 return numberType.equals(FIXED_LINE) || numberType.equals(MOBILE) ||
388 numberType.equals(GENERAL_DESC);
392 * Processes a phone number description element from the XML file and returns it as a
393 * PhoneNumberDesc. If the description element is a fixed line or mobile number, the general
394 * description will be used to fill in the whole element if necessary, or any components that are
395 * missing. For all other types, the general description will only be used to fill in missing
396 * components if the type has a partial definition. For example, if no "tollFree" element exists,
397 * we assume there are no toll free numbers for that locale, and return a phone number description
398 * with "NA" for both the national and possible number patterns.
400 * @param generalDesc a generic phone number description that will be used to fill in missing
401 * parts of the description
402 * @param countryElement the XML element representing all the country information
403 * @param numberType the name of the number type, corresponding to the appropriate tag in the XML
404 * file with information about that type
405 * @return complete description of that phone number type
407 // @VisibleForTesting
408 static PhoneNumberDesc.Builder processPhoneNumberDescElement(PhoneNumberDesc.Builder generalDesc,
409 Element countryElement,
412 NodeList phoneNumberDescList = countryElement.getElementsByTagName(numberType);
413 PhoneNumberDesc.Builder numberDesc = PhoneNumberDesc.newBuilder();
414 if (phoneNumberDescList.getLength() == 0 && !isValidNumberType(numberType)) {
415 numberDesc.setNationalNumberPattern("NA");
416 numberDesc.setPossibleNumberPattern("NA");
419 numberDesc.mergeFrom(generalDesc.build());
420 if (phoneNumberDescList.getLength() > 0) {
421 Element element = (Element) phoneNumberDescList.item(0);
422 NodeList possiblePattern = element.getElementsByTagName(POSSIBLE_NUMBER_PATTERN);
423 if (possiblePattern.getLength() > 0) {
424 numberDesc.setPossibleNumberPattern(
425 validateRE(possiblePattern.item(0).getFirstChild().getNodeValue(), true));
428 NodeList validPattern = element.getElementsByTagName(NATIONAL_NUMBER_PATTERN);
429 if (validPattern.getLength() > 0) {
430 numberDesc.setNationalNumberPattern(
431 validateRE(validPattern.item(0).getFirstChild().getNodeValue(), true));
435 NodeList exampleNumber = element.getElementsByTagName(EXAMPLE_NUMBER);
436 if (exampleNumber.getLength() > 0) {
437 numberDesc.setExampleNumber(exampleNumber.item(0).getFirstChild().getNodeValue());
444 // @VisibleForTesting
445 static void setRelevantDescPatterns(PhoneMetadata.Builder metadata, Element element,
446 boolean liteBuild, boolean isShortNumberMetadata) {
447 PhoneNumberDesc.Builder generalDesc = PhoneNumberDesc.newBuilder();
448 generalDesc = processPhoneNumberDescElement(generalDesc, element, GENERAL_DESC, liteBuild);
449 metadata.setGeneralDesc(generalDesc);
451 if (!isShortNumberMetadata) {
452 // Set fields used only by regular length phone numbers.
453 metadata.setFixedLine(
454 processPhoneNumberDescElement(generalDesc, element, FIXED_LINE, liteBuild));
456 processPhoneNumberDescElement(generalDesc, element, MOBILE, liteBuild));
457 metadata.setSharedCost(
458 processPhoneNumberDescElement(generalDesc, element, SHARED_COST, liteBuild));
460 processPhoneNumberDescElement(generalDesc, element, VOIP, liteBuild));
461 metadata.setPersonalNumber(
462 processPhoneNumberDescElement(generalDesc, element, PERSONAL_NUMBER, liteBuild));
464 processPhoneNumberDescElement(generalDesc, element, PAGER, liteBuild));
466 processPhoneNumberDescElement(generalDesc, element, UAN, liteBuild));
467 metadata.setVoicemail(
468 processPhoneNumberDescElement(generalDesc, element, VOICEMAIL, liteBuild));
469 metadata.setNoInternationalDialling(
470 processPhoneNumberDescElement(generalDesc, element, NO_INTERNATIONAL_DIALLING,
472 metadata.setSameMobileAndFixedLinePattern(
473 metadata.getMobile().getNationalNumberPattern().equals(
474 metadata.getFixedLine().getNationalNumberPattern()));
476 // Set fields used only by short numbers.
477 metadata.setStandardRate(
478 processPhoneNumberDescElement(generalDesc, element, STANDARD_RATE, liteBuild));
479 metadata.setShortCode(
480 processPhoneNumberDescElement(generalDesc, element, SHORT_CODE, liteBuild));
481 metadata.setCarrierSpecific(
482 processPhoneNumberDescElement(generalDesc, element, CARRIER_SPECIFIC, liteBuild));
483 metadata.setEmergency(
484 processPhoneNumberDescElement(generalDesc, element, EMERGENCY, liteBuild));
487 // Set fields used by both regular length and short numbers.
488 metadata.setTollFree(
489 processPhoneNumberDescElement(generalDesc, element, TOLL_FREE, liteBuild));
490 metadata.setPremiumRate(
491 processPhoneNumberDescElement(generalDesc, element, PREMIUM_RATE, liteBuild));
494 // @VisibleForTesting
495 static PhoneMetadata loadCountryMetadata(String regionCode, Element element, boolean liteBuild,
496 boolean isShortNumberMetadata, boolean isAlternateFormatsMetadata) {
497 String nationalPrefix = getNationalPrefix(element);
498 PhoneMetadata.Builder metadata =
499 loadTerritoryTagMetadata(regionCode, element, nationalPrefix);
500 String nationalPrefixFormattingRule =
501 getNationalPrefixFormattingRuleFromElement(element, nationalPrefix);
502 loadAvailableFormats(metadata, element, nationalPrefix.toString(),
503 nationalPrefixFormattingRule.toString(),
504 element.hasAttribute(NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING));
505 if (!isAlternateFormatsMetadata) {
506 // The alternate formats metadata does not need most of the patterns to be set.
507 setRelevantDescPatterns(metadata, element, liteBuild, isShortNumberMetadata);
509 return metadata.build();