+June 10th, 2011
+* Code changes:
+ - Fixes for PhoneNumberMatcher to be more restrictive in valid mode and not match numbers
+ surrounded by Latin characters. This ensures, for example, the string abc123456789acg will not be
+ marked as a phone number.
+ - Enable PhoneNumberUtil to handle all digits, rather than a subset
+ - Fix for AYTF issue36 and improvement for US AYTF behaviour.
+* Metadata changes:
+ - Updates: BG, EG, ES, GH, PF, SC, SY, VA
+
May 24th, 2011
* Code changes:
- Phonenumber now implements Serializable.
private boolean maybeCreateNewTemplate() {
// When there are multiple available formats, the formatter uses the first format where a
// formatting template could be created.
- for (NumberFormat numberFormat : possibleFormats) {
+ Iterator<NumberFormat> it = possibleFormats.iterator();
+ while (it.hasNext()) {
+ NumberFormat numberFormat = it.next();
String pattern = numberFormat.getPattern();
if (currentFormattingPattern.equals(pattern)) {
return false;
if (createFormattingTemplate(numberFormat)) {
currentFormattingPattern = pattern;
return true;
+ } else { // Remove the current number format from possibleFormats.
+ it.remove();
}
}
ableToFormat = false;
numberPattern = STANDALONE_DIGIT_PATTERN.matcher(numberPattern).replaceAll("\\\\d");
formattingTemplate.setLength(0);
String tempTemplate = getFormattingTemplate(numberPattern, format.getFormat());
- if (tempTemplate.length() > nationalNumber.length()) {
+ if (tempTemplate.length() > 0) {
formattingTemplate.append(tempTemplate);
return true;
}
Matcher m = regexCache.getPatternForRegex(numberPattern).matcher(longestPhoneNumber);
m.find(); // this will always succeed
String aPhoneNumber = m.group();
+ // No formatting template can be created if the number of digits entered so far is longer than
+ // the maximum the current formatting rule can accommodate.
+ if (aPhoneNumber.length() < nationalNumber.length()) {
+ return "";
+ }
// Formats the number according to numberFormat
String template = aPhoneNumber.replaceAll(numberPattern, numberFormat);
// Replaces each digit with character digitPlaceholder
// version, if nextChar is a digit in non-ASCII format. This method assumes its input is either a
// digit or the plus sign.
private char normalizeAndAccrueDigitsAndPlusSign(char nextChar, boolean rememberPosition) {
+ char normalizedChar;
if (nextChar == PhoneNumberUtil.PLUS_SIGN) {
+ normalizedChar = nextChar;
accruedInputWithoutFormatting.append(nextChar);
} else {
- nextChar = PhoneNumberUtil.DIGIT_MAPPINGS.get(nextChar);
- accruedInputWithoutFormatting.append(nextChar);
- nationalNumber.append(nextChar);
+ int radix = 10;
+ normalizedChar = Character.forDigit(Character.digit(nextChar, radix), radix);
+ accruedInputWithoutFormatting.append(normalizedChar);
+ nationalNumber.append(normalizedChar);
}
if (rememberPosition) {
positionToRemember = accruedInputWithoutFormatting.length();
}
- return nextChar;
+ return normalizedChar;
}
private String inputDigitHelper(char nextChar) {
/** The start index into the text. */
private final int start;
/** The raw substring matched. */
- private final String match;
+ private final String rawString;
/** The matched phone number. */
private final PhoneNumber number;
* Creates a new match.
*
* @param start the start index into the target text
- * @param match the matched substring of the target text
+ * @param rawString the matched substring of the target text
* @param number the matched phone number
*/
- PhoneNumberMatch(int start, String match, PhoneNumber number) {
+ PhoneNumberMatch(int start, String rawString, PhoneNumber number) {
if (start < 0) {
throw new IllegalArgumentException("Start index must be >= 0.");
}
- if (match == null || number == null) {
+ if (rawString == null || number == null) {
throw new NullPointerException();
}
this.start = start;
- this.match = match;
+ this.rawString = rawString;
this.number = number;
}
/** Returns the exclusive end index of the matched phone number within the searched text. */
public int end() {
- return start + match.length();
+ return start + rawString.length();
}
/** Returns the raw string matched as a phone number in the searched text. */
public String rawString() {
- return match;
+ return rawString;
}
@Override
public int hashCode() {
- return Arrays.hashCode(new Object[]{start, match, number});
+ return Arrays.hashCode(new Object[]{start, rawString, number});
}
@Override
return false;
}
PhoneNumberMatch other = (PhoneNumberMatch) obj;
- return match.equals(other.match) && (start == other.start) && number.equals(other.number);
+ return rawString.equals(other.rawString) && (start == other.start) &&
+ number.equals(other.number);
}
@Override
public String toString() {
- return "PhoneNumberMatch [" + start() + "," + end() + ") " + match;
+ return "PhoneNumberMatch [" + start() + "," + end() + ") " + rawString;
}
}
import com.google.i18n.phonenumbers.PhoneNumberUtil.Leniency;
import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber;
+import java.lang.Character.UnicodeBlock;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
*/
private static final Pattern GROUP_SEPARATOR = Pattern.compile("\\p{Z}+");
+ /**
+ * Punctuation that may be at the start of a phone number - brackets and plus signs.
+ */
+ private static final Pattern LEAD_CLASS;
+
static {
/* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist
* to make the pattern more easily understood. */
* country code. */
int digitBlockLimit =
PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE;
- /* Limit on the number of blocks separated by punctuation. Use digitBlockLimit since in some
+ /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some
* formats use spaces to separate each digit. */
String blockLimit = limit(0, digitBlockLimit);
String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit;
/* A digits block without punctuation. */
String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit);
- /* Punctuation that may be at the start of a phone number - brackets and plus signs. */
+
String leadClass = "[" + openingParens + PhoneNumberUtil.PLUS_CHARS + "]";
+ LEAD_CLASS = Pattern.compile(leadClass);
/* Phone number pattern allowing optional punctuation. */
PATTERN = Pattern.compile(
}
/** The phone number utility. */
- private final PhoneNumberUtil util;
+ private final PhoneNumberUtil phoneUtil;
/** The text searched for phone numbers. */
private final CharSequence text;
/**
if (maxTries < 0) {
throw new IllegalArgumentException();
}
- this.util = util;
+ this.phoneUtil = util;
this.text = (text != null) ? text : "";
this.preferredRegion = country;
this.leniency = leniency;
}
/**
+ * Helper method to determine if a character is a Latin-script letter or not. For our purposes,
+ * combining marks should also return true since we assume they have been added to a preceding
+ * Latin character.
+ */
+ static boolean isLatinLetter(char letter) {
+ // Combining marks are a subset of non-spacing-mark.
+ if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
+ return false;
+ }
+ UnicodeBlock block = UnicodeBlock.of(letter);
+ return block.equals(UnicodeBlock.BASIC_LATIN) ||
+ block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) ||
+ block.equals(UnicodeBlock.LATIN_EXTENDED_A) ||
+ block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) ||
+ block.equals(UnicodeBlock.LATIN_EXTENDED_B) ||
+ block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
+ }
+
+ /**
* Attempts to extract a match from a {@code candidate} character sequence.
*
* @param candidate the candidate text that might contain a phone number
return null;
}
+ // If leniency is set to VALID only, we also want to skip numbers that are surrounded by Latin
+ // alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
+ if (leniency == Leniency.VALID) {
+ // If the candidate is not at the start of the text, and does not start with punctuation and
+ // the previous character is a Latin letter, return null.
+ if (offset > 0 &&
+ (!LEAD_CLASS.matcher(candidate).lookingAt() && isLatinLetter(text.charAt(offset - 1)))) {
+ return null;
+ }
+ int lastCharIndex = offset + candidate.length();
+ if (lastCharIndex < text.length() && isLatinLetter(text.charAt(lastCharIndex))) {
+ return null;
+ }
+ }
+
// Try to come up with a valid match given the entire candidate.
String rawString = candidate.toString();
PhoneNumberMatch match = parseAndVerify(rawString, offset);
*/
private PhoneNumberMatch extractInnerMatch(String candidate, int offset) {
// Try removing either the first or last "group" in the number and see if this gives a result.
- // We consider white space to be a possible indications of the start or end of the phone number.
+ // We consider white space to be a possible indication of the start or end of the phone number.
Matcher groupMatcher = GROUP_SEPARATOR.matcher(candidate);
if (groupMatcher.find()) {
if (!MATCHING_BRACKETS.matcher(candidate).matches()) {
return null;
}
- PhoneNumber number = util.parse(candidate, preferredRegion);
- if (leniency.verify(number, util)) {
+ PhoneNumber number = phoneUtil.parse(candidate, preferredRegion);
+ if (leniency.verify(number, phoneUtil)) {
return new PhoneNumberMatch(offset, candidate, number);
}
} catch (NumberParseException e) {
// Region-code for the unknown region.
private static final String UNKNOWN_REGION = "ZZ";
- // The set of regions that share country code 1.
+ // The set of regions that share country calling code 1.
// There are roughly 26 regions and we set the initial capacity of the HashSet to 35 to offer a
// load factor of roughly 0.75.
private final Set<String> nanpaRegions = new HashSet<String>(35);
private static final String RFC3966_EXTN_PREFIX = ";ext=";
- // These mappings map a character (key) to a specific digit that should replace it for
- // normalization purposes. Non-European digits that may be used in phone numbers are mapped to a
- // European equivalent.
- static final Map<Character, Character> DIGIT_MAPPINGS;
-
// Only upper-case variants of alpha characters are stored.
private static final Map<Character, Character> ALPHA_MAPPINGS;
// For performance reasons, amalgamate both into one map.
- private static final Map<Character, Character> ALL_NORMALIZATION_MAPPINGS;
+ private static final Map<Character, Character> ALPHA_PHONE_MAPPINGS;
// Separate map of all symbols that we wish to retain when formatting alpha numbers. This
// includes digits, ASCII letters and number grouping symbols such as "-" and " ".
private static final Map<Character, Character> ALL_PLUS_NUMBER_GROUPING_SYMBOLS;
static {
- // Simple ASCII digits map used to populate DIGIT_MAPPINGS and
+ // Simple ASCII digits map used to populate ALPHA_PHONE_MAPPINGS and
// ALL_PLUS_NUMBER_GROUPING_SYMBOLS.
HashMap<Character, Character> asciiDigitMappings = new HashMap<Character, Character>();
asciiDigitMappings.put('0', '0');
asciiDigitMappings.put('8', '8');
asciiDigitMappings.put('9', '9');
- HashMap<Character, Character> digitMap = new HashMap<Character, Character>(50);
- digitMap.putAll(asciiDigitMappings);
- digitMap.put('\uFF10', '0'); // Fullwidth digit 0
- digitMap.put('\u0660', '0'); // Arabic-indic digit 0
- digitMap.put('\u06F0', '0'); // Eastern-Arabic digit 0
- digitMap.put('\uFF11', '1'); // Fullwidth digit 1
- digitMap.put('\u0661', '1'); // Arabic-indic digit 1
- digitMap.put('\u06F1', '1'); // Eastern-Arabic digit 1
- digitMap.put('\uFF12', '2'); // Fullwidth digit 2
- digitMap.put('\u0662', '2'); // Arabic-indic digit 2
- digitMap.put('\u06F2', '2'); // Eastern-Arabic digit 2
- digitMap.put('\uFF13', '3'); // Fullwidth digit 3
- digitMap.put('\u0663', '3'); // Arabic-indic digit 3
- digitMap.put('\u06F3', '3'); // Eastern-Arabic digit 3
- digitMap.put('\uFF14', '4'); // Fullwidth digit 4
- digitMap.put('\u0664', '4'); // Arabic-indic digit 4
- digitMap.put('\u06F4', '4'); // Eastern-Arabic digit 4
- digitMap.put('\uFF15', '5'); // Fullwidth digit 5
- digitMap.put('\u0665', '5'); // Arabic-indic digit 5
- digitMap.put('\u06F5', '5'); // Eastern-Arabic digit 5
- digitMap.put('\uFF16', '6'); // Fullwidth digit 6
- digitMap.put('\u0666', '6'); // Arabic-indic digit 6
- digitMap.put('\u06F6', '6'); // Eastern-Arabic digit 6
- digitMap.put('\uFF17', '7'); // Fullwidth digit 7
- digitMap.put('\u0667', '7'); // Arabic-indic digit 7
- digitMap.put('\u06F7', '7'); // Eastern-Arabic digit 7
- digitMap.put('\uFF18', '8'); // Fullwidth digit 8
- digitMap.put('\u0668', '8'); // Arabic-indic digit 8
- digitMap.put('\u06F8', '8'); // Eastern-Arabic digit 8
- digitMap.put('\uFF19', '9'); // Fullwidth digit 9
- digitMap.put('\u0669', '9'); // Arabic-indic digit 9
- digitMap.put('\u06F9', '9'); // Eastern-Arabic digit 9
- DIGIT_MAPPINGS = Collections.unmodifiableMap(digitMap);
-
HashMap<Character, Character> alphaMap = new HashMap<Character, Character>(40);
alphaMap.put('A', '2');
alphaMap.put('B', '2');
ALPHA_MAPPINGS = Collections.unmodifiableMap(alphaMap);
HashMap<Character, Character> combinedMap = new HashMap<Character, Character>(100);
- combinedMap.putAll(alphaMap);
- combinedMap.putAll(digitMap);
- ALL_NORMALIZATION_MAPPINGS = Collections.unmodifiableMap(combinedMap);
+ combinedMap.putAll(ALPHA_MAPPINGS);
+ combinedMap.putAll(asciiDigitMappings);
+ ALPHA_PHONE_MAPPINGS = Collections.unmodifiableMap(combinedMap);
HashMap<Character, Character> allPlusNumberGroupings = new HashMap<Character, Character>();
// Put (lower letter -> upper letter) and (upper letter -> upper letter) mappings.
// found as a leading character only.
// This consists of dash characters, white space characters, full stops, slashes,
// square brackets, parentheses and tildes. It also includes the letter 'x' as that is found as a
- // placeholder for carrier information in some phone numbers.
+ // placeholder for carrier information in some phone numbers. Full-width variants are also
+ // present.
static final String VALID_PUNCTUATION = "-x\u2010-\u2015\u2212\u30FC\uFF0D-\uFF0F " +
"\u00A0\u200B\u2060\u3000()\uFF08\uFF09\uFF3B\uFF3D.\\[\\]/~\u2053\u223C\uFF5E";
- // Digits accepted in phone numbers that we understand.
- private static final String VALID_DIGITS =
- Arrays.toString(DIGIT_MAPPINGS.keySet().toArray()).replaceAll("[, \\[\\]]", "");
+ private static final String DIGITS = "\\p{Nd}";
// We accept alpha characters in phone numbers, ASCII only, upper and lower case.
private static final String VALID_ALPHA =
Arrays.toString(ALPHA_MAPPINGS.keySet().toArray()).replaceAll("[, \\[\\]]", "") +
static final String PLUS_CHARS = "+\uFF0B";
private static final Pattern PLUS_CHARS_PATTERN = Pattern.compile("[" + PLUS_CHARS + "]+");
private static final Pattern SEPARATOR_PATTERN = Pattern.compile("[" + VALID_PUNCTUATION + "]+");
- private static final Pattern CAPTURING_DIGIT_PATTERN =
- Pattern.compile("([" + VALID_DIGITS + "])");
+ private static final Pattern CAPTURING_DIGIT_PATTERN = Pattern.compile("(" + DIGITS + ")");
// Regular expression of acceptable characters that may start a phone number for the purposes of
// parsing. This allows us to strip away meaningless prefixes to phone numbers that may be
// does not contain alpha characters, although they may be used later in the number. It also does
// not include other punctuation, as this will be stripped later during parsing and is of no
// information value when parsing a number.
- private static final String VALID_START_CHAR = "[" + PLUS_CHARS + VALID_DIGITS + "]";
+ private static final String VALID_START_CHAR = "[" + PLUS_CHARS + DIGITS + "]";
static final Pattern VALID_START_CHAR_PATTERN = Pattern.compile(VALID_START_CHAR);
// Regular expression of characters typically used to start a second phone number for the purposes
// plus_sign*([punctuation]*[digits]){3,}([punctuation]|[digits]|[alpha])*
// Note VALID_PUNCTUATION starts with a -, so must be the first in the range.
private static final String VALID_PHONE_NUMBER =
- "[" + PLUS_CHARS + "]*(?:[" + VALID_PUNCTUATION + "]*[" + VALID_DIGITS + "]){3,}[" +
- VALID_PUNCTUATION + VALID_ALPHA + VALID_DIGITS + "]*";
+ "[" + PLUS_CHARS + "]*(?:[" + VALID_PUNCTUATION + "]*" + DIGITS + "){3,}[" +
+ VALID_PUNCTUATION + VALID_ALPHA + DIGITS + "]*";
// Default extension prefix to use when formatting. This will be put in front of any extension
// component of the number, after the main national number is formatted. For example, if you wish
// Canonical-equivalence doesn't seem to be an option with Android java, so we allow two options
// for representing the accented o - the character itself, and one in the unicode decomposed form
// with the combining acute accent.
- private static final String CAPTURING_EXTN_DIGITS = "([" + VALID_DIGITS + "]{1,7})";
+ private static final String CAPTURING_EXTN_DIGITS = "(" + DIGITS + "{1,7})";
static final String KNOWN_EXTN_PATTERNS =
RFC3966_EXTN_PREFIX + CAPTURING_EXTN_DIGITS + "|" +
"[ \u00A0\\t,]*(?:ext(?:ensi(?:o\u0301?|\u00F3))?n?|" +
"\uFF45\uFF58\uFF54\uFF4E?|[,x\uFF58#\uFF03~\uFF5E]|int|anexo|\uFF49\uFF4E\uFF54)" +
"[:\\.\uFF0E]?[ \u00A0\\t,-]*" + CAPTURING_EXTN_DIGITS + "#?|" +
- "[- ]+([" + VALID_DIGITS + "]{1,5})#";
+ "[- ]+(" + DIGITS + "{1,5})#";
// Regexp of all known extension prefixes used by different regions followed by 1 or more valid
// digits, for use when parsing.
/**
* INTERNATIONAL and NATIONAL formats are consistent with the definition in ITU-T Recommendation
- * E. 123. For example, the number of the Google Zurich office will be written as
+ * E.123. For example, the number of the Google Switzerland office will be written as
* "+41 44 668 1800" in INTERNATIONAL format, and as "044 668 1800" in NATIONAL format.
* E164 format is as per INTERNATIONAL format but with no formatting applied, e.g. +41446681800.
* RFC3966 is as per INTERNATIONAL format, but with all spaces and other separating symbols
/**
* Normalizes a string of characters representing a phone number. This performs the following
* conversions:
- * Wide-ascii digits are converted to normal ASCII (European) digits.
+ * Punctuation is stripped.
+ * For ALPHA/VANITY numbers:
* Letters are converted to their numeric representation on a telephone keypad. The keypad
* used here is the one defined in ITU Recommendation E.161. This is only done if there are
- * 3 or more letters in the number, to lessen the risk that such letters are typos -
- * otherwise alpha characters are stripped.
- * Punctuation is stripped.
+ * 3 or more letters in the number, to lessen the risk that such letters are typos.
+ * For other numbers:
+ * Wide-ascii digits are converted to normal ASCII (European) digits.
* Arabic-Indic numerals are converted to European numerals.
+ * Spurious alpha characters are stripped.
*
* @param number a string of characters representing a phone number
* @return the normalized string version of the phone number
static String normalize(String number) {
Matcher m = VALID_ALPHA_PHONE_PATTERN.matcher(number);
if (m.matches()) {
- return normalizeHelper(number, ALL_NORMALIZATION_MAPPINGS, true);
+ return normalizeHelper(number, ALPHA_PHONE_MAPPINGS, true);
} else {
- return normalizeHelper(number, DIGIT_MAPPINGS, true);
+ return normalizeDigitsOnly(number);
}
}
* @return the normalized string version of the phone number
*/
public static String normalizeDigitsOnly(String number) {
- return normalizeHelper(number, DIGIT_MAPPINGS, true);
+ int numberLength = number.length();
+ StringBuilder normalizedDigits = new StringBuilder(numberLength);
+ for (int i = 0; i < numberLength; i++) {
+ int d = Character.digit(number.charAt(i), 10);
+ if (d != -1) {
+ normalizedDigits.append(d);
+ }
+ }
+ return normalizedDigits.toString();
}
/**
* Converts all alpha characters in a number to their respective digits on a keypad, but retains
- * existing formatting. This Java implementation of this function also converts wide-ascii digits
- * to normal ascii digits, and converts Arabic-Indic numerals to European numerals.
+ * existing formatting.
*/
public static String convertAlphaCharactersInNumber(String number) {
- return normalizeHelper(number, ALL_NORMALIZATION_MAPPINGS, false);
+ return normalizeHelper(number, ALPHA_PHONE_MAPPINGS, false);
}
/**
* </pre>
*
* N.B.: area code is a very ambiguous concept, so the I18N team generally recommends against
- * using it for most purposes. Read the following carefully before deciding to use this method:
- *
- * - geographical area codes change over time, and this method honors those changes; therefore,
- * it doesn't guarantee the stability of the result it produces.
- * - subscriber numbers may not be diallable from all devices (notably mobile devices, which
- * typically requires the full national_number to be dialled in most countries).
- * - most non-geographical numbers have no area codes.
- * - some geographical numbers have no area codes.
- *
+ * using it for most purposes, but recommends using the more general {@code national_number}
+ * instead. Read the following carefully before deciding to use this method:
+ * <ul>
+ * <li> geographical area codes change over time, and this method honors those changes;
+ * therefore, it doesn't guarantee the stability of the result it produces.
+ * <li> subscriber numbers may not be diallable from all devices (notably mobile devices, which
+ * typically require the full national_number to be dialled in most regions).
+ * <li> most non-geographical numbers have no area codes.
+ * <li> some geographical numbers have no area codes.
+ * </ul>
* @param number the PhoneNumber object for which clients want to know the length of the area
* code.
* @return the length of area code of the PhoneNumber object passed in.
* </pre>
*
* Refer to the unittests to see the difference between this function and
- * {@link #getLengthOfGeographicalAreaCode()}.
+ * {@link #getLengthOfGeographicalAreaCode}.
*
* @param number the PhoneNumber object for which clients want to know the length of the NDC.
* @return the length of NDC of the PhoneNumber object passed in.
* {@code carrierCode}. The {@code carrierCode} will always be used regardless of whether the
* phone number already has a preferred domestic carrier code stored. If {@code carrierCode}
* contains an empty string, returns the number in national format without any carrier code.
- *
+ *
* @param number the phone number to be formatted
* @param carrierCode the carrier selection code to be used
* @return the formatted phone number in national format for dialing using the carrier as
/**
* Formats a phone number for out-of-country dialing purposes. If no regionCallingFrom is
* supplied, we format the number in its INTERNATIONAL format. If the country calling code is the
- * same as the region where the number is from, then NATIONAL formatting will be applied.
+ * same as that of the region where the number is from, then NATIONAL formatting will be applied.
*
* <p>If the number itself has a country calling code of zero or an otherwise invalid country
* calling code, then we return the number with no formatting applied.
// cannot begin with 0.
Matcher digitMatcher = CAPTURING_DIGIT_PATTERN.matcher(number.substring(matchEnd));
if (digitMatcher.find()) {
- String normalizedGroup = normalizeHelper(digitMatcher.group(1), DIGIT_MAPPINGS, true);
+ String normalizedGroup = normalizeDigitsOnly(digitMatcher.group(1));
if (normalizedGroup.equals("0")) {
return false;
}
assertEquals("650253", formatter.inputDigit('3'));
}
+ public void testTooLongNumberMatchingMultipleLeadingDigits() {
+ // See http://code.google.com/p/libphonenumber/issues/detail?id=36
+ // The bug occurred last time for countries which have two formatting rules with exactly the
+ // same leading digits pattern but differ in length.
+ AsYouTypeFormatter formatter = phoneUtil.getAsYouTypeFormatter("ZZ");
+ assertEquals("+", formatter.inputDigit('+'));
+ assertEquals("+8", formatter.inputDigit('8'));
+ assertEquals("+81 ", formatter.inputDigit('1'));
+ assertEquals("+81 9", formatter.inputDigit('9'));
+ assertEquals("+81 90", formatter.inputDigit('0'));
+ assertEquals("+81 90 1", formatter.inputDigit('1'));
+ assertEquals("+81 90 12", formatter.inputDigit('2'));
+ assertEquals("+81 90 123", formatter.inputDigit('3'));
+ assertEquals("+81 90 1234", formatter.inputDigit('4'));
+ assertEquals("+81 90 1234 5", formatter.inputDigit('5'));
+ assertEquals("+81 90 1234 56", formatter.inputDigit('6'));
+ assertEquals("+81 90 1234 567", formatter.inputDigit('7'));
+ assertEquals("+81 90 1234 5678", formatter.inputDigit('8'));
+ assertEquals("+81 90 12 345 6789", formatter.inputDigit('9'));
+ assertEquals("+81901234567890", formatter.inputDigit('0'));
+ }
+
public void testAYTFUS() {
AsYouTypeFormatter formatter = phoneUtil.getAsYouTypeFormatter("US");
assertEquals("6", formatter.inputDigit('6'));
assertEquals(number, matchWithSpaces.rawString());
}
+ public void testIsLatinLetter() throws Exception {
+ assertTrue(PhoneNumberMatcher.isLatinLetter('c'));
+ assertTrue(PhoneNumberMatcher.isLatinLetter('C'));
+ assertTrue(PhoneNumberMatcher.isLatinLetter('\u00C9'));
+ assertTrue(PhoneNumberMatcher.isLatinLetter('\u0301')); // Combining acute accent
+ // Punctuation, digits and white-space are not considered "latin letters".
+ assertFalse(PhoneNumberMatcher.isLatinLetter(':'));
+ assertFalse(PhoneNumberMatcher.isLatinLetter('5'));
+ assertFalse(PhoneNumberMatcher.isLatinLetter('-'));
+ assertFalse(PhoneNumberMatcher.isLatinLetter('.'));
+ assertFalse(PhoneNumberMatcher.isLatinLetter(' '));
+ assertFalse(PhoneNumberMatcher.isLatinLetter('\u6211')); // Chinese character
+ }
+
+ public void testMatchesWithSurroundingLatinChars() throws Exception {
+ ArrayList<NumberContext> contextPairs = new ArrayList<NumberContext>(5);
+ contextPairs.add(new NumberContext("abc", "def"));
+ contextPairs.add(new NumberContext("abc", ""));
+ contextPairs.add(new NumberContext("", "def"));
+ // Latin capital letter E with an acute accent (U+00C9).
+ contextPairs.add(new NumberContext("\u00C9", ""));
+ // Latin small letter e in decomposed form (base letter followed by a combining acute accent).
+ contextPairs.add(new NumberContext("e\u0301", ""));
+
+ // Numbers should not be considered valid, if they are surrounded by Latin characters, but
+ // should be considered possible.
+ findMatchesInContexts(contextPairs, false, true);
+ }
+
+ public void testMatchesWithSurroundingLatinCharsAndLeadingPunctuation() throws Exception {
+ // Contexts with trailing characters. Leading characters are okay here since the numbers we will
+ // insert start with punctuation, but trailing characters are still not allowed.
+ ArrayList<NumberContext> possibleOnlyContexts = new ArrayList<NumberContext>(3);
+ possibleOnlyContexts.add(new NumberContext("abc", "def"));
+ possibleOnlyContexts.add(new NumberContext("", "def"));
+ possibleOnlyContexts.add(new NumberContext("", "\u00C9"));
+
+ // Numbers should not be considered valid, if they have trailing Latin characters, but should be
+ // considered possible.
+ String numberWithPlus = "+14156667777";
+ String numberWithBrackets = "(415)6667777";
+ findMatchesInContexts(possibleOnlyContexts, false, true, "US", numberWithPlus);
+ findMatchesInContexts(possibleOnlyContexts, false, true, "US", numberWithBrackets);
+
+ ArrayList<NumberContext> validContexts = new ArrayList<NumberContext>(4);
+ validContexts.add(new NumberContext("abc", ""));
+ validContexts.add(new NumberContext("\u00C9", ""));
+ validContexts.add(new NumberContext("\u00C9", ".")); // Trailing punctuation.
+ validContexts.add(new NumberContext("\u00C9", " def")); // Trailing white-space.
+
+ // Numbers should be considered valid, since they start with punctuation.
+ findMatchesInContexts(validContexts, true, true, "US", numberWithPlus);
+ findMatchesInContexts(validContexts, true, true, "US", numberWithBrackets);
+ }
+
+ public void testMatchesWithSurroundingChineseChars() throws Exception {
+ ArrayList<NumberContext> validContexts = new ArrayList<NumberContext>(3);
+ validContexts.add(new NumberContext("\u6211\u7684\u7535\u8BDD\u53F7\u7801\u662F", ""));
+ validContexts.add(new NumberContext("", "\u662F\u6211\u7684\u7535\u8BDD\u53F7\u7801"));
+ validContexts.add(new NumberContext("\u8BF7\u62E8\u6253", "\u6211\u5728\u660E\u5929"));
+
+ // Numbers should be considered valid, since they are surrounded by Chinese.
+ findMatchesInContexts(validContexts, true, true);
+ }
+
+ public void testMatchesWithSurroundingPunctuation() throws Exception {
+ ArrayList<NumberContext> validContexts = new ArrayList<NumberContext>(4);
+ validContexts.add(new NumberContext("My number-", "")); // At end of text.
+ validContexts.add(new NumberContext("", ".Nice day.")); // At start of text.
+ validContexts.add(new NumberContext("Tel:", ".")); // Punctuation surrounds number.
+ validContexts.add(new NumberContext("Tel: ", " on Saturdays.")); // White-space is also fine.
+
+ // Numbers should be considered valid, since they are surrounded by punctuation.
+ findMatchesInContexts(validContexts, true, true);
+ }
+
+ /**
+ * Helper method which tests the contexts provided and ensures that:
+ * -- if isValid is true, they all find a test number inserted in the middle when leniency of
+ * matching is set to VALID; else no test number should be extracted at that leniency level
+ * -- if isPossible is true, they all find a test number inserted in the middle when leniency of
+ * matching is set to POSSIBLE; else no test number should be extracted at that leniency level
+ */
+ private void findMatchesInContexts(List<NumberContext> contexts, boolean isValid,
+ boolean isPossible, String region, String number) {
+ if (isValid) {
+ doTestInContext(number, region, contexts, Leniency.VALID);
+ } else {
+ for (NumberContext context : contexts) {
+ String text = context.leadingText + number + context.trailingText;
+ assertTrue("Should not have found a number in " + text,
+ hasNoMatches(phoneUtil.findNumbers(text, region)));
+ }
+ }
+ if (isPossible) {
+ doTestInContext(number, region, contexts, Leniency.POSSIBLE);
+ } else {
+ for (NumberContext context : contexts) {
+ String text = context.leadingText + number + context.trailingText;
+ assertTrue("Should not have found a number in " + text,
+ hasNoMatches(phoneUtil.findNumbers(text, region, Leniency.POSSIBLE,
+ Long.MAX_VALUE)));
+ }
+ }
+ }
+
+ /**
+ * Variant of findMatchesInContexts that uses a default number and region.
+ */
+ private void findMatchesInContexts(List<NumberContext> contexts, boolean isValid,
+ boolean isPossible) {
+ String region = "US";
+ String number = "415-666-7777";
+
+ findMatchesInContexts(contexts, isValid, isPossible, region, number);
+ }
+
public void testNonMatchingBracketsAreInvalid() throws Exception {
// The digits up to the ", " form a valid US number, but it shouldn't be matched as one since
// there was a non-matching bracket present.
}
}
+ /**
+ * Tests valid numbers in contexts that should pass for {@link Leniency#POSSIBLE}.
+ */
private void findPossibleInContext(String number, String defaultCountry) {
ArrayList<NumberContext> contextPairs = new ArrayList<NumberContext>(15);
contextPairs.add(new NumberContext("", "")); // no context
}
/**
- * Tests valid numbers in contexts that fail for {@link Leniency#POSSIBLE}.
+ * Tests valid numbers in contexts that fail for {@link Leniency#POSSIBLE} but are valid for
+ * {@link Leniency#VALID}.
*/
private void findValidInContext(String number, String defaultCountry) {
ArrayList<NumberContext> contextPairs = new ArrayList<NumberContext>(5);
assertTrue(metadata.hasNationalPrefix());
assertEquals(2, metadata.numberFormatSize());
assertEquals("(\\d{3})(\\d{3})(\\d{4})",
- metadata.getNumberFormat(0).getPattern());
- assertEquals("$1 $2 $3", metadata.getNumberFormat(0).getFormat());
+ metadata.getNumberFormat(1).getPattern());
+ assertEquals("$1 $2 $3", metadata.getNumberFormat(1).getFormat());
assertEquals("[13-9]\\d{9}|2[0-35-9]\\d{8}",
metadata.getGeneralDesc().getNationalNumberPattern());
assertEquals("\\d{7}(?:\\d{3})?", metadata.getGeneralDesc().getPossibleNumberPattern());
PhoneNumberUtil.PhoneNumberType.MOBILE));
}
+ public void testConvertAlphaCharactersInNumber() {
+ String input = "1800-ABC-DEF";
+ // Alpha chars are converted to digits; everything else is left untouched.
+ String expectedOutput = "1800-222-333";
+ assertEquals(expectedOutput, PhoneNumberUtil.convertAlphaCharactersInNumber(input));
+ }
+
public void testNormaliseRemovePunctuation() {
String inputNumber = "034-56&+#234";
String expectedOutput = "03456234";
// Alpha numbers.
assertTrue(PhoneNumberUtil.isViablePhoneNumber("0800-4-pizza"));
assertTrue(PhoneNumberUtil.isViablePhoneNumber("0800-4-PIZZA"));
+ }
+
+ public void testIsViablePhoneNumberNonAscii() {
// Only one or two digits before possible punctuation followed by more digits.
assertTrue(PhoneNumberUtil.isViablePhoneNumber("1\u300034"));
assertFalse(PhoneNumberUtil.isViablePhoneNumber("1\u30003+4"));
assertEquals(US_NUMBER, phoneUtil.parse("0~01-650-253-0000", RegionCode.PL));
// Using "++" at the start.
assertEquals(US_NUMBER, phoneUtil.parse("++1 (650) 253-0000", RegionCode.PL));
+ }
+
+ public void testParseNonAscii() throws Exception {
// Using a full-width plus sign.
assertEquals(US_NUMBER, phoneUtil.parse("\uFF0B1 (650) 253-0000", RegionCode.SG));
// The whole number, including punctuation, is here represented in full-width form.
"\u3000\uFF12\uFF15\uFF13\u30FC\uFF10\uFF10\uFF10" +
"\uFF10",
RegionCode.SG));
+
+ // Using a very strange decimal digit range (Mongolian digits).
+ assertEquals(US_NUMBER, phoneUtil.parse("\u1811 \u1816\u1815\u1810 " +
+ "\u1812\u1815\u1813 \u1810\u1810\u1810\u1810",
+ RegionCode.US));
}
public void testParseWithLeadingZero() throws Exception {
<territory id="BG" countryCode="359" internationalPrefix="00"
nationalPrefix="0" nationalPrefixFormattingRule="$NP$FG">
<availableFormats>
+ <numberFormat pattern="(2)(\d{5})">
+ <leadingDigits>29</leadingDigits>
+ <format>$1/$2</format>
+ </numberFormat>
<numberFormat pattern="(2)(\d{3})(\d{3,4})">
<leadingDigits>2</leadingDigits>
<format>$1/$2 $3</format>
</numberFormat>
</availableFormats>
<generalDesc>
- <nationalNumberPattern>[2-9]\d{6,8}</nationalNumberPattern>
- <possibleNumberPattern>\d{7,9}</possibleNumberPattern>
+ <nationalNumberPattern>
+ [23567]\d{5,7}|
+ [489]\d{6,8}
+ </nationalNumberPattern>
+ <possibleNumberPattern>\d{5,9}</possibleNumberPattern>
</generalDesc>
<fixedLine>
+ <!-- 29xxxx numbers have been added because they can be found online, and are typically
+ used by taxi companies. -->
<nationalNumberPattern>
+ 2(?:
+ [0-8]\d{5,6}|
+ 9\d{4,6}
+ )|
(?:
- 2\d|
[36]\d|
5[1-9]|
8[1-6]|
)
)\d{4,5}
</nationalNumberPattern>
- <possibleNumberPattern>\d{7,8}</possibleNumberPattern>
+ <possibleNumberPattern>\d{5,8}</possibleNumberPattern>
<exampleNumber>2123456</exampleNumber>
</fixedLine>
<mobile>
<generalDesc>
<nationalNumberPattern>
1\d{4,9}|
- [2-689]\d{7,9}
+ [2456]\d{8}|
+ 3\d{7}|
+ [89]\d{8,9}
</nationalNumberPattern>
<possibleNumberPattern>\d{5,10}</possibleNumberPattern>
</generalDesc>
<exampleNumber>810123456</exampleNumber>
</fixedLine>
<mobile>
- <nationalNumberPattern>6\d{8}</nationalNumberPattern>
+ <nationalNumberPattern>
+ (?:
+ 6\d|
+ 7[1-4]
+ )\d{7}
+ </nationalNumberPattern>
<exampleNumber>612345678</exampleNumber>
</mobile>
<tollFree>
<nationalNumberPattern>
(?:
2[034678]|
- 54
+ 5[47]
)\d{7}
</nationalNumberPattern>
<possibleNumberPattern>\d{9}</possibleNumberPattern>
<mobile>
<nationalNumberPattern>
7(?:
- 0[01257]\d{2}|
- 6[02-4]\d{2}|
- 7[157]\d{2}
- )\d{5}
+ 0[01257]|
+ 6[02-4]|
+ 7[157]
+ )\d{7}
</nationalNumberPattern>
<exampleNumber>7710009998</exampleNumber>
</mobile>
<nationalNumberPattern>
(?:
[27]\d{3}|
- 3[0-49]\d{2}|
+ 3[0-59]\d{2}|
411[3-6]
)\d{2}
</nationalNumberPattern>
<availableFormats>
<numberFormat pattern="(\d{3})(\d{3})">
<leadingDigits>
- [3578]|
+ [35789]|
2[1-4689]|
- 6(?:
- [0-35-9]|
- 4[0-689]
- )
+ 6[0-35-9]
</leadingDigits>
<format>$1 $2</format>
</numberFormat>
<numberFormat pattern="(\d)(\d{3})(\d{3})">
<leadingDigits>
2[57]|
- 4[2-46]|
- 647
+ 4|
+ 64
</leadingDigits>
<format>$1 $2 $3</format>
</numberFormat>
</availableFormats>
<generalDesc>
- <nationalNumberPattern>[2-8]\d{5,6}</nationalNumberPattern>
+ <nationalNumberPattern>[2-9]\d{5,6}</nationalNumberPattern>
<possibleNumberPattern>\d{6,7}</possibleNumberPattern>
</generalDesc>
<fixedLine>
phone numbers. -->
<!-- Old numbers and new numbers (with 4 at front) running in parallel from 4th May
2011 until 4 August 2011. Note the 55 and 78 numbers will have a 2 inserted at
- front instead of a 4. -->
+ front instead of a 4.
+ Since the 4 is optional, the newly assigned 44\d{5} group has been separated out in
+ the reg-exp for now to prevent 4\d{5} from being considered valid. -->
<nationalNumberPattern>
(?:
2?(?:
)|
4?(?:
2(?:
- 1[78]|
- 2[14-69]|
- 3[2-4]|
- 4[1-36-8]|
- 6[167]|
- [89]\d
+ 0[589]|
+ 1[03-9]|
+ [2-9]\d
)|
- 3(?:
- 0[34]|
- 2[1-6]|
- 4[4-6]|
- 55|
- 6[016]|
- 7\d|
- 8[0-589]|
- 9[0-5]
- )|
- 6(?:
- 0[0-256]|
- 1[0-478]|
- 2[145]|
- 3[02-4]|
- 4[124]|
- 6[015]|
- 7\d|
- 8[1-3]
- )
- )
+ [36]\d{2}
+ )|
+ 44\d{2}
)\d{3}
</nationalNumberPattern>
<possibleNumberPattern>\d{6,7}</possibleNumberPattern>
<possibleNumberPattern>\d{6}</possibleNumberPattern>
<exampleNumber>800000</exampleNumber>
</tollFree>
+ <premiumRate>
+ <!-- Using this to represent Value Added Service in the plan. -->
+ <nationalNumberPattern>98\d{4}</nationalNumberPattern>
+ <possibleNumberPattern>\d{6}</possibleNumberPattern>
+ <exampleNumber>981234</exampleNumber>
+ </premiumRate>
<voip>
- <nationalNumberPattern>
- (?:
- 44[1-3]|
- 647
- )\d{4}
- </nationalNumberPattern>
+ <nationalNumberPattern>64\d{5}</nationalNumberPattern>
<possibleNumberPattern>\d{7}</possibleNumberPattern>
- <exampleNumber>4410123</exampleNumber>
+ <exampleNumber>6412345</exampleNumber>
</voip>
+ <shortCode>
+ <nationalNumberPattern>
+ 1(?:
+ 0\d|
+ 1[027]|
+ 2[0-8]|
+ 3[13]|
+ 4[0-2]|
+ [59][15]|
+ 6[1-9]|
+ 7[124-6]|
+ 8[158]
+ )|
+ 96\d{2}
+ </nationalNumberPattern>
+ <possibleNumberPattern>\d{3,4}</possibleNumberPattern>
+ <exampleNumber>100</exampleNumber>
+ </shortCode>
</territory>
<!-- Sudan -->
)|
2[1-35]|
3(?:
- 1\d|
- [34]
+ [13]\d|
+ 4
)|
4[13]|
5[1-3]
<territory id="US" countryCode="1" internationalPrefix="011"
mainCountryForCode="true" nationalPrefix="1">
<availableFormats>
+ <numberFormat pattern="(\d{3})(\d{4})">
+ <format>$1-$2</format>
+ <intlFormat>NA</intlFormat>
+ </numberFormat>
<numberFormat pattern="(\d{3})(\d{3})(\d{4})">
<format>($1) $2-$3</format>
<!-- A different pattern is used when formatting internationally, as the area code is no
longer optional and should not be in brackets. -->
<intlFormat>$1-$2-$3</intlFormat>
</numberFormat>
- <numberFormat pattern="(\d{3})(\d{4})">
- <format>$1-$2</format>
- <intlFormat>NA</intlFormat>
- </numberFormat>
</availableFormats>
<generalDesc>
<nationalNumberPattern>[2-9]\d{9}</nationalNumberPattern>
<!-- Should be formatted in one block, apart from the mobile numbers. -->
<availableFormats>
<numberFormat pattern="(\d{3})(\d{4})">
- <leadingDigits>[57]</leadingDigits>
+ <leadingDigits>[579]</leadingDigits>
<format>$1 $2</format>
</numberFormat>
</availableFormats>
<generalDesc>
- <nationalNumberPattern>[2-578]\d{4,6}</nationalNumberPattern>
+ <nationalNumberPattern>[2-57-9]\d{4,6}</nationalNumberPattern>
<possibleNumberPattern>\d{5,7}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleNumberPattern>\d{7}</possibleNumberPattern>
<exampleNumber>5912345</exampleNumber>
</mobile>
- <!-- Using this for non-geographical numbers. -->
+ <!-- Using this for non-geographical numbers, since they have not been clearly defined. -->
<uan>
- <nationalNumberPattern>30\d{3}</nationalNumberPattern>
- <possibleNumberPattern>\d{5}</possibleNumberPattern>
+ <nationalNumberPattern>
+ 30\d{3}|
+ 900\d{4}
+ </nationalNumberPattern>
+ <possibleNumberPattern>\d{5,7}</possibleNumberPattern>
<exampleNumber>30123</exampleNumber>
</uan>
</territory>
<leadingDigits>[57-9]0</leadingDigits>
<format>$1 $2 $3</format>
</numberFormat>
+ <numberFormat pattern="(\d{2})(\d{2})(\d{3})(\d{4})">
+ <leadingDigits>[57-9]0</leadingDigits>
+ <format>$1 $2 $3 $4</format>
+ </numberFormat>
<numberFormat pattern="(\d{2})(\d{3})(\d{4})">
<leadingDigits>222|333</leadingDigits>
<leadingDigits>(?:222|333)1</leadingDigits>
preferredExtnPrefix=" extn. " nationalPrefix="1"
mainCountryForCode="true" >
<availableFormats>
- <numberFormat pattern="(\d{3})(\d{3})(\d{4})">
- <format>$1 $2 $3</format>
- </numberFormat>
<numberFormat pattern="(\d{3})(\d{4})">
<format>$1 $2</format>
<intlFormat>NA</intlFormat>
</numberFormat>
+ <numberFormat pattern="(\d{3})(\d{3})(\d{4})">
+ <format>$1 $2 $3</format>
+ </numberFormat>
</availableFormats>
<generalDesc>
<nationalNumberPattern>[13-9]\d{9}|2[0-35-9]\d{8}</nationalNumberPattern>