JAVA: Metadata changes and bug fixes

author lararennie@google.com <lararennie@google.com@ee073f10-1060-11df-b6a4-87a95322a99c>

Fri, 10 Jun 2011 14:02:31 +0000 (14:02 +0000)

committer lararennie@google.com <lararennie@google.com@ee073f10-1060-11df-b6a4-87a95322a99c>

Fri, 10 Jun 2011 14:02:31 +0000 (14:02 +0000)
author lararennie@google.com <lararennie@google.com@ee073f10-1060-11df-b6a4-87a95322a99c>
Fri, 10 Jun 2011 14:02:31 +0000 (14:02 +0000)
committer lararennie@google.com <lararennie@google.com@ee073f10-1060-11df-b6a4-87a95322a99c>
Fri, 10 Jun 2011 14:02:31 +0000 (14:02 +0000)
diff --git a/java/release_notes.txt b/java/release_notes.txt

index 1963e56..7f80b91 100644 (file)
--- a/java/release_notes.txt
+++ b/java/release_notes.txt
@@ -1,3 +1,13 @@
+June 10th, 2011
+* Code changes:
+ - Fixes for PhoneNumberMatcher to be more restrictive in valid mode and not match numbers
+   surrounded by Latin characters. This ensures, for example, the string abc123456789acg will not be
+   marked as a phone number.
+ - Enable PhoneNumberUtil to handle all digits, rather than a subset
+ - Fix for AYTF issue36 and improvement for US AYTF behaviour.
+* Metadata changes:
+ - Updates: BG, EG, ES, GH, PF, SC, SY, VA
+
  May 24th, 2011
  * Code changes:
   - Phonenumber now implements Serializable.
diff --git a/java/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java b/java/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java

index 806ecc9..1728eee 100644 (file)
--- a/java/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java
+++ b/java/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java
@@ -124,7 +124,9 @@ public class AsYouTypeFormatter {
    private boolean maybeCreateNewTemplate() {
      // When there are multiple available formats, the formatter uses the first format where a
      // formatting template could be created.
-    for (NumberFormat numberFormat : possibleFormats) {
+    Iterator<NumberFormat> it = possibleFormats.iterator();
+    while (it.hasNext()) {
+      NumberFormat numberFormat = it.next();
        String pattern = numberFormat.getPattern();
        if (currentFormattingPattern.equals(pattern)) {
          return false;
@@ -132,6 +134,8 @@ public class AsYouTypeFormatter {
        if (createFormattingTemplate(numberFormat)) {
          currentFormattingPattern = pattern;
          return true;
+      } else {  // Remove the current number format from possibleFormats.
+        it.remove();
        }
      }
      ableToFormat = false;
@@ -189,7 +193,7 @@ public class AsYouTypeFormatter {
      numberPattern = STANDALONE_DIGIT_PATTERN.matcher(numberPattern).replaceAll("\\\\d");
      formattingTemplate.setLength(0);
      String tempTemplate = getFormattingTemplate(numberPattern, format.getFormat());
-    if (tempTemplate.length() > nationalNumber.length()) {
+    if (tempTemplate.length() > 0) {
        formattingTemplate.append(tempTemplate);
        return true;
      }
@@ -205,6 +209,11 @@ public class AsYouTypeFormatter {
      Matcher m = regexCache.getPatternForRegex(numberPattern).matcher(longestPhoneNumber);
      m.find();  // this will always succeed
      String aPhoneNumber = m.group();
+    // No formatting template can be created if the number of digits entered so far is longer than
+    // the maximum the current formatting rule can accommodate.
+    if (aPhoneNumber.length() < nationalNumber.length()) {
+      return "";
+    }
      // Formats the number according to numberFormat
      String template = aPhoneNumber.replaceAll(numberPattern, numberFormat);
      // Replaces each digit with character digitPlaceholder
@@ -471,17 +480,20 @@ public class AsYouTypeFormatter {
    // version, if nextChar is a digit in non-ASCII format. This method assumes its input is either a
    // digit or the plus sign.
    private char normalizeAndAccrueDigitsAndPlusSign(char nextChar, boolean rememberPosition) {
+    char normalizedChar;
      if (nextChar == PhoneNumberUtil.PLUS_SIGN) {
+      normalizedChar = nextChar;
        accruedInputWithoutFormatting.append(nextChar);
      } else {
-      nextChar = PhoneNumberUtil.DIGIT_MAPPINGS.get(nextChar);
-      accruedInputWithoutFormatting.append(nextChar);
-      nationalNumber.append(nextChar);
+      int radix = 10;
+      normalizedChar = Character.forDigit(Character.digit(nextChar, radix), radix);
+      accruedInputWithoutFormatting.append(normalizedChar);
+      nationalNumber.append(normalizedChar);
      }
      if (rememberPosition) {
        positionToRemember = accruedInputWithoutFormatting.length();
      }
-    return nextChar;
+    return normalizedChar;
    }
  
    private String inputDigitHelper(char nextChar) {
diff --git a/java/src/com/google/i18n/phonenumbers/PhoneNumberMatch.java b/java/src/com/google/i18n/phonenumbers/PhoneNumberMatch.java

index b00a0e1..e994959 100644 (file)
--- a/java/src/com/google/i18n/phonenumbers/PhoneNumberMatch.java
+++ b/java/src/com/google/i18n/phonenumbers/PhoneNumberMatch.java
@@ -57,7 +57,7 @@ public final class PhoneNumberMatch {
    /** The start index into the text. */
    private final int start;
    /** The raw substring matched. */
-  private final String match;
+  private final String rawString;
    /** The matched phone number. */
    private final PhoneNumber number;
  
@@ -65,18 +65,18 @@ public final class PhoneNumberMatch {
     * Creates a new match.
     *
     * @param start  the start index into the target text
-   * @param match  the matched substring of the target text
+   * @param rawString  the matched substring of the target text
     * @param number  the matched phone number
     */
-  PhoneNumberMatch(int start, String match, PhoneNumber number) {
+  PhoneNumberMatch(int start, String rawString, PhoneNumber number) {
      if (start < 0) {
        throw new IllegalArgumentException("Start index must be >= 0.");
      }
-    if (match == null || number == null) {
+    if (rawString == null || number == null) {
        throw new NullPointerException();
      }
      this.start = start;
-    this.match = match;
+    this.rawString = rawString;
      this.number = number;
    }
  
@@ -92,17 +92,17 @@ public final class PhoneNumberMatch {
  
    /** Returns the exclusive end index of the matched phone number within the searched text. */
    public int end() {
-    return start + match.length();
+    return start + rawString.length();
    }
  
    /** Returns the raw string matched as a phone number in the searched text. */
    public String rawString() {
-    return match;
+    return rawString;
    }
  
    @Override
    public int hashCode() {
-    return Arrays.hashCode(new Object[]{start, match, number});
+    return Arrays.hashCode(new Object[]{start, rawString, number});
    }
  
    @Override
@@ -114,11 +114,12 @@ public final class PhoneNumberMatch {
        return false;
      }
      PhoneNumberMatch other = (PhoneNumberMatch) obj;
-    return match.equals(other.match) && (start == other.start) && number.equals(other.number);
+    return rawString.equals(other.rawString) && (start == other.start) &&
+        number.equals(other.number);
    }
  
    @Override
    public String toString() {
-    return "PhoneNumberMatch [" + start() + "," + end() + ") " + match;
+    return "PhoneNumberMatch [" + start() + "," + end() + ") " + rawString;
    }
  }
diff --git a/java/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java b/java/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java

index 68ad3ad..c8a7651 100644 (file)
--- a/java/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java
+++ b/java/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java
@@ -19,6 +19,7 @@ package com.google.i18n.phonenumbers;
  import com.google.i18n.phonenumbers.PhoneNumberUtil.Leniency;
  import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber;
  
+import java.lang.Character.UnicodeBlock;
  import java.util.Iterator;
  import java.util.NoSuchElementException;
  import java.util.regex.Matcher;
@@ -82,6 +83,11 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
     */
    private static final Pattern GROUP_SEPARATOR = Pattern.compile("\\p{Z}+");
  
+  /**
+   * Punctuation that may be at the start of a phone number - brackets and plus signs.
+   */
+  private static final Pattern LEAD_CLASS;
+
    static {
      /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist
       * to make the pattern more easily understood. */
@@ -112,7 +118,7 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
       * country code. */
      int digitBlockLimit =
          PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE;
-    /* Limit on the number of blocks separated by punctuation. Use digitBlockLimit since in some
+    /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some
       * formats use spaces to separate each digit. */
      String blockLimit = limit(0, digitBlockLimit);
  
@@ -120,8 +126,9 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
      String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit;
      /* A digits block without punctuation. */
      String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit);
-    /* Punctuation that may be at the start of a phone number - brackets and plus signs. */
+
      String leadClass = "[" + openingParens + PhoneNumberUtil.PLUS_CHARS + "]";
+    LEAD_CLASS = Pattern.compile(leadClass);
  
      /* Phone number pattern allowing optional punctuation. */
      PATTERN = Pattern.compile(
@@ -145,7 +152,7 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
    }
  
    /** The phone number utility. */
-  private final PhoneNumberUtil util;
+  private final PhoneNumberUtil phoneUtil;
    /** The text searched for phone numbers. */
    private final CharSequence text;
    /**
@@ -189,7 +196,7 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
      if (maxTries < 0) {
        throw new IllegalArgumentException();
      }
-    this.util = util;
+    this.phoneUtil = util;
      this.text = (text != null) ? text : "";
      this.preferredRegion = country;
      this.leniency = leniency;
@@ -265,6 +272,25 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
    }
  
    /**
+   * Helper method to determine if a character is a Latin-script letter or not. For our purposes,
+   * combining marks should also return true since we assume they have been added to a preceding
+   * Latin character.
+   */
+  static boolean isLatinLetter(char letter) {
+    // Combining marks are a subset of the non-spacing mark (Mn) category.
+    if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
+      return false;
+    }
+    UnicodeBlock block = UnicodeBlock.of(letter);
+    return block.equals(UnicodeBlock.BASIC_LATIN) ||
+        block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) ||
+        block.equals(UnicodeBlock.LATIN_EXTENDED_A) ||
+        block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) ||
+        block.equals(UnicodeBlock.LATIN_EXTENDED_B) ||
+        block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
+  }
+
+  /**
     * Attempts to extract a match from a {@code candidate} character sequence.
     *
     * @param candidate  the candidate text that might contain a phone number
@@ -277,6 +303,21 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
        return null;
      }
  
+    // If leniency is set to VALID only, we also want to skip numbers that are surrounded by Latin
+    // alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
+    if (leniency == Leniency.VALID) {
+      // If the candidate is not at the start of the text, and does not start with punctuation and
+      // the previous character is a Latin letter, return null.
+      if (offset > 0 &&
+          (!LEAD_CLASS.matcher(candidate).lookingAt() && isLatinLetter(text.charAt(offset - 1)))) {
+        return null;
+      }
+      int lastCharIndex = offset + candidate.length();
+      if (lastCharIndex < text.length() && isLatinLetter(text.charAt(lastCharIndex))) {
+        return null;
+      }
+    }
+
      // Try to come up with a valid match given the entire candidate.
      String rawString = candidate.toString();
      PhoneNumberMatch match = parseAndVerify(rawString, offset);
@@ -299,7 +340,7 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
     */
    private PhoneNumberMatch extractInnerMatch(String candidate, int offset) {
      // Try removing either the first or last "group" in the number and see if this gives a result.
-    // We consider white space to be a possible indications of the start or end of the phone number.
+    // We consider white space to be a possible indication of the start or end of the phone number.
      Matcher groupMatcher = GROUP_SEPARATOR.matcher(candidate);
  
      if (groupMatcher.find()) {
@@ -350,8 +391,8 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
        if (!MATCHING_BRACKETS.matcher(candidate).matches()) {
          return null;
        }
-      PhoneNumber number = util.parse(candidate, preferredRegion);
-      if (leniency.verify(number, util)) {
+      PhoneNumber number = phoneUtil.parse(candidate, preferredRegion);
+      if (leniency.verify(number, phoneUtil)) {
          return new PhoneNumberMatch(offset, candidate, number);
        }
      } catch (NumberParseException e) {
diff --git a/java/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java b/java/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java

index e129f34..af4d147 100644 (file)
--- a/java/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java
+++ b/java/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java
@@ -77,7 +77,7 @@ public class PhoneNumberUtil {
    // Region-code for the unknown region.
    private static final String UNKNOWN_REGION = "ZZ";
  
-  // The set of regions that share country code 1.
+  // The set of regions that share country calling code 1.
    // There are roughly 26 regions and we set the initial capacity of the HashSet to 35 to offer a
    // load factor of roughly 0.75.
    private final Set<String> nanpaRegions = new HashSet<String>(35);
@@ -88,23 +88,18 @@ public class PhoneNumberUtil {
  
    private static final String RFC3966_EXTN_PREFIX = ";ext=";
  
-  // These mappings map a character (key) to a specific digit that should replace it for
-  // normalization purposes. Non-European digits that may be used in phone numbers are mapped to a
-  // European equivalent.
-  static final Map<Character, Character> DIGIT_MAPPINGS;
-
    // Only upper-case variants of alpha characters are stored.
    private static final Map<Character, Character> ALPHA_MAPPINGS;
  
    // For performance reasons, amalgamate both into one map.
-  private static final Map<Character, Character> ALL_NORMALIZATION_MAPPINGS;
+  private static final Map<Character, Character> ALPHA_PHONE_MAPPINGS;
  
    // Separate map of all symbols that we wish to retain when formatting alpha numbers. This
    // includes digits, ASCII letters and number grouping symbols such as "-" and " ".
    private static final Map<Character, Character> ALL_PLUS_NUMBER_GROUPING_SYMBOLS;
  
    static {
-    // Simple ASCII digits map used to populate DIGIT_MAPPINGS and
+    // Simple ASCII digits map used to populate ALPHA_PHONE_MAPPINGS and
      // ALL_PLUS_NUMBER_GROUPING_SYMBOLS.
      HashMap<Character, Character> asciiDigitMappings = new HashMap<Character, Character>();
      asciiDigitMappings.put('0', '0');
@@ -118,40 +113,6 @@ public class PhoneNumberUtil {
      asciiDigitMappings.put('8', '8');
      asciiDigitMappings.put('9', '9');
  
-    HashMap<Character, Character> digitMap = new HashMap<Character, Character>(50);
-    digitMap.putAll(asciiDigitMappings);
-    digitMap.put('\uFF10', '0');  // Fullwidth digit 0
-    digitMap.put('\u0660', '0');  // Arabic-indic digit 0
-    digitMap.put('\u06F0', '0');  // Eastern-Arabic digit 0
-    digitMap.put('\uFF11', '1');  // Fullwidth digit 1
-    digitMap.put('\u0661', '1');  // Arabic-indic digit 1
-    digitMap.put('\u06F1', '1');  // Eastern-Arabic digit 1
-    digitMap.put('\uFF12', '2');  // Fullwidth digit 2
-    digitMap.put('\u0662', '2');  // Arabic-indic digit 2
-    digitMap.put('\u06F2', '2');  // Eastern-Arabic digit 2
-    digitMap.put('\uFF13', '3');  // Fullwidth digit 3
-    digitMap.put('\u0663', '3');  // Arabic-indic digit 3
-    digitMap.put('\u06F3', '3');  // Eastern-Arabic digit 3
-    digitMap.put('\uFF14', '4');  // Fullwidth digit 4
-    digitMap.put('\u0664', '4');  // Arabic-indic digit 4
-    digitMap.put('\u06F4', '4');  // Eastern-Arabic digit 4
-    digitMap.put('\uFF15', '5');  // Fullwidth digit 5
-    digitMap.put('\u0665', '5');  // Arabic-indic digit 5
-    digitMap.put('\u06F5', '5');  // Eastern-Arabic digit 5
-    digitMap.put('\uFF16', '6');  // Fullwidth digit 6
-    digitMap.put('\u0666', '6');  // Arabic-indic digit 6
-    digitMap.put('\u06F6', '6');  // Eastern-Arabic digit 6
-    digitMap.put('\uFF17', '7');  // Fullwidth digit 7
-    digitMap.put('\u0667', '7');  // Arabic-indic digit 7
-    digitMap.put('\u06F7', '7');  // Eastern-Arabic digit 7
-    digitMap.put('\uFF18', '8');  // Fullwidth digit 8
-    digitMap.put('\u0668', '8');  // Arabic-indic digit 8
-    digitMap.put('\u06F8', '8');  // Eastern-Arabic digit 8
-    digitMap.put('\uFF19', '9');  // Fullwidth digit 9
-    digitMap.put('\u0669', '9');  // Arabic-indic digit 9
-    digitMap.put('\u06F9', '9');  // Eastern-Arabic digit 9
-    DIGIT_MAPPINGS = Collections.unmodifiableMap(digitMap);
-
      HashMap<Character, Character> alphaMap = new HashMap<Character, Character>(40);
      alphaMap.put('A', '2');
      alphaMap.put('B', '2');
@@ -182,9 +143,9 @@ public class PhoneNumberUtil {
      ALPHA_MAPPINGS = Collections.unmodifiableMap(alphaMap);
  
      HashMap<Character, Character> combinedMap = new HashMap<Character, Character>(100);
-    combinedMap.putAll(alphaMap);
-    combinedMap.putAll(digitMap);
-    ALL_NORMALIZATION_MAPPINGS = Collections.unmodifiableMap(combinedMap);
+    combinedMap.putAll(ALPHA_MAPPINGS);
+    combinedMap.putAll(asciiDigitMappings);
+    ALPHA_PHONE_MAPPINGS = Collections.unmodifiableMap(combinedMap);
  
      HashMap<Character, Character> allPlusNumberGroupings = new HashMap<Character, Character>();
      // Put (lower letter -> upper letter) and (upper letter -> upper letter) mappings.
@@ -226,13 +187,12 @@ public class PhoneNumberUtil {
    // found as a leading character only.
    // This consists of dash characters, white space characters, full stops, slashes,
    // square brackets, parentheses and tildes. It also includes the letter 'x' as that is found as a
-  // placeholder for carrier information in some phone numbers.
+  // placeholder for carrier information in some phone numbers. Full-width variants are also
+  // present.
    static final String VALID_PUNCTUATION = "-x\u2010-\u2015\u2212\u30FC\uFF0D-\uFF0F " +
        "\u00A0\u200B\u2060\u3000()\uFF08\uFF09\uFF3B\uFF3D.\\[\\]/~\u2053\u223C\uFF5E";
  
-  // Digits accepted in phone numbers that we understand.
-  private static final String VALID_DIGITS =
-      Arrays.toString(DIGIT_MAPPINGS.keySet().toArray()).replaceAll("[, \\[\\]]", "");
+  private static final String DIGITS = "\\p{Nd}";
    // We accept alpha characters in phone numbers, ASCII only, upper and lower case.
    private static final String VALID_ALPHA =
        Arrays.toString(ALPHA_MAPPINGS.keySet().toArray()).replaceAll("[, \\[\\]]", "") +
@@ -240,8 +200,7 @@ public class PhoneNumberUtil {
    static final String PLUS_CHARS = "+\uFF0B";
    private static final Pattern PLUS_CHARS_PATTERN = Pattern.compile("[" + PLUS_CHARS + "]+");
    private static final Pattern SEPARATOR_PATTERN = Pattern.compile("[" + VALID_PUNCTUATION + "]+");
-  private static final Pattern CAPTURING_DIGIT_PATTERN =
-      Pattern.compile("([" + VALID_DIGITS + "])");
+  private static final Pattern CAPTURING_DIGIT_PATTERN = Pattern.compile("(" + DIGITS + ")");
  
    // Regular expression of acceptable characters that may start a phone number for the purposes of
    // parsing. This allows us to strip away meaningless prefixes to phone numbers that may be
@@ -249,7 +208,7 @@ public class PhoneNumberUtil {
    // does not contain alpha characters, although they may be used later in the number. It also does
    // not include other punctuation, as this will be stripped later during parsing and is of no
    // information value when parsing a number.
-  private static final String VALID_START_CHAR = "[" + PLUS_CHARS + VALID_DIGITS + "]";
+  private static final String VALID_START_CHAR = "[" + PLUS_CHARS + DIGITS + "]";
    static final Pattern VALID_START_CHAR_PATTERN = Pattern.compile(VALID_START_CHAR);
  
    // Regular expression of characters typically used to start a second phone number for the purposes
@@ -280,8 +239,8 @@ public class PhoneNumberUtil {
    // plus_sign*([punctuation]*[digits]){3,}([punctuation]|[digits]|[alpha])*
    // Note VALID_PUNCTUATION starts with a -, so must be the first in the range.
    private static final String VALID_PHONE_NUMBER =
-      "[" + PLUS_CHARS + "]*(?:[" + VALID_PUNCTUATION + "]*[" + VALID_DIGITS + "]){3,}[" +
-      VALID_PUNCTUATION + VALID_ALPHA + VALID_DIGITS + "]*";
+      "[" + PLUS_CHARS + "]*(?:[" + VALID_PUNCTUATION + "]*" + DIGITS + "){3,}[" +
+      VALID_PUNCTUATION + VALID_ALPHA + DIGITS + "]*";
  
    // Default extension prefix to use when formatting. This will be put in front of any extension
    // component of the number, after the main national number is formatted. For example, if you wish
@@ -301,13 +260,13 @@ public class PhoneNumberUtil {
    // Canonical-equivalence doesn't seem to be an option with Android java, so we allow two options
    // for representing the accented o - the character itself, and one in the unicode decomposed form
    // with the combining acute accent.
-  private static final String CAPTURING_EXTN_DIGITS = "([" + VALID_DIGITS + "]{1,7})";
+  private static final String CAPTURING_EXTN_DIGITS = "(" + DIGITS + "{1,7})";
    static final String KNOWN_EXTN_PATTERNS =
        RFC3966_EXTN_PREFIX + CAPTURING_EXTN_DIGITS + "|" +
        "[ \u00A0\\t,]*(?:ext(?:ensi(?:o\u0301?|\u00F3))?n?|" +
        "\uFF45\uFF58\uFF54\uFF4E?|[,x\uFF58#\uFF03~\uFF5E]|int|anexo|\uFF49\uFF4E\uFF54)" +
        "[:\\.\uFF0E]?[ \u00A0\\t,-]*" + CAPTURING_EXTN_DIGITS + "#?|" +
-      "[- ]+([" + VALID_DIGITS + "]{1,5})#";
+      "[- ]+(" + DIGITS + "{1,5})#";
  
    // Regexp of all known extension prefixes used by different regions followed by 1 or more valid
    // digits, for use when parsing.
@@ -342,7 +301,7 @@ public class PhoneNumberUtil {
  
    /**
     * INTERNATIONAL and NATIONAL formats are consistent with the definition in ITU-T Recommendation
-   * E. 123. For example, the number of the Google Zurich office will be written as
+   * E.123. For example, the number of the Google Switzerland office will be written as
     * "+41 44 668 1800" in INTERNATIONAL format, and as "044 668 1800" in NATIONAL format.
     * E164 format is as per INTERNATIONAL format but with no formatting applied, e.g. +41446681800.
     * RFC3966 is as per INTERNATIONAL format, but with all spaces and other separating symbols
@@ -527,13 +486,15 @@ public class PhoneNumberUtil {
    /**
     * Normalizes a string of characters representing a phone number. This performs the following
     * conversions:
-   *   Wide-ascii digits are converted to normal ASCII (European) digits.
+   *   Punctuation is stripped.
+   *   For ALPHA/VANITY numbers:
     *   Letters are converted to their numeric representation on a telephone keypad. The keypad
     *       used here is the one defined in ITU Recommendation E.161. This is only done if there are
-   *       3 or more letters in the number, to lessen the risk that such letters are typos -
-   *       otherwise alpha characters are stripped.
-   *   Punctuation is stripped.
+   *       3 or more letters in the number, to lessen the risk that such letters are typos.
+   *   For other numbers:
+   *   Wide-ascii digits are converted to normal ASCII (European) digits.
     *   Arabic-Indic numerals are converted to European numerals.
+   *   Spurious alpha characters are stripped.
     *
     * @param number  a string of characters representing a phone number
     * @return        the normalized string version of the phone number
@@ -541,9 +502,9 @@ public class PhoneNumberUtil {
    static String normalize(String number) {
      Matcher m = VALID_ALPHA_PHONE_PATTERN.matcher(number);
      if (m.matches()) {
-      return normalizeHelper(number, ALL_NORMALIZATION_MAPPINGS, true);
+      return normalizeHelper(number, ALPHA_PHONE_MAPPINGS, true);
      } else {
-      return normalizeHelper(number, DIGIT_MAPPINGS, true);
+      return normalizeDigitsOnly(number);
      }
    }
  
@@ -567,16 +528,23 @@ public class PhoneNumberUtil {
     * @return        the normalized string version of the phone number
     */
    public static String normalizeDigitsOnly(String number) {
-    return normalizeHelper(number, DIGIT_MAPPINGS, true);
+    int numberLength = number.length();
+    StringBuilder normalizedDigits = new StringBuilder(numberLength);
+    for (int i = 0; i < numberLength; i++) {
+      int d = Character.digit(number.charAt(i), 10);
+      if (d != -1) {
+        normalizedDigits.append(d);
+      }
+    }
+    return normalizedDigits.toString();
    }
  
    /**
     * Converts all alpha characters in a number to their respective digits on a keypad, but retains
-   * existing formatting. This Java implementation of this function also converts wide-ascii digits
-   * to normal ascii digits, and converts Arabic-Indic numerals to European numerals.
+   * existing formatting.
     */
    public static String convertAlphaCharactersInNumber(String number) {
-    return normalizeHelper(number, ALL_NORMALIZATION_MAPPINGS, false);
+    return normalizeHelper(number, ALPHA_PHONE_MAPPINGS, false);
    }
  
    /**
@@ -604,15 +572,16 @@ public class PhoneNumberUtil {
     * </pre>
     *
     * N.B.: area code is a very ambiguous concept, so the I18N team generally recommends against
-   * using it for most purposes. Read the following carefully before deciding to use this method:
-   *
-   *  - geographical area codes change over time, and this method honors those changes; therefore,
-   *    it doesn't guarantee the stability of the result it produces.
-   *  - subscriber numbers may not be diallable from all devices (notably mobile devices, which
-   *    typically requires the full national_number to be dialled in most countries).
-   *  - most non-geographical numbers have no area codes.
-   *  - some geographical numbers have no area codes.
-   *
+   * using it for most purposes, but recommends using the more general {@code national_number}
+   * instead. Read the following carefully before deciding to use this method:
+   * <ul>
+   *  <li> geographical area codes change over time, and this method honors those changes;
+   *    therefore, it doesn't guarantee the stability of the result it produces.
+   *  <li> subscriber numbers may not be diallable from all devices (notably mobile devices, which
+   *    typically require the full national_number to be dialled in most regions).
+   *  <li> most non-geographical numbers have no area codes.
+   *  <li> some geographical numbers have no area codes.
+   * </ul>
     * @param number  the PhoneNumber object for which clients want to know the length of the area
     *     code.
     * @return  the length of area code of the PhoneNumber object passed in.
@@ -663,7 +632,7 @@ public class PhoneNumberUtil {
     * </pre>
     *
     * Refer to the unittests to see the difference between this function and
-   * {@link #getLengthOfGeographicalAreaCode()}.
+   * {@link #getLengthOfGeographicalAreaCode}.
     *
     * @param number  the PhoneNumber object for which clients want to know the length of the NDC.
     * @return  the length of NDC of the PhoneNumber object passed in.
@@ -915,7 +884,7 @@ public class PhoneNumberUtil {
     * {@code carrierCode}. The {@code carrierCode} will always be used regardless of whether the
     * phone number already has a preferred domestic carrier code stored. If {@code carrierCode}
     * contains an empty string, returns the number in national format without any carrier code.
-   * 
+   *
     * @param number  the phone number to be formatted
     * @param carrierCode  the carrier selection code to be used
     * @return  the formatted phone number in national format for dialing using the carrier as
@@ -969,7 +938,7 @@ public class PhoneNumberUtil {
    /**
     * Formats a phone number for out-of-country dialing purposes. If no regionCallingFrom is
     * supplied, we format the number in its INTERNATIONAL format. If the country calling code is the
-   * same as the region where the number is from, then NATIONAL formatting will be applied.
+   * same as that of the region where the number is from, then NATIONAL formatting will be applied.
     *
     * <p>If the number itself has a country calling code of zero or an otherwise invalid country
     * calling code, then we return the number with no formatting applied.
@@ -1935,7 +1904,7 @@ public class PhoneNumberUtil {
        // cannot begin with 0.
        Matcher digitMatcher = CAPTURING_DIGIT_PATTERN.matcher(number.substring(matchEnd));
        if (digitMatcher.find()) {
-        String normalizedGroup = normalizeHelper(digitMatcher.group(1), DIGIT_MAPPINGS, true);
+        String normalizedGroup = normalizeDigitsOnly(digitMatcher.group(1));
          if (normalizedGroup.equals("0")) {
            return false;
          }
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_BG b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_BG

index e1e34d9..f3645fd 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_BG and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_BG differ
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_EG b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_EG

index e3a1c56..a00edb4 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_EG and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_EG differ
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_ES b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_ES

index 2ae03c9..c5717d0 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_ES and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_ES differ
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_GH b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_GH

index 64e2f87..4e97d13 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_GH and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_GH differ
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_KZ b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_KZ

index bf98d95..06708a2 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_KZ and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_KZ differ
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_PF b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_PF

index cd67c8b..f09ce4c 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_PF and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_PF differ
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SC b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SC

index ff47679..95fa89a 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SC and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SC differ
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SY b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SY

index b2256b3..858b5da 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SY and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SY differ
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_US b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_US

index 7f80f1d..b0b6597 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_US and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_US differ
diff --git a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_VU b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_VU

index 40c22cd..fe110d2 100644 (file)

Binary files a/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_VU and b/java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_VU differ
diff --git a/java/test/com/google/i18n/phonenumbers/AsYouTypeFormatterTest.java b/java/test/com/google/i18n/phonenumbers/AsYouTypeFormatterTest.java

index b03f6e5..fdd5908 100644 (file)
--- a/java/test/com/google/i18n/phonenumbers/AsYouTypeFormatterTest.java
+++ b/java/test/com/google/i18n/phonenumbers/AsYouTypeFormatterTest.java
@@ -56,6 +56,28 @@ public class AsYouTypeFormatterTest extends TestCase {
      assertEquals("650253", formatter.inputDigit('3'));
    }
  
+  public void testTooLongNumberMatchingMultipleLeadingDigits() {
+    // See http://code.google.com/p/libphonenumber/issues/detail?id=36
+    // The bug occurred last time for countries which have two formatting rules with exactly the
+    // same leading digits pattern but differ in length.
+    AsYouTypeFormatter formatter = phoneUtil.getAsYouTypeFormatter("ZZ");
+    assertEquals("+", formatter.inputDigit('+'));
+    assertEquals("+8", formatter.inputDigit('8'));
+    assertEquals("+81 ", formatter.inputDigit('1'));
+    assertEquals("+81 9", formatter.inputDigit('9'));
+    assertEquals("+81 90", formatter.inputDigit('0'));
+    assertEquals("+81 90 1", formatter.inputDigit('1'));
+    assertEquals("+81 90 12", formatter.inputDigit('2'));
+    assertEquals("+81 90 123", formatter.inputDigit('3'));
+    assertEquals("+81 90 1234", formatter.inputDigit('4'));
+    assertEquals("+81 90 1234 5", formatter.inputDigit('5'));
+    assertEquals("+81 90 1234 56", formatter.inputDigit('6'));
+    assertEquals("+81 90 1234 567", formatter.inputDigit('7'));
+    assertEquals("+81 90 1234 5678", formatter.inputDigit('8'));
+    assertEquals("+81 90 12 345 6789", formatter.inputDigit('9'));
+    assertEquals("+81901234567890", formatter.inputDigit('0'));
+  }
+
    public void testAYTFUS() {
      AsYouTypeFormatter formatter = phoneUtil.getAsYouTypeFormatter("US");
      assertEquals("6", formatter.inputDigit('6'));
diff --git a/java/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java b/java/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java

index 0c55964..1f4d5fb 100644 (file)
--- a/java/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java
+++ b/java/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java
@@ -221,6 +221,123 @@ public class PhoneNumberMatcherTest extends TestCase {
      assertEquals(number, matchWithSpaces.rawString());
    }
  
+  public void testIsLatinLetter() throws Exception {
+    assertTrue(PhoneNumberMatcher.isLatinLetter('c'));
+    assertTrue(PhoneNumberMatcher.isLatinLetter('C'));
+    assertTrue(PhoneNumberMatcher.isLatinLetter('\u00C9'));
+    assertTrue(PhoneNumberMatcher.isLatinLetter('\u0301'));  // Combining acute accent
+    // Punctuation, digits and white-space are not considered "latin letters".
+    assertFalse(PhoneNumberMatcher.isLatinLetter(':'));
+    assertFalse(PhoneNumberMatcher.isLatinLetter('5'));
+    assertFalse(PhoneNumberMatcher.isLatinLetter('-'));
+    assertFalse(PhoneNumberMatcher.isLatinLetter('.'));
+    assertFalse(PhoneNumberMatcher.isLatinLetter(' '));
+    assertFalse(PhoneNumberMatcher.isLatinLetter('\u6211'));  // Chinese character
+  }
+
+  public void testMatchesWithSurroundingLatinChars() throws Exception {
+    ArrayList<NumberContext> contextPairs = new ArrayList<NumberContext>(5);
+    contextPairs.add(new NumberContext("abc", "def"));
+    contextPairs.add(new NumberContext("abc", ""));
+    contextPairs.add(new NumberContext("", "def"));
+    // Latin capital letter E with an acute accent.
+    contextPairs.add(new NumberContext("\u00C9", ""));
+    // Small letter e with an acute accent, in decomposed form (e plus combining mark).
+    contextPairs.add(new NumberContext("e\u0301", ""));
+
+    // Numbers should not be considered valid, if they are surrounded by Latin characters, but
+    // should be considered possible.
+    findMatchesInContexts(contextPairs, false, true);
+  }
+
+  public void testMatchesWithSurroundingLatinCharsAndLeadingPunctuation() throws Exception {
+    // Contexts with trailing characters. Leading characters are okay here since the numbers we will
+    // insert start with punctuation, but trailing characters are still not allowed.
+    ArrayList<NumberContext> possibleOnlyContexts = new ArrayList<NumberContext>(3);
+    possibleOnlyContexts.add(new NumberContext("abc", "def"));
+    possibleOnlyContexts.add(new NumberContext("", "def"));
+    possibleOnlyContexts.add(new NumberContext("", "\u00C9"));
+
+    // Numbers should not be considered valid, if they have trailing Latin characters, but should be
+    // considered possible.
+    String numberWithPlus = "+14156667777";
+    String numberWithBrackets = "(415)6667777";
+    findMatchesInContexts(possibleOnlyContexts, false, true, "US", numberWithPlus);
+    findMatchesInContexts(possibleOnlyContexts, false, true, "US", numberWithBrackets);
+
+    ArrayList<NumberContext> validContexts = new ArrayList<NumberContext>(4);
+    validContexts.add(new NumberContext("abc", ""));
+    validContexts.add(new NumberContext("\u00C9", ""));
+    validContexts.add(new NumberContext("\u00C9", "."));  // Trailing punctuation.
+    validContexts.add(new NumberContext("\u00C9", " def"));  // Trailing white-space.
+
+    // Numbers should be considered valid, since they start with punctuation.
+    findMatchesInContexts(validContexts, true, true, "US", numberWithPlus);
+    findMatchesInContexts(validContexts, true, true, "US", numberWithBrackets);
+  }
+
+  public void testMatchesWithSurroundingChineseChars() throws Exception {
+    ArrayList<NumberContext> validContexts = new ArrayList<NumberContext>(3);
+    validContexts.add(new NumberContext("\u6211\u7684\u7535\u8BDD\u53F7\u7801\u662F", ""));
+    validContexts.add(new NumberContext("", "\u662F\u6211\u7684\u7535\u8BDD\u53F7\u7801"));
+    validContexts.add(new NumberContext("\u8BF7\u62E8\u6253", "\u6211\u5728\u660E\u5929"));
+
+    // Numbers should be considered valid, since they are surrounded by Chinese.
+    findMatchesInContexts(validContexts, true, true);
+  }
+
+  public void testMatchesWithSurroundingPunctuation() throws Exception {
+    ArrayList<NumberContext> validContexts = new ArrayList<NumberContext>(4);
+    validContexts.add(new NumberContext("My number-", ""));  // At end of text.
+    validContexts.add(new NumberContext("", ".Nice day."));  // At start of text.
+    validContexts.add(new NumberContext("Tel:", "."));  // Punctuation surrounds number.
+    validContexts.add(new NumberContext("Tel: ", " on Saturdays."));  // White-space is also fine.
+
+    // Numbers should be considered valid, since they are surrounded by punctuation.
+    findMatchesInContexts(validContexts, true, true);
+  }
+
+  /**
+   * Helper method which tests the contexts provided and ensures that:
+   * -- if isValid is true, they all find a test number inserted in the middle when leniency of
+   *  matching is set to VALID; else no test number should be extracted at that leniency level
+   * -- if isPossible is true, they all find a test number inserted in the middle when leniency of
+   *  matching is set to POSSIBLE; else no test number should be extracted at that leniency level
+   */
+  private void findMatchesInContexts(List<NumberContext> contexts, boolean isValid,
+                                     boolean isPossible, String region, String number) {
+    if (isValid) {
+      doTestInContext(number, region, contexts, Leniency.VALID);
+    } else {
+      for (NumberContext context : contexts) {
+        String text = context.leadingText + number + context.trailingText;
+        assertTrue("Should not have found a number in " + text,
+                   hasNoMatches(phoneUtil.findNumbers(text, region)));
+      }
+    }
+    if (isPossible) {
+      doTestInContext(number, region, contexts, Leniency.POSSIBLE);
+    } else {
+      for (NumberContext context : contexts) {
+        String text = context.leadingText + number + context.trailingText;
+        assertTrue("Should not have found a number in " + text,
+                   hasNoMatches(phoneUtil.findNumbers(text, region, Leniency.POSSIBLE,
+                                                      Long.MAX_VALUE)));
+      }
+    }
+  }
+
+  /**
+   * Variant of findMatchesInContexts that uses a default number and region.
+   */
+  private void findMatchesInContexts(List<NumberContext> contexts, boolean isValid,
+                                     boolean isPossible) {
+    String region = "US";
+    String number = "415-666-7777";
+
+    findMatchesInContexts(contexts, isValid, isPossible, region, number);
+  }
+
    public void testNonMatchingBracketsAreInvalid() throws Exception {
      // The digits up to the ", " form a valid US number, but it shouldn't be matched as one since
      // there was a non-matching bracket present.
@@ -473,6 +590,9 @@ public class PhoneNumberMatcherTest extends TestCase {
      }
    }
  
+  /**
+   * Tests valid numbers in contexts that should pass for {@link Leniency#POSSIBLE}.
+   */
    private void findPossibleInContext(String number, String defaultCountry) {
      ArrayList<NumberContext> contextPairs = new ArrayList<NumberContext>(15);
      contextPairs.add(new NumberContext("", ""));  // no context
@@ -513,7 +633,8 @@ public class PhoneNumberMatcherTest extends TestCase {
    }
  
    /**
-   * Tests valid numbers in contexts that fail for {@link Leniency#POSSIBLE}.
+   * Tests valid numbers in contexts that fail for {@link Leniency#POSSIBLE} but are valid for
+   * {@link Leniency#VALID}.
     */
    private void findValidInContext(String number, String defaultCountry) {
      ArrayList<NumberContext> contextPairs = new ArrayList<NumberContext>(5);
diff --git a/java/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java b/java/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java

index f4a2751..3308dec 100644 (file)
--- a/java/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java
+++ b/java/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java
@@ -145,8 +145,8 @@ public class PhoneNumberUtilTest extends TestCase {
      assertTrue(metadata.hasNationalPrefix());
      assertEquals(2, metadata.numberFormatSize());
      assertEquals("(\\d{3})(\\d{3})(\\d{4})",
-                 metadata.getNumberFormat(0).getPattern());
-    assertEquals("$1 $2 $3", metadata.getNumberFormat(0).getFormat());
+                 metadata.getNumberFormat(1).getPattern());
+    assertEquals("$1 $2 $3", metadata.getNumberFormat(1).getFormat());
      assertEquals("[13-9]\\d{9}|2[0-35-9]\\d{8}",
                   metadata.getGeneralDesc().getNationalNumberPattern());
      assertEquals("\\d{7}(?:\\d{3})?", metadata.getGeneralDesc().getPossibleNumberPattern());
@@ -290,6 +290,13 @@ public class PhoneNumberUtilTest extends TestCase {
                                                   PhoneNumberUtil.PhoneNumberType.MOBILE));
    }
  
+  public void testConvertAlphaCharactersInNumber() {
+    String input = "1800-ABC-DEF";
+    // Alpha chars are converted to digits; everything else is left untouched.
+    String expectedOutput = "1800-222-333";
+    assertEquals(expectedOutput, PhoneNumberUtil.convertAlphaCharactersInNumber(input));
+  }
+
    public void testNormaliseRemovePunctuation() {
      String inputNumber = "034-56&+#234";
      String expectedOutput = "03456234";
@@ -1037,6 +1044,9 @@ public class PhoneNumberUtilTest extends TestCase {
      // Alpha numbers.
      assertTrue(PhoneNumberUtil.isViablePhoneNumber("0800-4-pizza"));
      assertTrue(PhoneNumberUtil.isViablePhoneNumber("0800-4-PIZZA"));
+  }
+
+  public void testIsViablePhoneNumberNonAscii() {
      // Only one or two digits before possible punctuation followed by more digits.
      assertTrue(PhoneNumberUtil.isViablePhoneNumber("1\u300034"));
      assertFalse(PhoneNumberUtil.isViablePhoneNumber("1\u30003+4"));
@@ -1352,6 +1362,9 @@ public class PhoneNumberUtilTest extends TestCase {
      assertEquals(US_NUMBER, phoneUtil.parse("0~01-650-253-0000", RegionCode.PL));
      // Using "++" at the start.
      assertEquals(US_NUMBER, phoneUtil.parse("++1 (650) 253-0000", RegionCode.PL));
+  }
+
+  public void testParseNonAscii() throws Exception {
      // Using a full-width plus sign.
      assertEquals(US_NUMBER, phoneUtil.parse("\uFF0B1 (650) 253-0000", RegionCode.SG));
      // The whole number, including punctuation, is here represented in full-width form.
@@ -1364,6 +1377,11 @@ public class PhoneNumberUtilTest extends TestCase {
                                              "\u3000\uFF12\uFF15\uFF13\u30FC\uFF10\uFF10\uFF10" +
                                              "\uFF10",
                                              RegionCode.SG));
+
+    // Using a very strange decimal digit range (Mongolian digits).
+    assertEquals(US_NUMBER, phoneUtil.parse("\u1811 \u1816\u1815\u1810 " +
+                                            "\u1812\u1815\u1813 \u1810\u1810\u1810\u1810",
+                                            RegionCode.US));
    }
  
    public void testParseWithLeadingZero() throws Exception {
diff --git a/java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_JP b/java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_JP

index f1c9af5..cfb8f71 100644 (file)

Binary files a/java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_JP and b/java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_JP differ
diff --git a/java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_US b/java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_US

index 9e6ba69..d93e0a2 100644 (file)

Binary files a/java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_US and b/java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_US differ
diff --git a/resources/PhoneNumberMetaData.xml b/resources/PhoneNumberMetaData.xml

index 2b8dd96..59a7cb9 100644 (file)
--- a/resources/PhoneNumberMetaData.xml
+++ b/resources/PhoneNumberMetaData.xml
@@ -1902,6 +1902,10 @@
      <territory id="BG" countryCode="359" internationalPrefix="00"
                 nationalPrefix="0" nationalPrefixFormattingRule="$NP$FG">
        <availableFormats>
+        <numberFormat pattern="(2)(\d{5})">
+          <leadingDigits>29</leadingDigits>
+          <format>$1/$2</format>
+        </numberFormat>
          <numberFormat pattern="(2)(\d{3})(\d{3,4})">
            <leadingDigits>2</leadingDigits>
            <format>$1/$2 $3</format>
@@ -1943,13 +1947,21 @@
          </numberFormat>
        </availableFormats>
        <generalDesc>
-        <nationalNumberPattern>[2-9]\d{6,8}</nationalNumberPattern>
-        <possibleNumberPattern>\d{7,9}</possibleNumberPattern>
+        <nationalNumberPattern>
+            [23567]\d{5,7}|
+            [489]\d{6,8}
+        </nationalNumberPattern>
+        <possibleNumberPattern>\d{5,9}</possibleNumberPattern>
        </generalDesc>
        <fixedLine>
+        <!-- 29xxxx numbers have been added because they can be found online, and are typically
+             used by taxi companies. -->
          <nationalNumberPattern>
+          2(?:
+            [0-8]\d{5,6}|
+            9\d{4,6}
+          )|
            (?:
-            2\d|
              [36]\d|
              5[1-9]|
              8[1-6]|
@@ -1966,7 +1978,7 @@
              )
            )\d{4,5}
          </nationalNumberPattern>
-        <possibleNumberPattern>\d{7,8}</possibleNumberPattern>
+        <possibleNumberPattern>\d{5,8}</possibleNumberPattern>
          <exampleNumber>2123456</exampleNumber>
        </fixedLine>
        <mobile>
@@ -4876,7 +4888,9 @@
        <generalDesc>
          <nationalNumberPattern>
            1\d{4,9}|
-          [2-689]\d{7,9}
+          [2456]\d{8}|
+          3\d{7}|
+          [89]\d{8,9}
          </nationalNumberPattern>
          <possibleNumberPattern>\d{5,10}</possibleNumberPattern>
        </generalDesc>
@@ -5001,7 +5015,12 @@
          <exampleNumber>810123456</exampleNumber>
        </fixedLine>
        <mobile>
-        <nationalNumberPattern>6\d{8}</nationalNumberPattern>
+        <nationalNumberPattern>
+          (?:
+            6\d|
+            7[1-4]
+          )\d{7}
+        </nationalNumberPattern>
          <exampleNumber>612345678</exampleNumber>
        </mobile>
        <tollFree>
@@ -6619,7 +6638,7 @@
          <nationalNumberPattern>
            (?:
              2[034678]|
-            54
+            5[47]
            )\d{7}
          </nationalNumberPattern>
          <possibleNumberPattern>\d{9}</possibleNumberPattern>
@@ -11558,10 +11577,10 @@
        <mobile>
          <nationalNumberPattern>
            7(?:
-            0[01257]\d{2}|
-            6[02-4]\d{2}|
-            7[157]\d{2}
-          )\d{5}
+            0[01257]|
+            6[02-4]|
+            7[157]
+          )\d{7}
          </nationalNumberPattern>
          <exampleNumber>7710009998</exampleNumber>
        </mobile>
@@ -15197,7 +15216,7 @@
          <nationalNumberPattern>
            (?:
              [27]\d{3}|
-            3[0-49]\d{2}|
+            3[0-59]\d{2}|
              411[3-6]
            )\d{2}
          </nationalNumberPattern>
@@ -16673,26 +16692,23 @@
        <availableFormats>
          <numberFormat pattern="(\d{3})(\d{3})">
            <leadingDigits>
-            [3578]|
+            [35789]|
              2[1-4689]|
-            6(?:
-              [0-35-9]|
-              4[0-689]
-            )
+            6[0-35-9]
            </leadingDigits>
            <format>$1 $2</format>
          </numberFormat>
          <numberFormat pattern="(\d)(\d{3})(\d{3})">
            <leadingDigits>
              2[57]|
-            4[2-46]|
-            647
+            4|
+            64
            </leadingDigits>
            <format>$1 $2 $3</format>
          </numberFormat>
        </availableFormats>
        <generalDesc>
-        <nationalNumberPattern>[2-8]\d{5,6}</nationalNumberPattern>
+        <nationalNumberPattern>[2-9]\d{5,6}</nationalNumberPattern>
          <possibleNumberPattern>\d{6,7}</possibleNumberPattern>
        </generalDesc>
        <fixedLine>
@@ -16702,7 +16718,9 @@
               phone numbers. -->
          <!-- Old numbers and new numbers (with 4 at front) running in parallel from 4th May
               2011 until 4 August 2011.  Note the 55 and 78 numbers will have a 2 inserted at
-             front instead of a 4. -->
+             front instead of a 4.
+             Since the 4 is optional, the newly assigned group 44\d{5} has been separated out
+             in the reg-exp for now to prevent 4\d{5} being considered valid. -->
          <nationalNumberPattern>
            (?:
              2?(?:
@@ -16711,34 +16729,13 @@
              )|
              4?(?:
                2(?:
-                1[78]|
-                2[14-69]|
-                3[2-4]|
-                4[1-36-8]|
-                6[167]|
-                [89]\d
+                0[589]|
+                1[03-9]|
+                [2-9]\d
                )|
-              3(?:
-                0[34]|
-                2[1-6]|
-                4[4-6]|
-                55|
-                6[016]|
-                7\d|
-                8[0-589]|
-                9[0-5]
-              )|
-              6(?:
-                0[0-256]|
-                1[0-478]|
-                2[145]|
-                3[02-4]|
-                4[124]|
-                6[015]|
-                7\d|
-                8[1-3]
-              )
-            )
+              [36]\d{2}
+            )|
+            44\d{2}
            )\d{3}
          </nationalNumberPattern>
          <possibleNumberPattern>\d{6,7}</possibleNumberPattern>
@@ -16767,16 +16764,35 @@
          <possibleNumberPattern>\d{6}</possibleNumberPattern>
          <exampleNumber>800000</exampleNumber>
        </tollFree>
+      <premiumRate>
+        <!-- Using this to represent Value Added Service in the plan. -->
+        <nationalNumberPattern>98\d{4}</nationalNumberPattern>
+        <possibleNumberPattern>\d{6}</possibleNumberPattern>
+        <exampleNumber>981234</exampleNumber>
+      </premiumRate>
        <voip>
-        <nationalNumberPattern>
-          (?:
-            44[1-3]|
-            647
-          )\d{4}
-        </nationalNumberPattern>
+        <nationalNumberPattern>64\d{5}</nationalNumberPattern>
          <possibleNumberPattern>\d{7}</possibleNumberPattern>
-        <exampleNumber>4410123</exampleNumber>
+        <exampleNumber>6412345</exampleNumber>
        </voip>
+      <shortCode>
+        <nationalNumberPattern>
+          1(?:
+            0\d|
+            1[027]|
+            2[0-8]|
+            3[13]|
+            4[0-2]|
+            [59][15]|
+            6[1-9]|
+            7[124-6]|
+            8[158]
+          )|
+          96\d{2}
+        </nationalNumberPattern>
+        <possibleNumberPattern>\d{3,4}</possibleNumberPattern>
+        <exampleNumber>100</exampleNumber>
+      </shortCode>
      </territory>
  
      <!-- Sudan -->
@@ -17747,8 +17763,8 @@
              )|
              2[1-35]|
              3(?:
-              1\d|
-              [34]
+              [13]\d|
+              4
              )|
              4[13]|
              5[1-3]
@@ -18908,16 +18924,16 @@
      <territory id="US" countryCode="1" internationalPrefix="011"
        mainCountryForCode="true" nationalPrefix="1">
        <availableFormats>
+        <numberFormat pattern="(\d{3})(\d{4})">
+          <format>$1-$2</format>
+          <intlFormat>NA</intlFormat>
+        </numberFormat>
          <numberFormat pattern="(\d{3})(\d{3})(\d{4})">
            <format>($1) $2-$3</format>
            <!-- A different pattern is used when formatting internationally, as the area code is no
                 longer optional and should not be in brackets. -->
            <intlFormat>$1-$2-$3</intlFormat>
          </numberFormat>
-        <numberFormat pattern="(\d{3})(\d{4})">
-          <format>$1-$2</format>
-          <intlFormat>NA</intlFormat>
-        </numberFormat>
        </availableFormats>
        <generalDesc>
          <nationalNumberPattern>[2-9]\d{9}</nationalNumberPattern>
@@ -19730,12 +19746,12 @@
        <!-- Should be formatted in one block, apart from the mobile numbers. -->
        <availableFormats>
          <numberFormat pattern="(\d{3})(\d{4})">
-          <leadingDigits>[57]</leadingDigits>
+          <leadingDigits>[579]</leadingDigits>
            <format>$1 $2</format>
          </numberFormat>
        </availableFormats>
        <generalDesc>
-        <nationalNumberPattern>[2-578]\d{4,6}</nationalNumberPattern>
+        <nationalNumberPattern>[2-57-9]\d{4,6}</nationalNumberPattern>
          <possibleNumberPattern>\d{5,7}</possibleNumberPattern>
        </generalDesc>
        <fixedLine>
@@ -19766,10 +19782,13 @@
          <possibleNumberPattern>\d{7}</possibleNumberPattern>
          <exampleNumber>5912345</exampleNumber>
        </mobile>
-      <!-- Using this for non-geographical numbers. -->
+      <!-- Using this for non-geographical numbers, since they have not been clearly defined. -->
        <uan>
-        <nationalNumberPattern>30\d{3}</nationalNumberPattern>
-        <possibleNumberPattern>\d{5}</possibleNumberPattern>
+        <nationalNumberPattern>
+          30\d{3}|
+          900\d{4}
+        </nationalNumberPattern>
+        <possibleNumberPattern>\d{5,7}</possibleNumberPattern>
          <exampleNumber>30123</exampleNumber>
        </uan>
      </territory>
diff --git a/resources/PhoneNumberMetaDataForTesting.xml b/resources/PhoneNumberMetaDataForTesting.xml

index 951c64e..cc74b7d 100644 (file)
--- a/resources/PhoneNumberMetaDataForTesting.xml
+++ b/resources/PhoneNumberMetaDataForTesting.xml
@@ -330,6 +330,10 @@
            <leadingDigits>[57-9]0</leadingDigits>
            <format>$1 $2 $3</format>
          </numberFormat>
+        <numberFormat pattern="(\d{2})(\d{2})(\d{3})(\d{4})">
+          <leadingDigits>[57-9]0</leadingDigits>
+          <format>$1 $2 $3 $4</format>
+        </numberFormat>
          <numberFormat pattern="(\d{2})(\d{3})(\d{4})">
            <leadingDigits>222|333</leadingDigits>
            <leadingDigits>(?:222|333)1</leadingDigits>
@@ -672,13 +676,13 @@
                 preferredExtnPrefix=" extn. " nationalPrefix="1"
                 mainCountryForCode="true" >
        <availableFormats>
-        <numberFormat pattern="(\d{3})(\d{3})(\d{4})">
-          <format>$1 $2 $3</format>
-        </numberFormat>
          <numberFormat pattern="(\d{3})(\d{4})">
            <format>$1 $2</format>
            <intlFormat>NA</intlFormat>
         </numberFormat>
+        <numberFormat pattern="(\d{3})(\d{3})(\d{4})">
+          <format>$1 $2 $3</format>
+        </numberFormat>
        </availableFormats>
        <generalDesc>
          <nationalNumberPattern>[13-9]\d{9}|2[0-35-9]\d{8}</nationalNumberPattern>
author	lararennie@google.com <lararennie@google.com@ee073f10-1060-11df-b6a4-87a95322a99c>
	Fri, 10 Jun 2011 14:02:31 +0000 (14:02 +0000)
committer	lararennie@google.com <lararennie@google.com@ee073f10-1060-11df-b6a4-87a95322a99c>
	Fri, 10 Jun 2011 14:02:31 +0000 (14:02 +0000)
java/release_notes.txt		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/PhoneNumberMatch.java		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_BG		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_EG		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_ES		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_GH		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_KZ		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_PF		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SC		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_SY		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_US		patch \| blob \| history
java/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_VU		patch \| blob \| history
java/test/com/google/i18n/phonenumbers/AsYouTypeFormatterTest.java		patch \| blob \| history
java/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java		patch \| blob \| history
java/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java		patch \| blob \| history
java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_JP		patch \| blob \| history
java/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_US		patch \| blob \| history
resources/PhoneNumberMetaData.xml		patch \| blob \| history
resources/PhoneNumberMetaDataForTesting.xml		patch \| blob \| history