From 126c688c23f92d93e5e9e49110ed32112b935be0 Mon Sep 17 00:00:00 2001 From: "dbeaumont@google.com" Date: Thu, 12 Jul 2012 13:14:13 +0000 Subject: [PATCH] JAVA: Metadata changes and bug fixes; libphonenumber v5.0 git-svn-id: http://libphonenumber.googlecode.com/svn/trunk@491 ee073f10-1060-11df-b6a4-87a95322a99c --- .../i18n/phonenumbers/PhoneNumberMatcher.java | 14 +++- .../google/i18n/phonenumbers/PhoneNumberUtil.java | 8 ++ .../phonenumbers/data/PhoneNumberMetadataProto_GA | Bin 610 -> 329 bytes .../phonenumbers/data/PhoneNumberMetadataProto_IL | Bin 989 -> 1075 bytes .../i18n/phonenumbers/PhoneNumberMatcherTest.java | 19 ++++- .../i18n/phonenumbers/PhoneNumberUtilTest.java | 49 +++++++++++- java/release_notes.txt | 89 +++++++++++---------- 7 files changed, 128 insertions(+), 51 deletions(-) diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java index da300ca..a7a5e18 100644 --- a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java @@ -433,7 +433,8 @@ final class PhoneNumberMatcher implements Iterator { /** * Small helper interface such that the number groups can be checked according to different - * criteria. + * criteria, both for our default way of performing formatting and for any alternate formats we + * may want to check. */ interface NumberGroupingChecker { /** @@ -553,6 +554,17 @@ final class PhoneNumberMatcher implements Iterator { if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { return true; } + // If this didn't pass, see if there are any alternate formats, and try them instead. + PhoneMetadata alternateFormats = + MetadataManager.getAlternateFormatsForCountry(number.getCountryCode()); + if (alternateFormats != null) { + for (NumberFormat alternateFormat : alternateFormats.numberFormats()) { + formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat); + if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { + return true; + } + } + } return false; } diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java index d984bbe..8bcbc6e 100644 --- a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java @@ -265,9 +265,17 @@ public class PhoneNumberUtil { // carrier codes, for example in Brazilian phone numbers. We also allow multiple "+" characters at // the start. // Corresponds to the following: + // [digits]{minLengthNsn}| // plus_sign*(([punctuation]|[star])*[digits]){3,}([punctuation]|[star]|[digits]|[alpha])* + // + // The first reg-ex is to allow short numbers (two digits long) to be parsed if they are entered + // as "15" etc, but only if there is no punctuation in them. The second expression restricts the + // number of digits to three or more, but then allows them to be in international form, and to + // have alpha-characters and punctuation. + // // Note VALID_PUNCTUATION starts with a -, so must be the first in the range. private static final String VALID_PHONE_NUMBER = + DIGITS + "{" + MIN_LENGTH_FOR_NSN + "}" + "|" + "[" + PLUS_CHARS + "]*+(?:[" + VALID_PUNCTUATION + STAR_SIGN + "]*" + DIGITS + "){3,}[" + VALID_PUNCTUATION + STAR_SIGN + VALID_ALPHA + DIGITS + "]*"; diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_GA b/java/libphonenumber/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_GA index fb204d0893b2e88d15975c84e9b66e659905711c..1b49771504528698013313c9f716d25ee38c8ff9 100644 GIT binary patch delta 138 zcmaFFa*`=w&07Z6Dh38d2L>R)$jHDIZK!J=8fy>IU{ebwR@Iv1X`#1`ie@p5Dn-(y%d z5`G-d;XJ|l6$f^c;bFg3gsIi$KlV2!I<6*Fb+hYM!CAZeotK8)7T@?m12ZX7 z(}W`$=x&L>!hp!xLf4~NjCL`)#VD-}Xx5rQ(FKo2HTL}yy>P~wr2di>gVSk^O6n7X Ca(W*C diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_IL b/java/libphonenumber/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_IL index 9419291fc160589973542c268a0cab011245786d..b01451a51b2bbd0b46801944f1b47a4ddfb2cd5b 100644 GIT binary patch delta 274 zcmXwzO-e&C6onJDw1o;z-9ZBuUVd(Fo(O`4F2JF;nRNq>M0<;CC~q6i>P&DaF2Ox` z^1wlIa!-=;ouBDxHvjlFlQ&~7%yItkzL2m=rLWuFo)6Yco8Aw`DVJ7u|Oq8wKX6iBcmE);~Eo_8q;V~i`W`7vl?^r z8Vd`}iFc6M`fkol93G4A8#GXP2$)EI)DvUvhiCF5jn G77YNsdM()i diff --git a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java index 1309038..f596989 100644 --- a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java +++ b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java @@ -400,7 +400,12 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase { new NumberTest("\uFF14\uFF11\uFF15\uFF16\uFF16\uFF16\uFF16-\uFF17\uFF17\uFF17", RegionCode.US), new NumberTest("2012-0102 08", RegionCode.US), // Very strange formatting. new NumberTest("2012-01-02 08", RegionCode.US), - new NumberTest("1800-10-10 22", RegionCode.AU), // Breakdown assistance number. + // Breakdown assistance number with unexpected formatting. + new NumberTest("1800-1-0-10 22", RegionCode.AU), + new NumberTest("030-3-2 23 12 34", RegionCode.DE), + new NumberTest("03 0 -3 2 23 12 34", RegionCode.DE), + new NumberTest("(0)3 0 -3 2 23 12 34", RegionCode.DE), + new NumberTest("0 3 0 -3 2 23 12 34", RegionCode.DE), }; /** @@ -413,6 +418,11 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase { // Should be found by strict grouping but not exact grouping, as the last two groups are // formatted together as a block. new NumberTest("0800-2491234", RegionCode.DE), + // Doesn't match any formatting in the test file, but almost matches an alternate format (the + // last two groups have been squashed together here). + new NumberTest("0900-1 123123", RegionCode.DE), + new NumberTest("(0)900-1 123123", RegionCode.DE), + new NumberTest("0 900-1 123123", RegionCode.DE), }; /** @@ -439,6 +449,11 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase { new NumberTest("0494949 ext. 49", RegionCode.DE), new NumberTest("01 (33) 3461 2234", RegionCode.MX), // Optional NP present new NumberTest("(33) 3461 2234", RegionCode.MX), // Optional NP omitted + new NumberTest("1800-10-10 22", RegionCode.AU), // Breakdown assistance number. + // Doesn't match any formatting in the test file, but matches an alternate format exactly. + new NumberTest("0900-1 123 123", RegionCode.DE), + new NumberTest("(0)900-1 123 123", RegionCode.DE), + new NumberTest("0 900-1 123 123", RegionCode.DE), }; public void testMatchesWithPossibleLeniency() throws Exception { @@ -863,7 +878,7 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase { contextPairs.add(new NumberContext("It's cheap! Call ", " before 6:30")); // With a second number later. contextPairs.add(new NumberContext("Call ", " or +1800-123-4567!")); - contextPairs.add(new NumberContext("Call me on June 21 at", "")); // with a Month-Day date + contextPairs.add(new NumberContext("Call me on June 2 at", "")); // with a Month-Day date // With publication pages. contextPairs.add(new NumberContext( "As quoted by Alfonso 12-15 (2009), you may call me at ", "")); diff --git a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java index 2b7f0a3..59f3f70 100644 --- a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java +++ b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java @@ -1256,16 +1256,20 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase { } public void testIsViablePhoneNumber() { + assertFalse(PhoneNumberUtil.isViablePhoneNumber("1")); // Only one or two digits before strange non-possible punctuation. - assertFalse(PhoneNumberUtil.isViablePhoneNumber("12. March")); assertFalse(PhoneNumberUtil.isViablePhoneNumber("1+1+1")); assertFalse(PhoneNumberUtil.isViablePhoneNumber("80+0")); - assertFalse(PhoneNumberUtil.isViablePhoneNumber("00")); - // Three digits is viable. + // Two digits is viable. + assertTrue(PhoneNumberUtil.isViablePhoneNumber("00")); assertTrue(PhoneNumberUtil.isViablePhoneNumber("111")); // Alpha numbers. assertTrue(PhoneNumberUtil.isViablePhoneNumber("0800-4-pizza")); assertTrue(PhoneNumberUtil.isViablePhoneNumber("0800-4-PIZZA")); + // We need at least three digits before any alpha characters. + assertFalse(PhoneNumberUtil.isViablePhoneNumber("08-PIZZA")); + assertFalse(PhoneNumberUtil.isViablePhoneNumber("8-PIZZA")); + assertFalse(PhoneNumberUtil.isViablePhoneNumber("12. March")); } public void testIsViablePhoneNumberNonAscii() { @@ -1600,6 +1604,10 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase { // Test star numbers. Although this is not strictly valid, we would like to make sure we can // parse the output we produce when formatting the number. assertEquals(JP_STAR_NUMBER, phoneUtil.parse("+81 *2345", RegionCode.JP)); + + PhoneNumber shortNumber = new PhoneNumber(); + shortNumber.setCountryCode(64).setNationalNumber(12L); + assertEquals(shortNumber, phoneUtil.parse("12", RegionCode.NZ)); } public void testParseNumberWithAlphaCharacters() throws Exception { @@ -1766,6 +1774,36 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase { e.getErrorType()); } try { + String sentencePhoneNumber = "1 Still not a number"; + phoneUtil.parse(sentencePhoneNumber, RegionCode.NZ); + fail("This should not parse without throwing an exception " + sentencePhoneNumber); + } catch (NumberParseException e) { + // Expected this exception. + assertEquals("Wrong error type stored in exception.", + NumberParseException.ErrorType.NOT_A_NUMBER, + e.getErrorType()); + } + try { + String sentencePhoneNumber = "1 MICROSOFT"; + phoneUtil.parse(sentencePhoneNumber, RegionCode.NZ); + fail("This should not parse without throwing an exception " + sentencePhoneNumber); + } catch (NumberParseException e) { + // Expected this exception. + assertEquals("Wrong error type stored in exception.", + NumberParseException.ErrorType.NOT_A_NUMBER, + e.getErrorType()); + } + try { + String sentencePhoneNumber = "12 MICROSOFT"; + phoneUtil.parse(sentencePhoneNumber, RegionCode.NZ); + fail("This should not parse without throwing an exception " + sentencePhoneNumber); + } catch (NumberParseException e) { + // Expected this exception. + assertEquals("Wrong error type stored in exception.", + NumberParseException.ErrorType.NOT_A_NUMBER, + e.getErrorType()); + } + try { String tooLongPhoneNumber = "01495 72553301873 810104"; phoneUtil.parse(tooLongPhoneNumber, RegionCode.GB); fail("This should not parse without throwing an exception " + tooLongPhoneNumber); @@ -2209,7 +2247,7 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase { // Invalid numbers that can't be parsed. assertEquals(PhoneNumberUtil.MatchType.NOT_A_NUMBER, - phoneUtil.isNumberMatch("43", "3 331 6043")); + phoneUtil.isNumberMatch("4", "3 331 6043")); assertEquals(PhoneNumberUtil.MatchType.NOT_A_NUMBER, phoneUtil.isNumberMatch("+43", "+64 3 331 6005")); assertEquals(PhoneNumberUtil.MatchType.NOT_A_NUMBER, @@ -2321,7 +2359,10 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase { assertTrue(phoneUtil.isAlphaNumber("1800 six-flags")); assertTrue(phoneUtil.isAlphaNumber("1800 six-flags ext. 1234")); assertTrue(phoneUtil.isAlphaNumber("+800 six-flags")); + assertTrue(phoneUtil.isAlphaNumber("180 six-flags")); assertFalse(phoneUtil.isAlphaNumber("1800 123-1234")); + assertFalse(phoneUtil.isAlphaNumber("1 six-flags")); + assertFalse(phoneUtil.isAlphaNumber("18 six-flags")); assertFalse(phoneUtil.isAlphaNumber("1800 123-1234 extension: 1234")); assertFalse(phoneUtil.isAlphaNumber("+800 1234-1234")); } diff --git a/java/release_notes.txt b/java/release_notes.txt index 7b2d433..43a4497 100644 --- a/java/release_notes.txt +++ b/java/release_notes.txt @@ -1,23 +1,30 @@ +July 12th, 2012: libphonenumber-5.0 +* Code changes: + - Support for alternate formats when finding phone numbers. + - Allowing two-digit numbers to be parsed if they are entered in national-format with no + punctuation +* Metadata changes + - IL, GA +* Other + - Reflowed this file to 100 char width to make it unambiguous as to what the standard should be. + July 6th, 2012: libphonenumber-4.9.1 * Metadata changes - AR, BA, BF, CR, DE, EC, ES, KZ, MK, NC, NG, PF, SB, UZ, non-geo entity 882 - - Geocoding data updates for country calling codes 54 (AR) and 81 (JP), new - data for 234 (NG) + - Geocoding data updates for country calling codes 54 (AR) and 81 (JP), new data for 234 (NG) June 21st, 2012: libphonenumber-4.9 * Bug fix - - formatInOriginalFormat fixed not to add a star sign if it was not present - originally. + - formatInOriginalFormat fixed not to add a star sign if it was not present originally. * Metadata changes - BF, CZ, ES, KW - - Non-geographical entities with calling code 882 (BebbiCell, Maritime - Communications, Oration Technologies, Telespazio and Thuraya) + - Non-geographical entities with calling code 882 (BebbiCell, Maritime Communications, Oration + Technologies, Telespazio and Thuraya) - Geocoding data updates for country calling codes 221, 224, 226, 242, 244, 245 * Functionality changes: - - Minimum allowed length for a national significant number (NSN) when parsing - changed from 3 to 2. - - Support parsing of RFC3966-formatted strings with an isdn-subaddress or extra - parameters specified. + - Minimum allowed length for a national significant number (NSN) when parsing changed from 3 to 2. + - Support parsing of RFC3966-formatted strings with an isdn-subaddress or extra parameters + specified. - Allow soft hyphen to appear in phone numbers (\u00AD) * Testing changes: - Add extra unit tests for non-geographical phone number entities @@ -123,33 +130,31 @@ November 24th, 2011: libphonenumber-4.3 November 10th, 2011: libphonenumber-4.2 * Code changes - Providing an "exact match" isEmergencyNumber method - - Improvement to PhoneNumberMatcher: requires national prefix to be present - when matching national-format numbers, unless matching for a region where it - is explicitly marked in the metadata that they may be omitted. Applies to - leniency level VALID and higher. + - Improvement to PhoneNumberMatcher: requires national prefix to be present when matching + national-format numbers, unless matching for a region where it is explicitly marked in the + metadata that they may be omitted. Applies to leniency level VALID and higher. - Change formatNumberForMobileDialing not to modify the phoneNumber passed in. * Metadata changes - Emergency numbers added for all remaining countries - - Collected data on which numbers we format with a national prefix are - commonly written without one and added this + - Collected data on which numbers we format with a national prefix are commonly written without one + and added this - Updates for AR, AT, BH, CZ, GR, IR, KM, LT, MX, PT, SE, SO, UG - Addition of SX (Sint Maarten) October 19th, 2011: libphonenumber-4.1 * Code changes - - Update code and metadata for countries with IDD "8~10" to accept phone - numbers where the "~" is omitted. - - Modify formatInOriginalFormat to use raw_input (when present) when the number - is considered as invalid by the library. - - Add ShortNumberUtil to deal with international short phone numbers, such as - short codes and emergency numbers. + - Update code and metadata for countries with IDD "8~10" to accept phone numbers where the "~" is + omitted. + - Modify formatInOriginalFormat to use raw_input (when present) when the number is considered as + invalid by the library. + - Add ShortNumberUtil to deal with international short phone numbers, such as short codes and + emergency numbers. - Increase the maximum phone-number length accepted when parsing (now set to 16). * Metadata changes - - Updates: BF, BN, CN, DE, DK, DO, FR, IN, KI, KW, MC, MD, ML, PA, QA, SB, UK, - WS + - Updates: BF, BN, CN, DE, DK, DO, FR, IN, KI, KW, MC, MD, ML, PA, QA, SB, UK, WS - Emergency number information also added to: AE, AF, AL, AM, AR, AT, AU, BA, BB, BD, BE, BG, BH, BO, BR, BY, CA, CH, CL, CN, CO, CR, CY, CZ, DE, DJ, DK, DO, DZ, EC, EE, EG, ES, FI, FJ, FO, FR, GB, GE, GF, GH, GI, GL, GR, GT, GY, @@ -161,10 +166,9 @@ October 19th, 2011: libphonenumber-4.1 October 6th, 2011: libphonenumber-4.0 * Code changes - - New function formatNumberForMobileDialing, which attempts to format a number in - such a way that the call can be connected from a mobile phone. If this is - impossible, for example for numbers that cannot be internationally dialled, - then an empty string is returned. + - New function formatNumberForMobileDialing, which attempts to format a number in such a way that + the call can be connected from a mobile phone. If this is impossible, for example for numbers + that cannot be internationally dialled, then an empty string is returned. - Fallback functionality to English for non-CJK languages for geocoding * Metadata changes @@ -178,8 +182,7 @@ September 13th, 2011: libphonenumber-3.9 - Enable AsYouTypeFormatter to handle long IDD and NDD. - Allow the presence of an IDD following a +. - Fix formatting of phone numbers which contain only 0s in the national number. - - Refactored some code in geocoding including AreaCodeMap and the storage - strategies. + - Refactored some code in geocoding including AreaCodeMap and the storage strategies. * Metadata changes - Updates: AM, BE, BH, BJ, BR, BT, BZ, CI, CL, CN, DE, DK, DM, DZ, EC, EG, FJ, @@ -194,16 +197,14 @@ August 11th, 2011: libphonenumber-3.8 * Code changes - Fix to demo to not throw null-ptr exceptions for invalid NANPA numbers - Fixed AYTF to not accept plus signs in the middle of input - - PhoneNumberMatcher improvements - added STRICT_GROUPING and EXACT_GROUPING - levels, numbers followed/preceded by a currency symbol will not match, - multiple numbers separated by phone-number punctuation will now match. ", " - is no longer accepted as an extension symbol when matching, only when - parsing. "x" is only accepted as a carrier code or extension marker, not - otherwise. - - Changes to handling of leading zeroes - these will not be silently ignored - anymore, but will be stored as part of the number. - - PhoneNumberOfflineGeocoder - new method to get the description of a number that assumes - the validity of the number has already been checked and will not re-verify it. + - PhoneNumberMatcher improvements - added STRICT_GROUPING and EXACT_GROUPING levels, numbers + followed/preceded by a currency symbol will not match, multiple numbers separated by phone-number + punctuation will now match. ", " is no longer accepted as an extension symbol when matching, only + when parsing. "x" is only accepted as a carrier code or extension marker, not otherwise. + - Changes to handling of leading zeroes - these will not be silently ignored anymore, but will be + stored as part of the number. + - PhoneNumberOfflineGeocoder - new method to get the description of a number that assumes the + validity of the number has already been checked and will not re-verify it. - Split geocoding US binary data into multiple files. * Metadata changes @@ -461,16 +462,16 @@ August 16th, 2010 August 4th, 2010 * Further improve startup performance - Preload no country specific metadata at startup. - - Stop creating the file containing mapping from country calling code to region code - and loading it at startup; instead, do the initialization in PhoneNumberUtil. + - Stop creating the file containing mapping from country calling code to region code and loading + it at startup; instead, do the initialization in PhoneNumberUtil. July 31th, 2010 * Improve startup performance - Separate generated metadata binary file to one file per region - Preload US at start up, and load other region at the time when needed - - Create a file containing mapping from country calling code to region code, - and load it at startup + - Create a file containing mapping from country calling code to region code, and load it at + startup - Same change also applied to unittests July 30th, 2010 -- 2.7.4