lib/punycode.js

   1 /*! https://mths.be/punycode v1.3.2 by @mathias */
   2 ;(function(root) {
   3
   4         /** Detect free variables */
   5         var freeExports = typeof exports == 'object' && exports &&
   6                 !exports.nodeType && exports;
   7         var freeModule = typeof module == 'object' && module &&
   8                 !module.nodeType && module;
   9         var freeGlobal = typeof global == 'object' && global;
  10         if (
  11                 freeGlobal.global === freeGlobal ||
  12                 freeGlobal.window === freeGlobal ||
  13                 freeGlobal.self === freeGlobal
  14         ) {
  15                 root = freeGlobal;
  16         }
  17
  18         /**
  19          * The `punycode` object.
  20          * @name punycode
  21          * @type Object
  22          */
  23         var punycode,
  24
  25         /** Highest positive signed 32-bit float value */
  26         maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1
  27
  28         /** Bootstring parameters */
  29         base = 36,
  30         tMin = 1,
  31         tMax = 26,
  32         skew = 38,
  33         damp = 700,
  34         initialBias = 72,
  35         initialN = 128, // 0x80
  36         delimiter = '-', // '\x2D'
  37
  38         /** Regular expressions */
  39         regexPunycode = /^xn--/,
  40         regexNonASCII = /[^\x20-\x7E]/, // unprintable ASCII chars + non-ASCII chars
  41         regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g, // RFC 3490 separators
  42
  43         /** Error messages */
  44         errors = {
  45                 'overflow': 'Overflow: input needs wider integers to process',
  46                 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
  47                 'invalid-input': 'Invalid input'
  48         },
  49
  50         /** Convenience shortcuts */
  51         baseMinusTMin = base - tMin,
  52         floor = Math.floor,
  53         stringFromCharCode = String.fromCharCode,
  54
  55         /** Temporary variable */
  56         key;
  57
  58         /*--------------------------------------------------------------------------*/
  59
  60         /**
  61          * A generic error utility function.
  62          * @private
  63          * @param {String} type The error type.
  64          * @returns {Error} Throws a `RangeError` with the applicable error message.
  65          */
  66         function error(type) {
  67                 throw new RangeError(errors[type]);
  68         }
  69
  70         /**
  71          * A generic `Array#map` utility function.
  72          * @private
  73          * @param {Array} array The array to iterate over.
  74          * @param {Function} callback The function that gets called for every array
  75          * item.
  76          * @returns {Array} A new array of values returned by the callback function.
  77          */
  78         function map(array, fn) {
  79                 var length = array.length;
  80                 var result = [];
  81                 while (length--) {
  82                         result[length] = fn(array[length]);
  83                 }
  84                 return result;
  85         }
  86
  87         /**
  88          * A simple `Array#map`-like wrapper to work with domain name strings or email
  89          * addresses.
  90          * @private
  91          * @param {String} domain The domain name or email address.
  92          * @param {Function} callback The function that gets called for every
  93          * character.
  94          * @returns {Array} A new string of characters returned by the callback
  95          * function.
  96          */
  97         function mapDomain(string, fn) {
  98                 var parts = string.split('@');
  99                 var result = '';
 100                 if (parts.length > 1) {
 101                         // In email addresses, only the domain name should be punycoded. Leave
 102                         // the local part (i.e. everything up to `@`) intact.
 103                         result = parts[0] + '@';
 104                         string = parts[1];
 105                 }
 106                 // Avoid `split(regex)` for IE8 compatibility. See #17.
 107                 string = string.replace(regexSeparators, '\x2E');
 108                 var labels = string.split('.');
 109                 var encoded = map(labels, fn).join('.');
 110                 return result + encoded;
 111         }
 112
 113         /**
 114          * Creates an array containing the numeric code points of each Unicode
 115          * character in the string. While JavaScript uses UCS-2 internally,
 116          * this function will convert a pair of surrogate halves (each of which
 117          * UCS-2 exposes as separate characters) into a single code point,
 118          * matching UTF-16.
 119          * @see `punycode.ucs2.encode`
 120          * @see <https://mathiasbynens.be/notes/javascript-encoding>
 121          * @memberOf punycode.ucs2
 122          * @name decode
 123          * @param {String} string The Unicode input string (UCS-2).
 124          * @returns {Array} The new array of code points.
 125          */
 126         function ucs2decode(string) {
 127                 var output = [],
 128                     counter = 0,
 129                     length = string.length,
 130                     value,
 131                     extra;
 132                 while (counter < length) {
 133                         value = string.charCodeAt(counter++);
 134                         if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
 135                                 // high surrogate, and there is a next character
 136                                 extra = string.charCodeAt(counter++);
 137                                 if ((extra & 0xFC00) == 0xDC00) { // low surrogate
 138                                         output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
 139                                 } else {
 140                                         // unmatched surrogate; only append this code unit, in case the next
 141                                         // code unit is the high surrogate of a surrogate pair
 142                                         output.push(value);
 143                                         counter--;
 144                                 }
 145                         } else {
 146                                 output.push(value);
 147                         }
 148                 }
 149                 return output;
 150         }
 151
 152         /**
 153          * Creates a string based on an array of numeric code points.
 154          * @see `punycode.ucs2.decode`
 155          * @memberOf punycode.ucs2
 156          * @name encode
 157          * @param {Array} codePoints The array of numeric code points.
 158          * @returns {String} The new Unicode string (UCS-2).
 159          */
 160         function ucs2encode(array) {
 161                 return map(array, function(value) {
 162                         var output = '';
 163                         if (value > 0xFFFF) {
 164                                 value -= 0x10000;
 165                                 output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
 166                                 value = 0xDC00 | value & 0x3FF;
 167                         }
 168                         output += stringFromCharCode(value);
 169                         return output;
 170                 }).join('');
 171         }
 172
 173         /**
 174          * Converts a basic code point into a digit/integer.
 175          * @see `digitToBasic()`
 176          * @private
 177          * @param {Number} codePoint The basic numeric code point value.
 178          * @returns {Number} The numeric value of a basic code point (for use in
 179          * representing integers) in the range `0` to `base - 1`, or `base` if
 180          * the code point does not represent a value.
 181          */
 182         function basicToDigit(codePoint) {
 183                 if (codePoint - 48 < 10) {
 184                         return codePoint - 22;
 185                 }
 186                 if (codePoint - 65 < 26) {
 187                         return codePoint - 65;
 188                 }
 189                 if (codePoint - 97 < 26) {
 190                         return codePoint - 97;
 191                 }
 192                 return base;
 193         }
 194
 195         /**
 196          * Converts a digit/integer into a basic code point.
 197          * @see `basicToDigit()`
 198          * @private
 199          * @param {Number} digit The numeric value of a basic code point.
 200          * @returns {Number} The basic code point whose value (when used for
 201          * representing integers) is `digit`, which needs to be in the range
 202          * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
 203          * used; else, the lowercase form is used. The behavior is undefined
 204          * if `flag` is non-zero and `digit` has no uppercase form.
 205          */
 206         function digitToBasic(digit, flag) {
 207                 //  0..25 map to ASCII a..z or A..Z
 208                 // 26..35 map to ASCII 0..9
 209                 return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);
 210         }
 211
 212         /**
 213          * Bias adaptation function as per section 3.4 of RFC 3492.
 214          * https://tools.ietf.org/html/rfc3492#section-3.4
 215          * @private
 216          */
 217         function adapt(delta, numPoints, firstTime) {
 218                 var k = 0;
 219                 delta = firstTime ? floor(delta / damp) : delta >> 1;
 220                 delta += floor(delta / numPoints);
 221                 for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
 222                         delta = floor(delta / baseMinusTMin);
 223                 }
 224                 return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
 225         }
 226
 227         /**
 228          * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 229          * symbols.
 230          * @memberOf punycode
 231          * @param {String} input The Punycode string of ASCII-only symbols.
 232          * @returns {String} The resulting string of Unicode symbols.
 233          */
 234         function decode(input) {
 235                 // Don't use UCS-2
 236                 var output = [],
 237                     inputLength = input.length,
 238                     out,
 239                     i = 0,
 240                     n = initialN,
 241                     bias = initialBias,
 242                     basic,
 243                     j,
 244                     index,
 245                     oldi,
 246                     w,
 247                     k,
 248                     digit,
 249                     t,
 250                     /** Cached calculation results */
 251                     baseMinusT;
 252
 253                 // Handle the basic code points: let `basic` be the number of input code
 254                 // points before the last delimiter, or `0` if there is none, then copy
 255                 // the first basic code points to the output.
 256
 257                 basic = input.lastIndexOf(delimiter);
 258                 if (basic < 0) {
 259                         basic = 0;
 260                 }
 261
 262                 for (j = 0; j < basic; ++j) {
 263                         // if it's not a basic code point
 264                         if (input.charCodeAt(j) >= 0x80) {
 265                                 error('not-basic');
 266                         }
 267                         output.push(input.charCodeAt(j));
 268                 }
 269
 270                 // Main decoding loop: start just after the last delimiter if any basic code
 271                 // points were copied; start at the beginning otherwise.
 272
 273                 for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {
 274
 275                         // `index` is the index of the next character to be consumed.
 276                         // Decode a generalized variable-length integer into `delta`,
 277                         // which gets added to `i`. The overflow checking is easier
 278                         // if we increase `i` as we go, then subtract off its starting
 279                         // value at the end to obtain `delta`.
 280                         for (oldi = i, w = 1, k = base; /* no condition */; k += base) {
 281
 282                                 if (index >= inputLength) {
 283                                         error('invalid-input');
 284                                 }
 285
 286                                 digit = basicToDigit(input.charCodeAt(index++));
 287
 288                                 if (digit >= base || digit > floor((maxInt - i) / w)) {
 289                                         error('overflow');
 290                                 }
 291
 292                                 i += digit * w;
 293                                 t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
 294
 295                                 if (digit < t) {
 296                                         break;
 297                                 }
 298
 299                                 baseMinusT = base - t;
 300                                 if (w > floor(maxInt / baseMinusT)) {
 301                                         error('overflow');
 302                                 }
 303
 304                                 w *= baseMinusT;
 305
 306                         }
 307
 308                         out = output.length + 1;
 309                         bias = adapt(i - oldi, out, oldi == 0);
 310
 311                         // `i` was supposed to wrap around from `out` to `0`,
 312                         // incrementing `n` each time, so we'll fix that now:
 313                         if (floor(i / out) > maxInt - n) {
 314                                 error('overflow');
 315                         }
 316
 317                         n += floor(i / out);
 318                         i %= out;
 319
 320                         // Insert `n` at position `i` of the output
 321                         output.splice(i++, 0, n);
 322
 323                 }
 324
 325                 return ucs2encode(output);
 326         }
 327
 328         /**
 329          * Converts a string of Unicode symbols (e.g. a domain name label) to a
 330          * Punycode string of ASCII-only symbols.
 331          * @memberOf punycode
 332          * @param {String} input The string of Unicode symbols.
 333          * @returns {String} The resulting Punycode string of ASCII-only symbols.
 334          */
 335         function encode(input) {
 336                 var n,
 337                     delta,
 338                     handledCPCount,
 339                     basicLength,
 340                     bias,
 341                     j,
 342                     m,
 343                     q,
 344                     k,
 345                     t,
 346                     currentValue,
 347                     output = [],
 348                     /** `inputLength` will hold the number of code points in `input`. */
 349                     inputLength,
 350                     /** Cached calculation results */
 351                     handledCPCountPlusOne,
 352                     baseMinusT,
 353                     qMinusT;
 354
 355                 // Convert the input in UCS-2 to Unicode
 356                 input = ucs2decode(input);
 357
 358                 // Cache the length
 359                 inputLength = input.length;
 360
 361                 // Initialize the state
 362                 n = initialN;
 363                 delta = 0;
 364                 bias = initialBias;
 365
 366                 // Handle the basic code points
 367                 for (j = 0; j < inputLength; ++j) {
 368                         currentValue = input[j];
 369                         if (currentValue < 0x80) {
 370                                 output.push(stringFromCharCode(currentValue));
 371                         }
 372                 }
 373
 374                 handledCPCount = basicLength = output.length;
 375
 376                 // `handledCPCount` is the number of code points that have been handled;
 377                 // `basicLength` is the number of basic code points.
 378
 379                 // Finish the basic string - if it is not empty - with a delimiter
 380                 if (basicLength) {
 381                         output.push(delimiter);
 382                 }
 383
 384                 // Main encoding loop:
 385                 while (handledCPCount < inputLength) {
 386
 387                         // All non-basic code points < n have been handled already. Find the next
 388                         // larger one:
 389                         for (m = maxInt, j = 0; j < inputLength; ++j) {
 390                                 currentValue = input[j];
 391                                 if (currentValue >= n && currentValue < m) {
 392                                         m = currentValue;
 393                                 }
 394                         }
 395
 396                         // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
 397                         // but guard against overflow
 398                         handledCPCountPlusOne = handledCPCount + 1;
 399                         if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
 400                                 error('overflow');
 401                         }
 402
 403                         delta += (m - n) * handledCPCountPlusOne;
 404                         n = m;
 405
 406                         for (j = 0; j < inputLength; ++j) {
 407                                 currentValue = input[j];
 408
 409                                 if (currentValue < n && ++delta > maxInt) {
 410                                         error('overflow');
 411                                 }
 412
 413                                 if (currentValue == n) {
 414                                         // Represent delta as a generalized variable-length integer
 415                                         for (q = delta, k = base; /* no condition */; k += base) {
 416                                                 t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
 417                                                 if (q < t) {
 418                                                         break;
 419                                                 }
 420                                                 qMinusT = q - t;
 421                                                 baseMinusT = base - t;
 422                                                 output.push(
 423                                                         stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
 424                                                 );
 425                                                 q = floor(qMinusT / baseMinusT);
 426                                         }
 427
 428                                         output.push(stringFromCharCode(digitToBasic(q, 0)));
 429                                         bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
 430                                         delta = 0;
 431                                         ++handledCPCount;
 432                                 }
 433                         }
 434
 435                         ++delta;
 436                         ++n;
 437
 438                 }
 439                 return output.join('');
 440         }
 441
 442         /**
 443          * Converts a Punycode string representing a domain name or an email address
 444          * to Unicode. Only the Punycoded parts of the input will be converted, i.e.
 445          * it doesn't matter if you call it on a string that has already been
 446          * converted to Unicode.
 447          * @memberOf punycode
 448          * @param {String} input The Punycoded domain name or email address to
 449          * convert to Unicode.
 450          * @returns {String} The Unicode representation of the given Punycode
 451          * string.
 452          */
 453         function toUnicode(input) {
 454                 return mapDomain(input, function(string) {
 455                         return regexPunycode.test(string)
 456                                 ? decode(string.slice(4).toLowerCase())
 457                                 : string;
 458                 });
 459         }
 460
 461         /**
 462          * Converts a Unicode string representing a domain name or an email address to
 463          * Punycode. Only the non-ASCII parts of the domain name will be converted,
 464          * i.e. it doesn't matter if you call it with a domain that's already in
 465          * ASCII.
 466          * @memberOf punycode
 467          * @param {String} input The domain name or email address to convert, as a
 468          * Unicode string.
 469          * @returns {String} The Punycode representation of the given domain name or
 470          * email address.
 471          */
 472         function toASCII(input) {
 473                 return mapDomain(input, function(string) {
 474                         return regexNonASCII.test(string)
 475                                 ? 'xn--' + encode(string)
 476                                 : string;
 477                 });
 478         }
 479
 480         /*--------------------------------------------------------------------------*/
 481
 482         /** Define the public API */
 483         punycode = {
 484                 /**
 485                  * A string representing the current Punycode.js version number.
 486                  * @memberOf punycode
 487                  * @type String
 488                  */
 489                 'version': '1.3.2',
 490                 /**
 491                  * An object of methods to convert from JavaScript's internal character
 492                  * representation (UCS-2) to Unicode code points, and back.
 493                  * @see <https://mathiasbynens.be/notes/javascript-encoding>
 494                  * @memberOf punycode
 495                  * @type Object
 496                  */
 497                 'ucs2': {
 498                         'decode': ucs2decode,
 499                         'encode': ucs2encode
 500                 },
 501                 'decode': decode,
 502                 'encode': encode,
 503                 'toASCII': toASCII,
 504                 'toUnicode': toUnicode
 505         };
 506
 507         /** Expose `punycode` */
 508         // Some AMD build optimizers, like r.js, check for specific condition patterns
 509         // like the following:
 510         if (
 511                 typeof define == 'function' &&
 512                 typeof define.amd == 'object' &&
 513                 define.amd
 514         ) {
 515                 define('punycode', function() {
 516                         return punycode;
 517                 });
 518         } else if (freeExports && freeModule) {
 519                 if (module.exports == freeExports) { // in Node.js or RingoJS v0.8.0+
 520                         freeModule.exports = punycode;
 521                 } else { // in Narwhal or RingoJS v0.7.0-
 522                         for (key in punycode) {
 523                                 punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]);
 524                         }
 525                 }
 526         } else { // in Rhino or a web browser
 527                 root.punycode = punycode;
 528         }
 529
 530 }(this));