1 /*! https://mths.be/utf8js v2.1.2 by @mathias */
// Local alias for String.fromCharCode; the encoding helpers in this file call
// it to turn numeric code units / byte values into one-character strings.
3 var stringFromCharCode = String.fromCharCode;
5 // Taken from https://mths.be/punycode
/**
 * Walks `string` one UTF-16 code unit at a time and collects numeric values
 * in `output`. A high surrogate (0xD800-0xDBFF) that is immediately followed
 * by a low surrogate (0xDC00-0xDFFF) is merged into a single code point at or
 * above 0x10000.
 *
 * NOTE(review): some lines of this function are not visible in this listing
 * (the declarations of `output`, `counter`, `value` and `extra`, the handling
 * of unmatched/ordinary code units, and the return statement) - confirm
 * against the full file.
 *
 * @param {string} string - Text to split into numeric values.
 * @returns {number[]} Presumably the `output` array; the return statement is
 *   not visible here, but utf8encode/utf8decode read `.length` of the result
 *   and index into it.
 */
6 function ucs2decode(string) {
9 var length = string.length;
12 while (counter < length) {
// Read the current code unit, then advance the cursor.
13 value = string.charCodeAt(counter++);
14 if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
15 // high surrogate, and there is a next character
16 extra = string.charCodeAt(counter++);
17 if ((extra & 0xFC00) == 0xDC00) { // low surrogate
// Rebuild the code point: 10 payload bits from each half, plus the 0x10000
// offset.
18 output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
20 // unmatched surrogate; only append this code unit, in case the next
21 // code unit is the high surrogate of a surrogate pair
32 // Taken from https://mths.be/punycode
/**
 * Builds a string from an array of numeric values by appending characters to
 * `output`, one array element per loop iteration.
 *
 * The visible lines show a surrogate-pair path: a high surrogate
 * (0xD800 | bits 10-19 of `value`) is appended, `value` is replaced by the
 * matching low surrogate (0xDC00 | low 10 bits), and then `value` is appended.
 *
 * NOTE(review): the declarations of `index`, `value` and `output`, the
 * condition guarding the surrogate-pair path (presumably values above 0xFFFF)
 * and the return statement are not visible in this listing - confirm against
 * the full file.
 *
 * @param {number[]} array - Numeric values to convert; utf8decode passes its
 *   decoded code points.
 * @returns {string} Presumably `output`; the return statement is not visible
 *   here, but utf8decode returns this function's result to its caller.
 */
33 function ucs2encode(array) {
34 var length = array.length;
38 while (++index < length) {
// High surrogate first ...
42 output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
// ... then turn `value` into the low surrogate, which the append below emits.
43 value = 0xDC00 | value & 0x3FF;
45 output += stringFromCharCode(value);
/**
 * Tests whether `codePoint` falls in the surrogate range 0xD800-0xDFFF and,
 * for such values, builds the message
 * 'Lone surrogate U+<HEX> is not a scalar value' (hex digits upper-cased).
 *
 * Callers treat the result as a boolean: encodeCodePoint negates it and
 * decodeSymbol substitutes 0xFFFD when it is falsy.
 *
 * NOTE(review): the lines that use `strict`, raise the error and return a
 * value are not visible in this listing. Presumably the error is thrown only
 * when `strict` is truthy and a falsy value is returned otherwise - confirm
 * against the full file.
 *
 * @param {number} codePoint - Value to test.
 * @param {boolean} strict - Strictness flag supplied by the callers.
 * @returns {boolean} Presumably true for non-surrogate values - confirm.
 */
50 function checkScalarValue(codePoint, strict) {
51 if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
54 'Lone surrogate U+' + codePoint.toString(16).toUpperCase() +
55 ' is not a scalar value'
62 /*--------------------------------------------------------------------------*/
/**
 * Produces one continuation byte as a one-character string: the 6 bits of
 * `codePoint` starting at bit `shift`, tagged with the 10xxxxxx marker (0x80).
 *
 * @param {number} codePoint - Code point being encoded.
 * @param {number} shift - Right shift that selects which 6-bit group to emit.
 * @returns {string} Single character whose code is in the range 0x80-0xBF.
 */
64 function createByte(codePoint, shift) {
65 return stringFromCharCode(((codePoint >> shift) & 0x3F) | 0x80);
/**
 * Encodes one code point as a string of byte-valued characters.
 *
 * Values that fit in 7 bits are returned directly as a single character.
 * Larger values get a lead byte (110xxxxx, 1110xxxx or 11110xxx depending on
 * magnitude), then middle continuation bytes from createByte(), then a final
 * continuation byte holding the low 6 bits.
 *
 * NOTE(review): the declaration of `symbol`, what happens when
 * checkScalarValue() reports a lone surrogate in the 3-byte branch
 * (decodeSymbol substitutes 0xFFFD in the analogous case, so presumably the
 * same here), and the return statement are not visible in this listing -
 * confirm against the full file.
 *
 * @param {number} codePoint - Code point to encode.
 * @param {boolean} strict - Forwarded to checkScalarValue() for values in the
 *   3-byte range.
 * @returns {string} The one-character string for 7-bit values; otherwise
 *   presumably `symbol` (utf8encode concatenates this function's result).
 */
68 function encodeCodePoint(codePoint, strict) {
69 if ((codePoint & 0xFFFFFF80) == 0) { // 1-byte sequence
70 return stringFromCharCode(codePoint);
73 if ((codePoint & 0xFFFFF800) == 0) { // 2-byte sequence
// Lead byte 110xxxxx carrying bits 6-10.
74 symbol = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0);
76 else if ((codePoint & 0xFFFF0000) == 0) { // 3-byte sequence
77 if (!checkScalarValue(codePoint, strict)) {
// Lead byte 1110xxxx carrying bits 12-15.
80 symbol = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0);
81 symbol += createByte(codePoint, 6);
83 else if ((codePoint & 0xFFE00000) == 0) { // 4-byte sequence
// Lead byte 11110xxx carrying bits 18-20.
84 symbol = stringFromCharCode(((codePoint >> 18) & 0x07) | 0xF0);
85 symbol += createByte(codePoint, 12);
86 symbol += createByte(codePoint, 6);
// Final continuation byte: the low 6 bits of the code point.
88 symbol += stringFromCharCode((codePoint & 0x3F) | 0x80);
/**
 * Encodes `string` as UTF-8: the input is split into code points with
 * ucs2decode(), each one is passed through encodeCodePoint(), and the pieces
 * are appended to `byteString`.
 *
 * NOTE(review): `opts.strict` is read unconditionally in the visible lines;
 * whether `opts` is defaulted when omitted is not visible in this listing.
 * The declarations of `index`, `codePoint` and `byteString` and the return
 * statement are also not visible - confirm against the full file.
 *
 * @param {string} string - Text to encode.
 * @param {{strict: (boolean|undefined)}} opts - Options; strict handling is
 *   on unless `opts.strict` is exactly `false`.
 * @returns {string} Presumably `byteString` - confirm.
 */
92 function utf8encode(string, opts) {
// Anything other than an explicit `false` keeps strict mode enabled.
94 var strict = false !== opts.strict;
96 var codePoints = ucs2decode(string);
97 var length = codePoints.length;
101 while (++index < length) {
102 codePoint = codePoints[index];
103 byteString += encodeCodePoint(codePoint, strict);
108 /*--------------------------------------------------------------------------*/
/**
 * Reads the byte at the current position of the shared decoder state
 * (`byteArray`, `byteIndex`, `byteCount`) and validates it as a continuation
 * byte (bit pattern 10xxxxxx).
 *
 * NOTE(review): the statement that advances `byteIndex` is not visible in
 * this listing; presumably the position moves forward after the read -
 * confirm against the full file.
 *
 * @returns {number} The 6 payload bits of the continuation byte.
 * @throws {Error} 'Invalid byte index' when the position is at or past
 *   `byteCount`; 'Invalid continuation byte' when the byte does not match
 *   10xxxxxx.
 */
110 function readContinuationByte() {
111 if (byteIndex >= byteCount) {
112 throw Error('Invalid byte index');
// Keep only the low 8 bits of the stored value.
115 var continuationByte = byteArray[byteIndex] & 0xFF;
// The top two bits must be `10`; strip them and return the payload.
118 if ((continuationByte & 0xC0) == 0x80) {
119 return continuationByte & 0x3F;
122 // If we end up here, it’s not a continuation byte
123 throw Error('Invalid continuation byte');
/**
 * Decodes the next symbol from the shared decoder state (`byteArray`,
 * `byteIndex`, `byteCount`).
 *
 * The high bits of the lead byte select the sequence length: 0xxxxxxx
 * (1 byte), 110xxxxx (2), 1110xxxx (3) or 11110xxx (4). Trailing bytes come
 * from readContinuationByte(), which supplies 6 payload bits each.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing: the local declarations, the advance of `byteIndex` after the lead
 * byte, and most return statements. The end-of-input result is presumably
 * `false`, since utf8decode loops until this function returns exactly
 * `false` - confirm against the full file.
 *
 * @param {boolean} strict - Forwarded to checkScalarValue() for 3-byte
 *   sequences.
 * @returns {(number|boolean)} For a 3-byte sequence with a value of at least
 *   0x0800: the code point, or 0xFFFD when checkScalarValue() reports a falsy
 *   result. Other return paths are not visible here.
 * @throws {Error} 'Invalid byte index' when `byteIndex` exceeds `byteCount`;
 *   'Invalid continuation byte' from the 2- and 3-byte branches (and from
 *   readContinuationByte()); 'Invalid UTF-8 detected' otherwise.
 */
126 function decodeSymbol(strict) {
// The read position has run past the end of the input.
133 if (byteIndex > byteCount) {
134 throw Error('Invalid byte index');
// The read position is exactly at the end of the input.
137 if (byteIndex == byteCount) {
// Lead byte, masked to its low 8 bits.
142 byte1 = byteArray[byteIndex] & 0xFF;
145 // 1-byte sequence (no continuation bytes)
146 if ((byte1 & 0x80) == 0) {
// Lead byte 110xxxxx: 5 payload bits here, 6 from the continuation byte.
151 if ((byte1 & 0xE0) == 0xC0) {
152 byte2 = readContinuationByte();
153 codePoint = ((byte1 & 0x1F) << 6) | byte2;
// NOTE(review): presumably mirrors the 3-byte branch below (accept values of
// at least 0x80, otherwise throw); the lines in between are not visible.
154 if (codePoint >= 0x80) {
157 throw Error('Invalid continuation byte');
161 // 3-byte sequence (may include unpaired surrogates)
162 if ((byte1 & 0xF0) == 0xE0) {
163 byte2 = readContinuationByte();
164 byte3 = readContinuationByte();
165 codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
166 if (codePoint >= 0x0800) {
// A falsy check result is replaced by U+FFFD.
167 return checkScalarValue(codePoint, strict) ? codePoint : 0xFFFD;
169 throw Error('Invalid continuation byte');
// Lead byte 11110xxx: 3 payload bits here, 6 from each continuation byte.
174 if ((byte1 & 0xF8) == 0xF0) {
175 byte2 = readContinuationByte();
176 byte3 = readContinuationByte();
177 byte4 = readContinuationByte();
178 codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) |
179 (byte3 << 0x06) | byte4;
// Range check: 0x010000 through 0x10FFFF.
180 if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
185 throw Error('Invalid UTF-8 detected');
/**
 * Decodes a UTF-8 byte string back into text.
 *
 * The input is split into numeric values with ucs2decode() and stored in the
 * shared `byteArray` / `byteCount` variables that decodeSymbol() and
 * readContinuationByte() read. decodeSymbol() is then called repeatedly until
 * it returns exactly `false`; each result is collected in `codePoints`, which
 * ucs2encode() converts to the returned string.
 *
 * NOTE(review): `opts.strict` is read unconditionally in the visible lines;
 * whether `opts` is defaulted when omitted is not visible in this listing.
 * The reset of `byteIndex` and the declarations of `codePoints` and `tmp` are
 * also not visible - confirm against the full file.
 *
 * @param {string} byteString - String whose characters hold the encoded bytes.
 * @param {{strict: (boolean|undefined)}} opts - Options; strict handling is
 *   on unless `opts.strict` is exactly `false`.
 * @returns {string} The decoded text.
 */
191 function utf8decode(byteString, opts) {
// Anything other than an explicit `false` keeps strict mode enabled.
193 var strict = false !== opts.strict;
195 byteArray = ucs2decode(byteString);
196 byteCount = byteArray.length;
// The strict `!== false` comparison lets a decoded value of 0 through.
200 while ((tmp = decodeSymbol(strict)) !== false) {
201 codePoints.push(tmp);
203 return ucs2encode(codePoints);