1 /* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
3 * Permission to use, copy, modify, and/or distribute this software for any
4 * purpose with or without fee is hereby granted, provided that the above
5 * copyright notice and this permission notice appear in all copies.
7 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 /* Derived from https://github.com/bnoordhuis/punycode
17 * but updated to support IDNA 2008.
/* Slow path of uv__utf8_decode1(): handles input whose first byte was not
 * returned directly by the fast path. uv__utf8_decode1() calls this with
 * its own (p, pe) plus the byte it has already consumed.
 *
 * Bytes are read through an (unsigned char) cast, so values >= 0x80 are not
 * sign-extended, and *p is advanced past every byte read. The three groups
 * of reads below consume three, two and one byte(s) respectively.
 *
 * Every failure path visible here returns -1; since the return type is
 * unsigned, callers observe that as (unsigned) -1.
 *
 * NOTE(review): the remaining parameters and all branch conditions are not
 * visible in this view - confirm details against the full file.
 */
25 static unsigned uv__utf8_decode1_slow(const char** p,
41 b = (unsigned char) *(*p)++;
42 c = (unsigned char) *(*p)++;
43 d = (unsigned char) *(*p)++;
51 c = (unsigned char) *(*p)++;
52 d = (unsigned char) *(*p)++;
62 d = (unsigned char) *(*p)++;
68 return -1; /* Invalid continuation byte. */
/* UTF-8 continuation bytes have the form 10xxxxxx. NOTE(review): this
 * compares only the XOR of the three bytes' top two bits against 10, which
 * does not by itself prove that each byte matches individually (top bits
 * 10, 11, 11 also XOR to 10) - confirm no stricter per-byte check is needed.
 */
71 if (0x80 != (0xC0 & (b ^ c ^ d)))
72 return -1; /* Invalid sequence. */
/* Assemble the code point from four fields placed 6 bits apart.
 * NOTE(review): presumably b, c and d were reduced to 6 bits each on lines
 * not visible here - confirm.
 */
77 a = (a << 18) | (b << 12) | (c << 6) | d;
80 return -1; /* Overlong sequence. */
83 return -1; /* Four-byte sequence > U+10FFFF. */
85 if (a >= 0xD800 && a <= 0xDFFF)
86 return -1; /* UTF-16 surrogate code point (U+D800..U+DFFF). */
/* Decode one UTF-8 code point starting at *p, advancing *p past the bytes
 * consumed. The first byte is read through an (unsigned char) cast; on the
 * fast path it is returned unchanged (the ASCII case), otherwise decoding
 * is delegated to uv__utf8_decode1_slow() together with pe and the byte
 * already read.
 *
 * NOTE(review): pe presumably marks the end of the input; the bounds check
 * and the ASCII test are not visible in this view - confirm.
 */
91 unsigned uv__utf8_decode1(const char** p, const char* pe) {
96 a = (unsigned char) *(*p)++;
99 return a; /* ASCII, common case. */
101 return uv__utf8_decode1_slow(p, pe, a);
/* Encode a single label, given as the UTF-8 range [s, se), into the output
 * buffer. d points at the caller's write cursor, which is advanced as bytes
 * are emitted; de is the limit the cursor is compared against.
 *
 * The input is decoded with uv__utf8_decode1() at four separate call sites.
 * The "xn--" prefix is written only when the label contains non-ASCII
 * characters, ASCII characters are copied first, and the remaining output
 * digits are taken from the 36-entry table `alphabet` (a-z followed by 0-9).
 *
 * Returns UV_E2BIG at the two points marked "Overflow".
 *
 * NOTE(review): the success return value, the resetting of s between passes
 * and the bounds checks around the alphabet[] writes are not visible in
 * this view - confirm against the full file.
 */
104 static int uv__idna_toascii_label(const char* s, const char* se,
105 char** d, char* de) {
106 static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
126 /* Note: after this loop we've visited all UTF-8 characters and know
127 * they're legal so we no longer need to check for decode errors.
130 c = uv__utf8_decode1(&s, se);
141 /* Only write "xn--" when there are non-ASCII characters. */
/* Each prefix byte is stored only while the cursor is still below de;
 * nothing on these four lines reports an error when the buffer is full.
 */
143 if (*d < de) *(*d)++ = 'x';
144 if (*d < de) *(*d)++ = 'n';
145 if (*d < de) *(*d)++ = '-';
146 if (*d < de) *(*d)++ = '-';
149 /* Write ASCII characters. */
153 c = uv__utf8_decode1(&s, se);
163 break; /* Visited all ASCII characters. */
169 /* Only write separator when we've written ASCII characters first. */
184 c = uv__utf8_decode1(&s, se);
196 return UV_E2BIG; /* Overflow. */
203 c = uv__utf8_decode1(&s, se);
208 return UV_E2BIG; /* Overflow. */
213 for (k = 36, q = delta; /* empty */; k += 36) {
225 /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
226 * 10 <= y <= 35, we can optimize the long division
227 * into a table-based reciprocal multiplication.
230 y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */
232 t = t + x % y; /* 1 <= t <= 35 because of y. */
235 *(*d)++ = alphabet[t];
239 *(*d)++ = alphabet[q];
248 /* No overflow check is needed because |delta| was just
249 * divided by 2 and |delta+delta >= delta + delta/h|.
254 for (bias = 0; delta > 35 * 26 / 2; bias += 36)
257 bias += 36 * delta / (delta + 38);
/* Convert the UTF-8 input [s, se) to its ASCII form, writing into the
 * buffer that starts at d and is limited by de.
 *
 * Code points are decoded one at a time with uv__utf8_decode1() and
 * compared against the full-stop variants U+3002, U+FF0E and U+FF61.
 * uv__idna_toascii_label() is called for the range [s, st) and, later, for
 * the range ending at se; it receives &d so that it advances this
 * function's write cursor.
 *
 * Returns d - ds, the number of bytes written.
 *
 * NOTE(review): presumably ds is the initial value of d, ASCII '.' is also
 * a separator, and a failing rc is returned early; none of that is visible
 * in this view - confirm against the full file.
 */
269 long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
281 c = uv__utf8_decode1(&si, se);
287 if (c != 0x3002) /* 。 U+3002 IDEOGRAPHIC FULL STOP */
288 if (c != 0xFF0E) /* ． U+FF0E FULLWIDTH FULL STOP */
289 if (c != 0xFF61) /* ｡ U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP */
292 rc = uv__idna_toascii_label(s, st, &d, de);
304 rc = uv__idna_toascii_label(s, se, &d, de);
313 return d - ds; /* Number of bytes written. */