url/url_canon_host.cc

   1 // Copyright 2013 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/check.h"
   6 #include "base/cpu_reduction_experiment.h"
   7 #include "base/logging.h"
   8 #include "base/strings/string_util.h"
   9 #include "url/url_canon_internal.h"
  10 #include "url/url_features.h"
  11 #include "url/url_util.h"
  12
  13 namespace url {
  14
  15 namespace {
  16
  17 // clang-format off
  18 //
  19 // For reference, here's what IE supports:
  20 // Key: 0 (disallowed: failure if present in the input)
  21 //      + (allowed either escaped or unescaped, and unmodified)
  22 //      U (allowed escaped or unescaped but always unescaped if present in
  23 //         escaped form)
  24 //      E (allowed escaped or unescaped but always escaped if present in
  25 //         unescaped form)
  26 //      % (only allowed escaped in the input, will be unmodified).
  27 //      I left blank alpha numeric characters.
  28 //
  29 //    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
  30 //    -----------------------------------------------
  31 // 0   0  E  E  E  E  E  E  E  E  E  E  E  E  E  E  E
  32 // 1   E  E  E  E  E  E  E  E  E  E  E  E  E  E  E  E
  33 // 2   E  +  E  E  +  E  +  +  +  +  +  +  +  U  U  0
  34 // 3                                 %  %  E  +  E  0  <-- Those are  : ; < = > ?
  35 // 4   %
  36 // 5                                    U  0  U  U  U  <-- Those are  [ \ ] ^ _
  37 // 6   E                                               <-- That's  `
  38 // 7                                    E  E  E  U  E  <-- Those are { | } ~ (UNPRINTABLE)
  39 //
  40 // NOTE: I didn't actually test all the control characters. Some may be
  41 // disallowed in the input, but they are all accepted escaped except for 0.
  42 // I also didn't test if characters affecting HTML parsing are allowed
  43 // unescaped, e.g. (") or (#), which would indicate the beginning of the path.
  44 // Surprisingly, space is accepted in the input and always escaped.
  45 //
  46 // TODO(https://crbug.com/1416013): Remove the above historical reference
  47 // information once we are 100% standard compliant to the URL Standard.
  48 //
  49 // This table lists the canonical version of all characters we allow in the
  50 // input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar
  51 // value to indicate that this character should be escaped. We are a little more
  52 // restrictive than IE, but less restrictive than Firefox.
  53 //
  54 const unsigned char kEsc = 0xff;
  55 const unsigned char kHostCharLookup[0x80] = {
  56 // 00-1f: all are invalid
  57      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  58      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  59 //  ' '   !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /
  60    kEsc,kEsc,kEsc,kEsc,kEsc,  0, kEsc,kEsc,kEsc,kEsc,kEsc, '+',kEsc, '-', '.',  0,
  61 //   0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?
  62     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':',  0 ,kEsc,kEsc,kEsc,  0 ,
  63 //   @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O
  64    kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  65 //   P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _
  66     'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[',  0 , ']',  0 , '_',
  67 //   `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o
  68    kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  69 //   p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~
  70     'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',kEsc,kEsc,kEsc,  0 ,  0 };
  71
  72 // The following table is used when kStandardCompliantHostCharLookup feature is
  73 // enabled. See https://crbug.com/1416013 for details. At present, ' ' (SPACE)
  74 // and '*' (asterisk) are still non-compliant to the URL Standard.
  75 const unsigned char kStandardCompliantHostCharLookup[0x80] = {
  76 // 00-1f: all are invalid
  77      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  78      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  79 //  ' '   !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /
  80     kEsc,'!', '"',  0,  '$',  0,  '&', '\'','(', ')', kEsc, '+', ',', '-', '.',  0,
  81 //   0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?
  82     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';' , 0,  '=',  0,   0,
  83 //   @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O
  84      0,  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  85 //   P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _
  86     'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[',  0,  ']',  0,  '_',
  87 //   `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o
  88     '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  89 //   p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~
  90     'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{',  0, '}',  '~',  0 };
  91 // clang-format on
  92
  93 // RFC1034 maximum FQDN length.
  94 constexpr size_t kMaxHostLength = 253;
  95
  96 // Generous padding to account for the fact that UTS#46 normalization can cause
  97 // a long string to actually shrink and fit within the 253 character RFC1034
  98 // FQDN length limit. Note that this can still be too short for pathological
  99 // cases: An arbitrary number of characters (e.g. U+00AD SOFT HYPHEN) can be
 100 // removed from the input by UTS#46 processing. However, this should be
 101 // sufficient for all normally-encountered, non-abusive hostname strings.
 102 constexpr size_t kMaxHostBufferLength = kMaxHostLength * 5;
 103
 104 constexpr size_t kTempHostBufferLen = 1024;
 105 using StackBuffer = RawCanonOutputT<char, kTempHostBufferLen>;
 106 using StackBufferW = RawCanonOutputT<char16_t, kTempHostBufferLen>;
 107
 108 // Scans a host name and fills in the output flags according to what we find.
 109 // |has_non_ascii| will be true if there are any non-7-bit characters, and
 110 // |has_escaped| will be true if there is a percent sign.
 111 template<typename CHAR, typename UCHAR>
 112 void ScanHostname(const CHAR* spec,
 113                   const Component& host,
 114                   bool* has_non_ascii,
 115                   bool* has_escaped) {
 116   int end = host.end();
 117   *has_non_ascii = false;
 118   *has_escaped = false;
 119   for (int i = host.begin; i < end; i++) {
 120     if (static_cast<UCHAR>(spec[i]) >= 0x80)
 121       *has_non_ascii = true;
 122     else if (spec[i] == '%')
 123       *has_escaped = true;
 124   }
 125 }
 126
 127 // Canonicalizes a host name that is entirely 8-bit characters (even though
 128 // the type holding them may be 16 bits. Escaped characters will be unescaped.
 129 // Non-7-bit characters (for example, UTF-8) will be passed unchanged.
 130 //
 131 // The |*has_non_ascii| flag will be true if there are non-7-bit characters in
 132 // the output.
 133 //
 134 // This function is used in two situations:
 135 //
 136 //  * When the caller knows there is no non-ASCII or percent escaped
 137 //    characters. This is what DoHost does. The result will be a completely
 138 //    canonicalized host since we know nothing weird can happen (escaped
 139 //    characters could be unescaped to non-7-bit, so they have to be treated
 140 //    with suspicion at this point). It does not use the |has_non_ascii| flag.
 141 //
 142 //  * When the caller has an 8-bit string that may need unescaping.
 143 //    DoComplexHost calls us this situation to do unescaping and validation.
 144 //    After this, it may do other IDN operations depending on the value of the
 145 //    |*has_non_ascii| flag.
 146 //
 147 // The return value indicates if the output is a potentially valid host name.
 148 template <typename INCHAR, typename OUTCHAR>
 149 bool DoSimpleHost(const INCHAR* host,
 150                   size_t host_len,
 151                   CanonOutputT<OUTCHAR>* output,
 152                   bool* has_non_ascii,
 153                   bool to_lower_case = true) {
 154   *has_non_ascii = false;
 155
 156   bool success = true;
 157   for (size_t i = 0; i < host_len; ++i) {
 158     unsigned int source = host[i];
 159     if (source == '%') {
 160       // Unescape first, if possible.
 161       // Source will be used only if decode operation was successful.
 162       if (!DecodeEscaped(host, &i, host_len,
 163                          reinterpret_cast<unsigned char*>(&source))) {
 164         // Invalid escaped character. There is nothing that can make this
 165         // host valid. We append an escaped percent so the URL looks reasonable
 166         // and mark as failed.
 167         AppendEscapedChar('%', output);
 168         success = false;
 169         continue;
 170       }
 171     }
 172
 173     if (source < 0x80) {
 174       // We have ASCII input, we can use our lookup table.
 175       unsigned char replacement;
 176       if (url::IsUsingStandardCompliantHostCharacters()) {
 177         replacement = kStandardCompliantHostCharLookup[source];
 178       } else {
 179         replacement = kHostCharLookup[source];
 180       }
 181       if (!replacement) {
 182         // Invalid character, add it as percent-escaped and mark as failed.
 183         AppendEscapedChar(source, output);
 184         success = false;
 185       } else if (replacement == kEsc) {
 186         // This character is valid but should be escaped.
 187         AppendEscapedChar(source, output);
 188       }
 189 #if BUILDFLAG(IS_TIZEN_TV)
 190       else if (!to_lower_case) {
 191         output->push_back(source);
 192       }
 193 #endif
 194       else {
 195         // Common case, the given character is valid in a hostname, the lookup
 196         // table tells us the canonical representation of that character (lower
 197         // cased).
 198         output->push_back(replacement);
 199       }
 200     } else {
 201       // It's a non-ascii char. Just push it to the output.
 202       // In case where we have char16 input, and char output it's safe to
 203       // cast char16->char only if input string was converted to ASCII.
 204       output->push_back(static_cast<OUTCHAR>(source));
 205       *has_non_ascii = true;
 206     }
 207   }
 208   return success;
 209 }
 210
 211 // Canonicalizes a host that requires IDN conversion. Returns true on success
 212 bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
 213   int original_output_len = output->length();  // So we can rewind below.
 214
 215   // We need to escape URL before doing IDN conversion, since punicode strings
 216   // cannot be escaped after they are created.
 217   RawCanonOutputW<kTempHostBufferLen> url_escaped_host;
 218   bool has_non_ascii;
 219   DoSimpleHost(src, src_len, &url_escaped_host, &has_non_ascii);
 220   if (url_escaped_host.length() > kMaxHostBufferLength) {
 221     AppendInvalidNarrowString(src, 0, src_len, output);
 222     return false;
 223   }
 224
 225   StackBufferW wide_output;
 226   if (!IDNToASCII(url_escaped_host.view(), &wide_output)) {
 227     // Some error, give up. This will write some reasonable looking
 228     // representation of the string to the output.
 229     AppendInvalidNarrowString(src, 0, src_len, output);
 230     return false;
 231   }
 232
 233   // Now we check the ASCII output like a normal host. It will also handle
 234   // unescaping. Although we unescaped everything before this function call, if
 235   // somebody does %00 as fullwidth, ICU will convert this to ASCII.
 236   bool success = DoSimpleHost(wide_output.data(), wide_output.length(), output,
 237                               &has_non_ascii);
 238   if (has_non_ascii) {
 239     // ICU generated something that DoSimpleHost didn't think looked like
 240     // ASCII. This is quite rare, but ICU might convert some characters to
 241     // percent signs which might generate new escape sequences which might in
 242     // turn be invalid. An example is U+FE6A "small percent" which ICU will
 243     // name prep into an ASCII percent and then we can interpret the following
 244     // characters as escaped characters.
 245     //
 246     // If DoSimpleHost didn't think the output was ASCII, just escape the
 247     // thing we gave ICU and give up. DoSimpleHost will have handled a further
 248     // level of escaping from ICU for simple ASCII cases (i.e. if ICU generates
 249     // a new escaped ASCII sequence like "%41" we'll unescape it) but it won't
 250     // do more (like handle escaped non-ASCII sequences). Handling the escaped
 251     // ASCII isn't strictly necessary, but DoSimpleHost handles this case
 252     // anyway so we handle it/
 253     output->set_length(original_output_len);
 254     AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(),
 255                               output);
 256     return false;
 257   }
 258   return success;
 259 }
 260
 261 // 8-bit convert host to its ASCII version: this converts the UTF-8 input to
 262 // UTF-16. The has_escaped flag should be set if the input string requires
 263 // unescaping.
 264 bool DoComplexHost(const char* host,
 265                    size_t host_len,
 266                    bool has_non_ascii,
 267                    bool has_escaped,
 268                    CanonOutput* output) {
 269   // Save the current position in the output. We may write stuff and rewind it
 270   // below, so we need to know where to rewind to.
 271   size_t begin_length = output->length();
 272
 273   // Points to the UTF-8 data we want to convert. This will either be the
 274   // input or the unescaped version written to |*output| if necessary.
 275   const char* utf8_source;
 276   size_t utf8_source_len;
 277   bool are_all_escaped_valid = true;
 278   if (has_escaped) {
 279     // Unescape before converting to UTF-16 for IDN. We write this into the
 280     // output because it most likely does not require IDNization, and we can
 281     // save another huge stack buffer. It will be replaced below if it requires
 282     // IDN. This will also update our non-ASCII flag so we know whether the
 283     // unescaped input requires IDN.
 284     if (!DoSimpleHost(host, host_len, output, &has_non_ascii)) {
 285       // Error with some escape sequence. We'll call the current output
 286       // complete. DoSimpleHost will have written some "reasonable" output
 287       // for the invalid escapes, but the output could be non-ASCII and
 288       // needs to go through re-encoding below.
 289       are_all_escaped_valid = false;
 290     }
 291
 292     // Unescaping may have left us with ASCII input, in which case the
 293     // unescaped version we wrote to output is complete.
 294     if (!has_non_ascii) {
 295       return are_all_escaped_valid;
 296     }
 297
 298     // Save the pointer into the data was just converted (it may be appended to
 299     // other data in the output buffer).
 300     utf8_source = &output->data()[begin_length];
 301     utf8_source_len = output->length() - begin_length;
 302   } else {
 303     // We don't need to unescape, use input for IDNization later. (We know the
 304     // input has non-ASCII, or the simple version would have been called
 305     // instead of us.)
 306     utf8_source = host;
 307     utf8_source_len = host_len;
 308   }
 309
 310   // Non-ASCII input requires IDN, convert to UTF-16 and do the IDN conversion.
 311   // Above, we may have used the output to write the unescaped values to, so
 312   // we have to rewind it to where we started after we convert it to UTF-16.
 313   StackBufferW utf16;
 314   if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) {
 315     // In this error case, the input may or may not be the output.
 316     StackBuffer utf8;
 317     for (size_t i = 0; i < utf8_source_len; i++)
 318       utf8.push_back(utf8_source[i]);
 319     output->set_length(begin_length);
 320     AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output);
 321     return false;
 322   }
 323   output->set_length(begin_length);
 324
 325   // This will call DoSimpleHost which will do normal ASCII canonicalization
 326   // and also check for IP addresses in the outpt.
 327   return DoIDNHost(utf16.data(), utf16.length(), output) &&
 328          are_all_escaped_valid;
 329 }
 330
 331 // UTF-16 convert host to its ASCII version. The set up is already ready for
 332 // the backend, so we just pass through. The has_escaped flag should be set if
 333 // the input string requires unescaping.
 334 bool DoComplexHost(const char16_t* host,
 335                    size_t host_len,
 336                    bool has_non_ascii,
 337                    bool has_escaped,
 338                    CanonOutput* output) {
 339   if (has_escaped) {
 340     // Yikes, we have escaped characters with wide input. The escaped
 341     // characters should be interpreted as UTF-8. To solve this problem,
 342     // we convert to UTF-8, unescape, then convert back to UTF-16 for IDN.
 343     //
 344     // We don't bother to optimize the conversion in the ASCII case (which
 345     // *could* just be a copy) and use the UTF-8 path, because it should be
 346     // very rare that host names have escaped characters, and it is relatively
 347     // fast to do the conversion anyway.
 348     StackBuffer utf8;
 349     if (!ConvertUTF16ToUTF8(host, host_len, &utf8)) {
 350       AppendInvalidNarrowString(host, 0, host_len, output);
 351       return false;
 352     }
 353
 354     // Once we convert to UTF-8, we can use the 8-bit version of the complex
 355     // host handling code above.
 356     return DoComplexHost(utf8.data(), utf8.length(), has_non_ascii, has_escaped,
 357                          output);
 358   }
 359
 360   // No unescaping necessary, we can safely pass the input to ICU. This
 361   // function will only get called if we either have escaped or non-ascii
 362   // input, so it's safe to just use ICU now. Even if the input is ASCII,
 363   // this function will do the right thing (just slower than we could).
 364   return DoIDNHost(host, host_len, output);
 365 }
 366
 367 template <typename CHAR, typename UCHAR>
 368 bool DoHostSubstring(const CHAR* spec,
 369                      const Component& host,
 370                      CanonOutput* output,
 371                      bool to_lower_case = true) {
 372   DCHECK(host.is_valid());
 373
 374   bool has_non_ascii, has_escaped;
 375   ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
 376
 377   if (has_non_ascii || has_escaped) {
 378     return DoComplexHost(&spec[host.begin], static_cast<size_t>(host.len),
 379                          has_non_ascii, has_escaped, output);
 380   }
 381
 382   const bool success =
 383       DoSimpleHost(&spec[host.begin], static_cast<size_t>(host.len), output,
 384                    &has_non_ascii, to_lower_case);
 385   DCHECK(!has_non_ascii);
 386   return success;
 387 }
 388
 389 template <typename CHAR, typename UCHAR>
 390 void DoHost(const CHAR* spec,
 391             const Component& host,
 392             CanonOutput* output,
 393             CanonHostInfo* host_info) {
 394   if (host.is_empty()) {
 395     // Empty hosts don't need anything.
 396     host_info->family = CanonHostInfo::NEUTRAL;
 397     host_info->out_host = Component();
 398     return;
 399   }
 400
 401   // Keep track of output's initial length, so we can rewind later.
 402   const int output_begin = output->length();
 403
 404   bool to_lower_case = true;
 405 #if BUILDFLAG(IS_TIZEN_TV)
 406   Component scheme;
 407   ExtractScheme(spec, host.begin, &scheme);
 408
 409   if (CompareSchemeComponent(spec, scheme, url::kCIScheme))
 410     to_lower_case = false;
 411 #endif
 412
 413   if (DoHostSubstring<CHAR, UCHAR>(spec, host, output, to_lower_case)) {
 414 #if BUILDFLAG(IS_TIZEN_TV)
 415     if (CompareSchemeComponent(spec, scheme, url::kDvbScheme)) {
 416       // The above host checking code can think that the DVB locator is an
 417       // IP address. So we skip the IP Address check if it starts with DVB.
 418       // This allows the later code to check the validity by access.
 419       // TODO: Validate the DVB Locator.
 420       LOG(INFO) << "This is a DVB Locator " << spec;
 421     } else if (CompareSchemeComponent(spec, scheme, url::kTVKeyScheme)) {
 422       // TVKeys address starts with an hexadecimal number so could be
 423       // confused with an IP Address. Also an IP Address would be a
 424       // security hole.
 425       LOG(INFO) << "This is a TVKeys Url " << spec;
 426       host_info->family = CanonHostInfo::NEUTRAL;
 427     } else if (CompareSchemeComponent(spec, scheme, url::kOpAppScheme)) {
 428       // The above host checking code can think that the OpApp locator is an
 429       // IP address. So we skip the IP Address check if it starts with
 430       // 'hbbtv-package'.
 431       // This allows the later code to check the validity by access.
 432       LOG(INFO) << "This is an OpApp Locator " << spec;
 433       host_info->family = CanonHostInfo::NEUTRAL;
 434     } else if (CompareSchemeComponent(spec, scheme,
 435                                       url::kHbbTVCarouselScheme)) {
 436       // HbbTV Carousel URLs have a pattern that could be mistaken for
 437       // a IP address with a port (a single integer). So again we need to
 438       // skip the IP check.
 439       //
 440       // format "hbbtv-carousel://OOOO:CCCC"
 441       //  Where OOOO = organisation ID (max 16bit).
 442       //        CCCC = carousel ID (max 64bit).
 443       //
 444       // Ref: HbbTV 2.0.1 Specification (errata 2) section 6.3.2
 445       LOG(INFO) << "This is a HbbTV Carousel Url " << spec;
 446     } else if (CompareSchemeComponent(spec, scheme, url::kCIScheme)) {
 447       // The above host checking code can think that the CI locator is an
 448       // IP address. So we skip the IP Address check if it starts with CI.
 449       // This allows the later code to check the validity by access.
 450       LOG(INFO) << "This is a HbbTV CI Locator " << spec;
 451       host_info->family = CanonHostInfo::NEUTRAL;
 452     } else
 453 #endif
 454     {
 455       // After all the other canonicalization, check if we ended up with an IP
 456       // address. IP addresses are small, so writing into this temporary buffer
 457       // should not cause an allocation.
 458       RawCanonOutput<64> canon_ip;
 459       CanonicalizeIPAddress(output->data(),
 460                             MakeRange(output_begin, output->length()),
 461                             &canon_ip, host_info);
 462
 463       // If we got an IPv4/IPv6 address, copy the canonical form back to the
 464       // real buffer. Otherwise, it's a hostname or broken IP, in which case
 465       // we just leave it in place.
 466       if (host_info->IsIPAddress()) {
 467         output->set_length(output_begin);
 468         output->Append(canon_ip.view());
 469       }
 470     }
 471   } else {
 472     // Canonicalization failed. Set BROKEN to notify the caller.
 473     host_info->family = CanonHostInfo::BROKEN;
 474   }
 475
 476   host_info->out_host = MakeRange(output_begin, output->length());
 477 }
 478
 479 }  // namespace
 480
 481 bool CanonicalizeHost(const char* spec,
 482                       const Component& host,
 483                       CanonOutput* output,
 484                       Component* out_host) {
 485   CanonHostInfo host_info;
 486   DoHost<char, unsigned char>(spec, host, output, &host_info);
 487   *out_host = host_info.out_host;
 488   return (host_info.family != CanonHostInfo::BROKEN);
 489 }
 490
 491 bool CanonicalizeHost(const char16_t* spec,
 492                       const Component& host,
 493                       CanonOutput* output,
 494                       Component* out_host) {
 495   CanonHostInfo host_info;
 496   DoHost<char16_t, char16_t>(spec, host, output, &host_info);
 497   *out_host = host_info.out_host;
 498   return (host_info.family != CanonHostInfo::BROKEN);
 499 }
 500
 501 void CanonicalizeHostVerbose(const char* spec,
 502                              const Component& host,
 503                              CanonOutput* output,
 504                              CanonHostInfo* host_info) {
 505   DoHost<char, unsigned char>(spec, host, output, host_info);
 506 }
 507
 508 void CanonicalizeHostVerbose(const char16_t* spec,
 509                              const Component& host,
 510                              CanonOutput* output,
 511                              CanonHostInfo* host_info) {
 512   DoHost<char16_t, char16_t>(spec, host, output, host_info);
 513 }
 514
 515 bool CanonicalizeHostSubstring(const char* spec,
 516                                const Component& host,
 517                                CanonOutput* output) {
 518   return DoHostSubstring<char, unsigned char>(spec, host, output);
 519 }
 520
 521 bool CanonicalizeHostSubstring(const char16_t* spec,
 522                                const Component& host,
 523                                CanonOutput* output) {
 524   return DoHostSubstring<char16_t, char16_t>(spec, host, output);
 525 }
 526
 527 }  // namespace url