src/url/url_canon_internal.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <errno.h>
   6 #include <stdlib.h>
   7
   8 #include <cstdio>
   9 #include <string>
  10
  11 #include "url/url_canon_internal.h"
  12
  13 namespace url_canon {
  14
  15 namespace {
  16
  17 template<typename CHAR, typename UCHAR>
  18 void DoAppendStringOfType(const CHAR* source, int length,
  19                           SharedCharTypes type,
  20                           CanonOutput* output) {
  21   for (int i = 0; i < length; i++) {
  22     if (static_cast<UCHAR>(source[i]) >= 0x80) {
  23       // ReadChar will fill the code point with kUnicodeReplacementCharacter
  24       // when the input is invalid, which is what we want.
  25       unsigned code_point;
  26       ReadUTFChar(source, &i, length, &code_point);
  27       AppendUTF8EscapedValue(code_point, output);
  28     } else {
  29       // Just append the 7-bit character, possibly escaping it.
  30       unsigned char uch = static_cast<unsigned char>(source[i]);
  31       if (!IsCharOfType(uch, type))
  32         AppendEscapedChar(uch, output);
  33       else
  34         output->push_back(uch);
  35     }
  36   }
  37 }
  38
  39 // This function assumes the input values are all contained in 8-bit,
  40 // although it allows any type. Returns true if input is valid, false if not.
  41 template<typename CHAR, typename UCHAR>
  42 void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end,
  43                                  CanonOutput* output) {
  44   for (int i = begin; i < end; i++) {
  45     UCHAR uch = static_cast<UCHAR>(spec[i]);
  46     if (uch >= 0x80) {
  47       // Handle UTF-8/16 encodings. This call will correctly handle the error
  48       // case by appending the invalid character.
  49       AppendUTF8EscapedChar(spec, &i, end, output);
  50     } else if (uch <= ' ' || uch == 0x7f) {
  51       // This function is for error handling, so we escape all control
  52       // characters and spaces, but not anything else since we lack
  53       // context to do something more specific.
  54       AppendEscapedChar(static_cast<unsigned char>(uch), output);
  55     } else {
  56       output->push_back(static_cast<char>(uch));
  57     }
  58   }
  59 }
  60
  61 // Overrides one component, see the url_canon::Replacements structure for
  62 // what the various combionations of source pointer and component mean.
  63 void DoOverrideComponent(const char* override_source,
  64                          const url_parse::Component& override_component,
  65                          const char** dest,
  66                          url_parse::Component* dest_component) {
  67   if (override_source) {
  68     *dest = override_source;
  69     *dest_component = override_component;
  70   }
  71 }
  72
  73 // Similar to DoOverrideComponent except that it takes a UTF-16 input and does
  74 // not actually set the output character pointer.
  75 //
  76 // The input is converted to UTF-8 at the end of the given buffer as a temporary
  77 // holding place. The component identifying the portion of the buffer used in
  78 // the |utf8_buffer| will be specified in |*dest_component|.
  79 //
  80 // This will not actually set any |dest| pointer like DoOverrideComponent
  81 // does because all of the pointers will point into the |utf8_buffer|, which
  82 // may get resized while we're overriding a subsequent component. Instead, the
  83 // caller should use the beginning of the |utf8_buffer| as the string pointer
  84 // for all components once all overrides have been prepared.
  85 bool PrepareUTF16OverrideComponent(
  86     const base::char16* override_source,
  87     const url_parse::Component& override_component,
  88     CanonOutput* utf8_buffer,
  89     url_parse::Component* dest_component) {
  90   bool success = true;
  91   if (override_source) {
  92     if (!override_component.is_valid()) {
  93       // Non-"valid" component (means delete), so we need to preserve that.
  94       *dest_component = url_parse::Component();
  95     } else {
  96       // Convert to UTF-8.
  97       dest_component->begin = utf8_buffer->length();
  98       success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
  99                                    override_component.len, utf8_buffer);
 100       dest_component->len = utf8_buffer->length() - dest_component->begin;
 101     }
 102   }
 103   return success;
 104 }
 105
 106 }  // namespace
 107
// See the header file for this array's declaration.
//
// One entry per 8-bit character value; the trailing comment on each row gives
// the value (or range of values) that the row covers. An entry is the bitwise
// OR of the CHAR_* flags that apply to that character, and 0 means none of
// them apply. Control characters, space, 0x7f and everything from 0x80 up are
// all 0.
// NOTE(review): presumably this is what IsCharOfType() consults -- confirm
// against the header.
const unsigned char kSharedCharTypeTable[0x100] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1f
    0,                           // 0x20  ' ' (escape spaces in queries)
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x21  !
    0,                           // 0x22  "
    0,                           // 0x23  #  (invalid in query since it marks the ref)
    CHAR_QUERY | CHAR_USERINFO,  // 0x24  $
    CHAR_QUERY | CHAR_USERINFO,  // 0x25  %
    CHAR_QUERY | CHAR_USERINFO,  // 0x26  &
    0,                           // 0x27  '  (Try to prevent XSS.)
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x28  (
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x29  )
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2a  *
    CHAR_QUERY | CHAR_USERINFO,  // 0x2b  +
    CHAR_QUERY | CHAR_USERINFO,  // 0x2c  ,
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2d  -
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x2e  .
    CHAR_QUERY,                  // 0x2f  /
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x30  0
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x31  1
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x32  2
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x33  3
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x34  4
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x35  5
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x36  6
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x37  7
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x38  8
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x39  9
    CHAR_QUERY,  // 0x3a  :
    CHAR_QUERY,  // 0x3b  ;
    0,           // 0x3c  <  (Try to prevent certain types of XSS.)
    CHAR_QUERY,  // 0x3d  =
    0,           // 0x3e  >  (Try to prevent certain types of XSS.)
    CHAR_QUERY,  // 0x3f  ?
    CHAR_QUERY,  // 0x40  @
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x41  A
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x42  B
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x43  C
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x44  D
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x45  E
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x46  F
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x47  G
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x48  H
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x49  I
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4a  J
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4b  K
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4c  L
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4d  M
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4e  N
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4f  O
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x50  P
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x51  Q
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x52  R
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x53  S
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x54  T
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x55  U
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x56  V
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x57  W
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x58  X
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x59  Y
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5a  Z
    CHAR_QUERY,  // 0x5b  [
    CHAR_QUERY,  // 0x5c  '\'
    CHAR_QUERY,  // 0x5d  ]
    CHAR_QUERY,  // 0x5e  ^
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5f  _
    CHAR_QUERY,  // 0x60  `
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x61  a
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x62  b
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x63  c
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x64  d
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x65  e
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x66  f
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x67  g
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x68  h
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x69  i
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6a  j
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6b  k
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6c  l
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6d  m
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6e  n
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6f  o
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x70  p
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x71  q
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x72  r
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x73  s
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x74  t
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x75  u
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x76  v
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x77  w
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x78  x
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x79  y
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7a  z
    CHAR_QUERY,  // 0x7b  {
    CHAR_QUERY,  // 0x7c  |
    CHAR_QUERY,  // 0x7d  }
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7e  ~
    0,           // 0x7f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0 - 0xaf
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0 - 0xbf
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0 - 0xcf
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0 - 0xdf
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0 - 0xef
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
};
 217
 218 const char kHexCharLookup[0x10] = {
 219     '0', '1', '2', '3', '4', '5', '6', '7',
 220     '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
 221 };
 222
 223 const char kCharToHexLookup[8] = {
 224     0,         // 0x00 - 0x1f
 225     '0',       // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39
 226     'A' - 10,  // 0x40 - 0x5f: letters A - F are 0x41 - 0x46
 227     'a' - 10,  // 0x60 - 0x7f: letters a - f are 0x61 - 0x66
 228     0,         // 0x80 - 0x9F
 229     0,         // 0xA0 - 0xBF
 230     0,         // 0xC0 - 0xDF
 231     0,         // 0xE0 - 0xFF
 232 };
 233
// The code point 0xFFFD. Per the comments elsewhere in this file, reading a
// UTF character fills the resulting code point with this value when the input
// is invalid.
const base::char16 kUnicodeReplacementCharacter = 0xfffd;
 235
// 8-bit overload. Forwards to DoAppendStringOfType with `char` as the
// character type and `unsigned char` as the type used for value comparisons.
void AppendStringOfType(const char* source, int length,
                        SharedCharTypes type,
                        CanonOutput* output) {
  DoAppendStringOfType<char, unsigned char>(source, length, type, output);
}
 241
// base::char16 overload. Forwards to DoAppendStringOfType with base::char16
// as both the character type and the type used for value comparisons.
void AppendStringOfType(const base::char16* source, int length,
                        SharedCharTypes type,
                        CanonOutput* output) {
  DoAppendStringOfType<base::char16, base::char16>(
      source, length, type, output);
}
 248
// 8-bit overload. Forwards spec[begin, end) to DoAppendInvalidNarrowString
// with `char` as the character type and `unsigned char` for value comparisons.
void AppendInvalidNarrowString(const char* spec, int begin, int end,
                               CanonOutput* output) {
  DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
}
 253
// base::char16 overload. Forwards spec[begin, end) to
// DoAppendInvalidNarrowString with base::char16 as both the character type
// and the type used for value comparisons.
void AppendInvalidNarrowString(const base::char16* spec, int begin, int end,
                               CanonOutput* output) {
  DoAppendInvalidNarrowString<base::char16, base::char16>(
      spec, begin, end, output);
}
 259
 260 bool ConvertUTF16ToUTF8(const base::char16* input, int input_len,
 261                         CanonOutput* output) {
 262   bool success = true;
 263   for (int i = 0; i < input_len; i++) {
 264     unsigned code_point;
 265     success &= ReadUTFChar(input, &i, input_len, &code_point);
 266     AppendUTF8Value(code_point, output);
 267   }
 268   return success;
 269 }
 270
 271 bool ConvertUTF8ToUTF16(const char* input, int input_len,
 272                         CanonOutputT<base::char16>* output) {
 273   bool success = true;
 274   for (int i = 0; i < input_len; i++) {
 275     unsigned code_point;
 276     success &= ReadUTFChar(input, &i, input_len, &code_point);
 277     AppendUTF16Value(code_point, output);
 278   }
 279   return success;
 280 }
 281
 282 void SetupOverrideComponents(const char* base,
 283                              const Replacements<char>& repl,
 284                              URLComponentSource<char>* source,
 285                              url_parse::Parsed* parsed) {
 286   // Get the source and parsed structures of the things we are replacing.
 287   const URLComponentSource<char>& repl_source = repl.sources();
 288   const url_parse::Parsed& repl_parsed = repl.components();
 289
 290   DoOverrideComponent(repl_source.scheme, repl_parsed.scheme,
 291                       &source->scheme, &parsed->scheme);
 292   DoOverrideComponent(repl_source.username, repl_parsed.username,
 293                       &source->username, &parsed->username);
 294   DoOverrideComponent(repl_source.password, repl_parsed.password,
 295                       &source->password, &parsed->password);
 296
 297   // Our host should be empty if not present, so override the default setup.
 298   DoOverrideComponent(repl_source.host, repl_parsed.host,
 299                       &source->host, &parsed->host);
 300   if (parsed->host.len == -1)
 301     parsed->host.len = 0;
 302
 303   DoOverrideComponent(repl_source.port, repl_parsed.port,
 304                       &source->port, &parsed->port);
 305   DoOverrideComponent(repl_source.path, repl_parsed.path,
 306                       &source->path, &parsed->path);
 307   DoOverrideComponent(repl_source.query, repl_parsed.query,
 308                       &source->query, &parsed->query);
 309   DoOverrideComponent(repl_source.ref, repl_parsed.ref,
 310                       &source->ref, &parsed->ref);
 311 }
 312
 313 bool SetupUTF16OverrideComponents(const char* base,
 314                                   const Replacements<base::char16>& repl,
 315                                   CanonOutput* utf8_buffer,
 316                                   URLComponentSource<char>* source,
 317                                   url_parse::Parsed* parsed) {
 318   bool success = true;
 319
 320   // Get the source and parsed structures of the things we are replacing.
 321   const URLComponentSource<base::char16>& repl_source = repl.sources();
 322   const url_parse::Parsed& repl_parsed = repl.components();
 323
 324   success &= PrepareUTF16OverrideComponent(
 325       repl_source.scheme, repl_parsed.scheme,
 326       utf8_buffer, &parsed->scheme);
 327   success &= PrepareUTF16OverrideComponent(
 328       repl_source.username, repl_parsed.username,
 329       utf8_buffer, &parsed->username);
 330   success &= PrepareUTF16OverrideComponent(
 331       repl_source.password, repl_parsed.password,
 332       utf8_buffer, &parsed->password);
 333   success &= PrepareUTF16OverrideComponent(
 334       repl_source.host, repl_parsed.host,
 335       utf8_buffer, &parsed->host);
 336   success &= PrepareUTF16OverrideComponent(
 337       repl_source.port, repl_parsed.port,
 338       utf8_buffer, &parsed->port);
 339   success &= PrepareUTF16OverrideComponent(
 340       repl_source.path, repl_parsed.path,
 341       utf8_buffer, &parsed->path);
 342   success &= PrepareUTF16OverrideComponent(
 343       repl_source.query, repl_parsed.query,
 344       utf8_buffer, &parsed->query);
 345   success &= PrepareUTF16OverrideComponent(
 346       repl_source.ref, repl_parsed.ref,
 347       utf8_buffer, &parsed->ref);
 348
 349   // PrepareUTF16OverrideComponent will not have set the data pointer since the
 350   // buffer could be resized, invalidating the pointers. We set the data
 351   // pointers for affected components now that the buffer is finalized.
 352   if (repl_source.scheme)   source->scheme = utf8_buffer->data();
 353   if (repl_source.username) source->username = utf8_buffer->data();
 354   if (repl_source.password) source->password = utf8_buffer->data();
 355   if (repl_source.host)     source->host = utf8_buffer->data();
 356   if (repl_source.port)     source->port = utf8_buffer->data();
 357   if (repl_source.path)     source->path = utf8_buffer->data();
 358   if (repl_source.query)    source->query = utf8_buffer->data();
 359   if (repl_source.ref)      source->ref = utf8_buffer->data();
 360
 361   return success;
 362 }
 363
 364 #ifndef WIN32
 365
 366 int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
 367   const char* format_str;
 368   if (radix == 10)
 369     format_str = "%d";
 370   else if (radix == 16)
 371     format_str = "%x";
 372   else
 373     return EINVAL;
 374
 375   int written = snprintf(buffer, size_in_chars, format_str, value);
 376   if (static_cast<size_t>(written) >= size_in_chars) {
 377     // Output was truncated, or written was negative.
 378     return EINVAL;
 379   }
 380   return 0;
 381 }
 382
 383 int _itow_s(int value, base::char16* buffer, size_t size_in_chars, int radix) {
 384   if (radix != 10)
 385     return EINVAL;
 386
 387   // No more than 12 characters will be required for a 32-bit integer.
 388   // Add an extra byte for the terminating null.
 389   char temp[13];
 390   int written = snprintf(temp, sizeof(temp), "%d", value);
 391   if (static_cast<size_t>(written) >= size_in_chars) {
 392     // Output was truncated, or written was negative.
 393     return EINVAL;
 394   }
 395
 396   for (int i = 0; i < written; ++i) {
 397     buffer[i] = static_cast<base::char16>(temp[i]);
 398   }
 399   buffer[written] = '\0';
 400   return 0;
 401 }
 402
 403 #endif  // !WIN32
 404
 405 }  // namespace url_canon