url/url_canon_internal.cc

   1 // Copyright 2013 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "url/url_canon_internal.h"
   6
   7 #include <errno.h>
   8 #include <stddef.h>
   9 #include <stdlib.h>
  10 #ifdef __SSE2__
  11 #include <immintrin.h>
  12 #elif defined(__aarch64__)
  13 #include <arm_neon.h>
  14 #endif
  15
  16 #include <cstdio>
  17 #include <string>
  18
  19 #include "base/bits.h"
  20 #include "base/numerics/safe_conversions.h"
  21 #include "base/strings/utf_string_conversion_utils.h"
  22
  23 namespace url {
  24
  25 namespace {
  26
  27 // Find the initial segment of the given string that consists solely
  28 // of characters valid for CHAR_QUERY. (We can have false negatives in
  29 // one specific case, namely the exclamation mark 0x21, but false negatives
  30 // are fine, and it's not worth adding a separate test for.) This is
  31 // a fast path to speed up checking of very long query strings that are
  32 // already valid, which happen on some web pages.
  33 //
  34 // This has some startup cost to load the constants and such, so it's
  35 // usually not worth it for short strings.
  36 size_t FindInitialQuerySafeString(const char* source, size_t length) {
  37 #if defined(__SSE2__) || defined(__aarch64__)
  38   constexpr size_t kChunkSize = 16;
  39   size_t i;
  40   for (i = 0; i < base::bits::AlignDown(length, kChunkSize); i += kChunkSize) {
  41     char b __attribute__((vector_size(16)));
  42     memcpy(&b, source + i, sizeof(b));
  43
  44     // Compare each element with the ranges for CHAR_QUERY
  45     // (see kSharedCharTypeTable), vectorized so that it creates
  46     // a mask of which elements match. For completeness, we could
  47     // have had (...) | b == 0x21 here, but exclamation marks are
  48     // rare and the extra test costs us some time.
  49     auto mask = b >= 0x24 && b <= 0x7e && b != 0x27 && b != 0x3c && b != 0x3e;
  50
  51 #ifdef __SSE2__
  52     if (_mm_movemask_epi8(reinterpret_cast<__m128i>(mask)) != 0xffff) {
  53       return i;
  54     }
  55 #else
  56     if (vminvq_u8(reinterpret_cast<uint8x16_t>(mask)) == 0) {
  57       return i;
  58     }
  59 #endif
  60   }
  61   return i;
  62 #else
  63   // Need SIMD support (with fast reductions) for this to be efficient.
  64   return 0;
  65 #endif
  66 }
  67
  68 template <typename CHAR, typename UCHAR>
  69 void DoAppendStringOfType(const CHAR* source,
  70                           size_t length,
  71                           SharedCharTypes type,
  72                           CanonOutput* output) {
  73   size_t i = 0;
  74   // We only instantiate this for char, to avoid a Clang crash
  75   // (and because Append() does not support converting).
  76   if constexpr (sizeof(CHAR) == 1) {
  77     if (type == CHAR_QUERY && length >= kMinimumLengthForSIMD) {
  78       i = FindInitialQuerySafeString(source, length);
  79       output->Append(source, i);
  80     }
  81   }
  82   for (; i < length; i++) {
  83     if (static_cast<UCHAR>(source[i]) >= 0x80) {
  84       // ReadUTFCharLossy will fill the code point with
  85       // kUnicodeReplacementCharacter when the input is invalid, which is what
  86       // we want.
  87       base_icu::UChar32 code_point;
  88       ReadUTFCharLossy(source, &i, length, &code_point);
  89       AppendUTF8EscapedValue(code_point, output);
  90     } else {
  91       // Just append the 7-bit character, possibly escaping it.
  92       unsigned char uch = static_cast<unsigned char>(source[i]);
  93       if (!IsCharOfType(uch, type))
  94         AppendEscapedChar(uch, output);
  95       else
  96         output->push_back(uch);
  97     }
  98   }
  99 }
 100
 101 // This function assumes the input values are all contained in 8-bit,
 102 // although it allows any type. Returns true if input is valid, false if not.
 103 template <typename CHAR, typename UCHAR>
 104 void DoAppendInvalidNarrowString(const CHAR* spec,
 105                                  size_t begin,
 106                                  size_t end,
 107                                  CanonOutput* output) {
 108   for (size_t i = begin; i < end; i++) {
 109     UCHAR uch = static_cast<UCHAR>(spec[i]);
 110     if (uch >= 0x80) {
 111       // Handle UTF-8/16 encodings. This call will correctly handle the error
 112       // case by appending the invalid character.
 113       AppendUTF8EscapedChar(spec, &i, end, output);
 114     } else if (uch <= ' ' || uch == 0x7f) {
 115       // This function is for error handling, so we escape all control
 116       // characters and spaces, but not anything else since we lack
 117       // context to do something more specific.
 118       AppendEscapedChar(static_cast<unsigned char>(uch), output);
 119     } else {
 120       output->push_back(static_cast<char>(uch));
 121     }
 122   }
 123 }
 124
 125 // Overrides one component, see the Replacements structure for
 126 // what the various combionations of source pointer and component mean.
 127 void DoOverrideComponent(const char* override_source,
 128                          const Component& override_component,
 129                          const char** dest,
 130                          Component* dest_component) {
 131   if (override_source) {
 132     *dest = override_source;
 133     *dest_component = override_component;
 134   }
 135 }
 136
 137 // Similar to DoOverrideComponent except that it takes a UTF-16 input and does
 138 // not actually set the output character pointer.
 139 //
 140 // The input is converted to UTF-8 at the end of the given buffer as a temporary
 141 // holding place. The component identifying the portion of the buffer used in
 142 // the |utf8_buffer| will be specified in |*dest_component|.
 143 //
 144 // This will not actually set any |dest| pointer like DoOverrideComponent
 145 // does because all of the pointers will point into the |utf8_buffer|, which
 146 // may get resized while we're overriding a subsequent component. Instead, the
 147 // caller should use the beginning of the |utf8_buffer| as the string pointer
 148 // for all components once all overrides have been prepared.
 149 bool PrepareUTF16OverrideComponent(const char16_t* override_source,
 150                                    const Component& override_component,
 151                                    CanonOutput* utf8_buffer,
 152                                    Component* dest_component) {
 153   bool success = true;
 154   if (override_source) {
 155     if (!override_component.is_valid()) {
 156       // Non-"valid" component (means delete), so we need to preserve that.
 157       *dest_component = Component();
 158     } else {
 159       // Convert to UTF-8.
 160       dest_component->begin = utf8_buffer->length();
 161       success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
 162                                    static_cast<size_t>(override_component.len),
 163                                    utf8_buffer);
 164       dest_component->len = utf8_buffer->length() - dest_component->begin;
 165     }
 166   }
 167   return success;
 168 }
 169
 170 }  // namespace
 171
 172 // See the header file for this array's declaration.
 173 // clang-format off
 174 const unsigned char kSharedCharTypeTable[0x100] = {
 175     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0f
 176     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1f
 177     0,                           // 0x20  ' ' (escape spaces in queries)
 178     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x21  !
 179     0,                           // 0x22  "
 180     0,                           // 0x23  #  (invalid in query since it marks the ref)
 181     CHAR_QUERY | CHAR_USERINFO,  // 0x24  $
 182     CHAR_QUERY | CHAR_USERINFO,  // 0x25  %
 183     CHAR_QUERY | CHAR_USERINFO,  // 0x26  &
 184     0,                           // 0x27  '  (Try to prevent XSS.)
 185     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x28  (
 186     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x29  )
 187     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2a  *
 188     CHAR_QUERY | CHAR_USERINFO,  // 0x2b  +
 189     CHAR_QUERY | CHAR_USERINFO,  // 0x2c  ,
 190     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2d  -
 191     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x2e  .
 192     CHAR_QUERY,                  // 0x2f  /
 193     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x30  0
 194     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x31  1
 195     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x32  2
 196     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x33  3
 197     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x34  4
 198     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x35  5
 199     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x36  6
 200     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x37  7
 201     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x38  8
 202     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x39  9
 203     CHAR_QUERY,  // 0x3a  :
 204     CHAR_QUERY,  // 0x3b  ;
 205     0,           // 0x3c  <  (Try to prevent certain types of XSS.)
 206     CHAR_QUERY,  // 0x3d  =
 207     0,           // 0x3e  >  (Try to prevent certain types of XSS.)
 208     CHAR_QUERY,  // 0x3f  ?
 209     CHAR_QUERY,  // 0x40  @
 210     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x41  A
 211     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x42  B
 212     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x43  C
 213     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x44  D
 214     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x45  E
 215     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x46  F
 216     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x47  G
 217     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x48  H
 218     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x49  I
 219     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4a  J
 220     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4b  K
 221     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4c  L
 222     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4d  M
 223     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4e  N
 224     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4f  O
 225     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x50  P
 226     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x51  Q
 227     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x52  R
 228     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x53  S
 229     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x54  T
 230     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x55  U
 231     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x56  V
 232     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x57  W
 233     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x58  X
 234     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x59  Y
 235     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5a  Z
 236     CHAR_QUERY,  // 0x5b  [
 237     CHAR_QUERY,  // 0x5c  '\'
 238     CHAR_QUERY,  // 0x5d  ]
 239     CHAR_QUERY,  // 0x5e  ^
 240     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5f  _
 241     CHAR_QUERY,  // 0x60  `
 242     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x61  a
 243     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x62  b
 244     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x63  c
 245     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x64  d
 246     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x65  e
 247     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x66  f
 248     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x67  g
 249     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x68  h
 250     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x69  i
 251     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6a  j
 252     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6b  k
 253     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6c  l
 254     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6d  m
 255     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6e  n
 256     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6f  o
 257     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x70  p
 258     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x71  q
 259     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x72  r
 260     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x73  s
 261     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x74  t
 262     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x75  u
 263     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x76  v
 264     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x77  w
 265     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x78  x
 266     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x79  y
 267     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7a  z
 268     CHAR_QUERY,  // 0x7b  {
 269     CHAR_QUERY,  // 0x7c  |
 270     CHAR_QUERY,  // 0x7d  }
 271     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7e  ~
 272     0,           // 0x7f
 273     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8f
 274     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9f
 275     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0 - 0xaf
 276     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0 - 0xbf
 277     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0 - 0xcf
 278     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0 - 0xdf
 279     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0 - 0xef
 280     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
 281 };
 282 // clang-format on
 283
 284 const char kCharToHexLookup[8] = {
 285     0,         // 0x00 - 0x1f
 286     '0',       // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39
 287     'A' - 10,  // 0x40 - 0x5f: letters A - F are 0x41 - 0x46
 288     'a' - 10,  // 0x60 - 0x7f: letters a - f are 0x61 - 0x66
 289     0,         // 0x80 - 0x9F
 290     0,         // 0xA0 - 0xBF
 291     0,         // 0xC0 - 0xDF
 292     0,         // 0xE0 - 0xFF
 293 };
 294
 295 const base_icu::UChar32 kUnicodeReplacementCharacter = 0xfffd;
 296
 297 void AppendStringOfType(const char* source,
 298                         size_t length,
 299                         SharedCharTypes type,
 300                         CanonOutput* output) {
 301   DoAppendStringOfType<char, unsigned char>(source, length, type, output);
 302 }
 303
 304 void AppendStringOfType(const char16_t* source,
 305                         size_t length,
 306                         SharedCharTypes type,
 307                         CanonOutput* output) {
 308   DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
 309 }
 310
 311 bool ReadUTFCharLossy(const char* str,
 312                       size_t* begin,
 313                       size_t length,
 314                       base_icu::UChar32* code_point_out) {
 315   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
 316     *code_point_out = kUnicodeReplacementCharacter;
 317     return false;
 318   }
 319   return true;
 320 }
 321
 322 bool ReadUTFCharLossy(const char16_t* str,
 323                       size_t* begin,
 324                       size_t length,
 325                       base_icu::UChar32* code_point_out) {
 326   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
 327     *code_point_out = kUnicodeReplacementCharacter;
 328     return false;
 329   }
 330   return true;
 331 }
 332
 333 void AppendInvalidNarrowString(const char* spec,
 334                                size_t begin,
 335                                size_t end,
 336                                CanonOutput* output) {
 337   DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
 338 }
 339
 340 void AppendInvalidNarrowString(const char16_t* spec,
 341                                size_t begin,
 342                                size_t end,
 343                                CanonOutput* output) {
 344   DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
 345 }
 346
 347 bool ConvertUTF16ToUTF8(const char16_t* input,
 348                         size_t input_len,
 349                         CanonOutput* output) {
 350   bool success = true;
 351   for (size_t i = 0; i < input_len; i++) {
 352     base_icu::UChar32 code_point;
 353     success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
 354     AppendUTF8Value(code_point, output);
 355   }
 356   return success;
 357 }
 358
 359 bool ConvertUTF8ToUTF16(const char* input,
 360                         size_t input_len,
 361                         CanonOutputT<char16_t>* output) {
 362   bool success = true;
 363   for (size_t i = 0; i < input_len; i++) {
 364     base_icu::UChar32 code_point;
 365     success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
 366     AppendUTF16Value(code_point, output);
 367   }
 368   return success;
 369 }
 370
 371 void SetupOverrideComponents(const char* base,
 372                              const Replacements<char>& repl,
 373                              URLComponentSource<char>* source,
 374                              Parsed* parsed) {
 375   // Get the source and parsed structures of the things we are replacing.
 376   const URLComponentSource<char>& repl_source = repl.sources();
 377   const Parsed& repl_parsed = repl.components();
 378
 379   DoOverrideComponent(repl_source.scheme, repl_parsed.scheme, &source->scheme,
 380                       &parsed->scheme);
 381   DoOverrideComponent(repl_source.username, repl_parsed.username,
 382                       &source->username, &parsed->username);
 383   DoOverrideComponent(repl_source.password, repl_parsed.password,
 384                       &source->password, &parsed->password);
 385
 386   // Our host should be empty if not present, so override the default setup.
 387   DoOverrideComponent(repl_source.host, repl_parsed.host, &source->host,
 388                       &parsed->host);
 389   if (parsed->host.len == -1)
 390     parsed->host.len = 0;
 391
 392   DoOverrideComponent(repl_source.port, repl_parsed.port, &source->port,
 393                       &parsed->port);
 394   DoOverrideComponent(repl_source.path, repl_parsed.path, &source->path,
 395                       &parsed->path);
 396   DoOverrideComponent(repl_source.query, repl_parsed.query, &source->query,
 397                       &parsed->query);
 398   DoOverrideComponent(repl_source.ref, repl_parsed.ref, &source->ref,
 399                       &parsed->ref);
 400 }
 401
 402 bool SetupUTF16OverrideComponents(const char* base,
 403                                   const Replacements<char16_t>& repl,
 404                                   CanonOutput* utf8_buffer,
 405                                   URLComponentSource<char>* source,
 406                                   Parsed* parsed) {
 407   bool success = true;
 408
 409   // Get the source and parsed structures of the things we are replacing.
 410   const URLComponentSource<char16_t>& repl_source = repl.sources();
 411   const Parsed& repl_parsed = repl.components();
 412
 413   success &= PrepareUTF16OverrideComponent(
 414       repl_source.scheme, repl_parsed.scheme, utf8_buffer, &parsed->scheme);
 415   success &=
 416       PrepareUTF16OverrideComponent(repl_source.username, repl_parsed.username,
 417                                     utf8_buffer, &parsed->username);
 418   success &=
 419       PrepareUTF16OverrideComponent(repl_source.password, repl_parsed.password,
 420                                     utf8_buffer, &parsed->password);
 421   success &= PrepareUTF16OverrideComponent(repl_source.host, repl_parsed.host,
 422                                            utf8_buffer, &parsed->host);
 423   success &= PrepareUTF16OverrideComponent(repl_source.port, repl_parsed.port,
 424                                            utf8_buffer, &parsed->port);
 425   success &= PrepareUTF16OverrideComponent(repl_source.path, repl_parsed.path,
 426                                            utf8_buffer, &parsed->path);
 427   success &= PrepareUTF16OverrideComponent(repl_source.query, repl_parsed.query,
 428                                            utf8_buffer, &parsed->query);
 429   success &= PrepareUTF16OverrideComponent(repl_source.ref, repl_parsed.ref,
 430                                            utf8_buffer, &parsed->ref);
 431
 432   // PrepareUTF16OverrideComponent will not have set the data pointer since the
 433   // buffer could be resized, invalidating the pointers. We set the data
 434   // pointers for affected components now that the buffer is finalized.
 435   if (repl_source.scheme)
 436     source->scheme = utf8_buffer->data();
 437   if (repl_source.username)
 438     source->username = utf8_buffer->data();
 439   if (repl_source.password)
 440     source->password = utf8_buffer->data();
 441   if (repl_source.host)
 442     source->host = utf8_buffer->data();
 443   if (repl_source.port)
 444     source->port = utf8_buffer->data();
 445   if (repl_source.path)
 446     source->path = utf8_buffer->data();
 447   if (repl_source.query)
 448     source->query = utf8_buffer->data();
 449   if (repl_source.ref)
 450     source->ref = utf8_buffer->data();
 451
 452   return success;
 453 }
 454
 455 #ifndef WIN32
 456
 457 int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
 458   const char* format_str;
 459   if (radix == 10)
 460     format_str = "%d";
 461   else if (radix == 16)
 462     format_str = "%x";
 463   else
 464     return EINVAL;
 465
 466   int written = snprintf(buffer, size_in_chars, format_str, value);
 467   if (static_cast<size_t>(written) >= size_in_chars) {
 468     // Output was truncated, or written was negative.
 469     return EINVAL;
 470   }
 471   return 0;
 472 }
 473
 474 int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) {
 475   if (radix != 10)
 476     return EINVAL;
 477
 478   // No more than 12 characters will be required for a 32-bit integer.
 479   // Add an extra byte for the terminating null.
 480   char temp[13];
 481   int written = snprintf(temp, sizeof(temp), "%d", value);
 482   if (static_cast<size_t>(written) >= size_in_chars) {
 483     // Output was truncated, or written was negative.
 484     return EINVAL;
 485   }
 486
 487   for (int i = 0; i < written; ++i) {
 488     buffer[i] = static_cast<char16_t>(temp[i]);
 489   }
 490   buffer[written] = '\0';
 491   return 0;
 492 }
 493
 494 #endif  // !WIN32
 495
 496 }  // namespace url