Fix for Geolocation webTCT failures
[platform/framework/web/chromium-efl.git] / url / url_canon.h
index 618ff7b..d3a7fab 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -8,10 +8,13 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <string_view>
+
+#include "base/component_export.h"
 #include "base/export_template.h"
-#include "base/strings/string16.h"
+#include "base/memory/raw_ptr_exclusion.h"
+#include "base/numerics/clamped_math.h"
 #include "url/third_party/mozilla/url_parse.h"
-#include "url/url_export.h"
 
 namespace url {
 
@@ -25,56 +28,47 @@ namespace url {
 // resize function that is called when the existing buffer is not big enough.
 // The derived class is then in charge of setting up our buffer which we will
 // manage.
-template<typename T>
+template <typename T>
 class CanonOutputT {
  public:
-  CanonOutputT() : buffer_(NULL), buffer_len_(0), cur_len_(0) {
-  }
-  virtual ~CanonOutputT() {
-  }
+  CanonOutputT() = default;
+  virtual ~CanonOutputT() = default;
 
   // Implemented to resize the buffer. This function should update the buffer
   // pointer to point to the new buffer, and any old data up to |cur_len_| in
   // the buffer must be copied over.
   //
   // The new size |sz| must be larger than buffer_len_.
-  virtual void Resize(int sz) = 0;
+  virtual void Resize(size_t sz) = 0;
 
   // Accessor for returning a character at a given position. The input offset
   // must be in the valid range.
-  inline T at(int offset) const {
-    return buffer_[offset];
-  }
+  inline T at(size_t offset) const { return buffer_[offset]; }
 
   // Sets the character at the given position. The given position MUST be less
   // than the length().
-  inline void set(int offset, T ch) {
-    buffer_[offset] = ch;
-  }
+  inline void set(size_t offset, T ch) { buffer_[offset] = ch; }
 
   // Returns the number of characters currently in the buffer.
-  inline int length() const {
-    return cur_len_;
-  }
+  inline size_t length() const { return cur_len_; }
 
   // Returns the current capacity of the buffer. The length() is the number of
   // characters that have been declared to be written, but the capacity() is
   // the number that can be written without reallocation. If the caller must
   // write many characters at once, it can make sure there is enough capacity,
   // write the data, then use set_size() to declare the new length().
-  int capacity() const {
-    return buffer_len_;
+  size_t capacity() const { return buffer_len_; }
+
+  // Returns the contents of the buffer as a string_view.
+  std::basic_string_view<T> view() const {
+    return std::basic_string_view<T>(data(), length());
   }
 
   // Called by the user of this class to get the output. The output will NOT
   // be NULL-terminated. Call length() to get the
   // length.
-  const T* data() const {
-    return buffer_;
-  }
-  T* data() {
-    return buffer_;
-  }
+  const T* data() const { return buffer_; }
+  T* data() { return buffer_; }
 
   // Shortens the URL to the new length. Used for "backing up" when processing
   // relative paths. This can also be used if an external function writes a lot
@@ -82,9 +76,7 @@ class CanonOutputT {
   // to declare the new length.
   //
   // This MUST NOT be used to expand the size of the buffer beyond capacity().
-  void set_length(int new_len) {
-    cur_len_ = new_len;
-  }
+  void set_length(size_t new_len) { cur_len_ = new_len; }
 
   // This is the most performance critical function, since it is called for
   // every character.
@@ -108,28 +100,29 @@ class CanonOutputT {
   }
 
   // Appends the given string to the output.
-  void Append(const T* str, int str_len) {
-    if (cur_len_ + str_len > buffer_len_) {
-      if (!Grow(cur_len_ + str_len - buffer_len_))
+  void Append(const T* str, size_t str_len) {
+    if (str_len > buffer_len_ - cur_len_) {
+      if (!Grow(str_len - (buffer_len_ - cur_len_)))
         return;
     }
-    for (int i = 0; i < str_len; i++)
-      buffer_[cur_len_ + i] = str[i];
+    memcpy(buffer_ + cur_len_, str, str_len * sizeof(T));
     cur_len_ += str_len;
   }
 
-  void ReserveSizeIfNeeded(int estimated_size) {
+  void Append(std::basic_string_view<T> str) { Append(str.data(), str.size()); }
+
+  void ReserveSizeIfNeeded(size_t estimated_size) {
     // Reserve a bit extra to account for escaped chars.
     if (estimated_size > buffer_len_)
-      Resize(estimated_size + 8);
+      Resize((base::ClampedNumeric<size_t>(estimated_size) + 8).RawValue());
   }
 
  protected:
   // Grows the given buffer so that it can fit at least |min_additional|
   // characters. Returns true if the buffer could be resized, false on OOM.
-  bool Grow(int min_additional) {
-    static const int kMinBufferLen = 16;
-    int new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
+  bool Grow(size_t min_additional) {
+    static const size_t kMinBufferLen = 16;
+    size_t new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
     do {
       if (new_len >= (1 << 30))  // Prevent overflow below.
         return false;
@@ -139,29 +132,31 @@ class CanonOutputT {
     return true;
   }
 
-  T* buffer_;
-  int buffer_len_;
+  // `buffer_` is not a raw_ptr<...> for performance reasons (based on analysis
+  // of sampling profiler data).
+  RAW_PTR_EXCLUSION T* buffer_ = nullptr;
+  size_t buffer_len_ = 0;
 
   // Used characters in the buffer.
-  int cur_len_;
+  size_t cur_len_ = 0;
 };
 
 // Simple implementation of the CanonOutput using new[]. This class
 // also supports a static buffer so if it is allocated on the stack, most
 // URLs can be canonicalized with no heap allocations.
-template<typename T, int fixed_capacity = 1024>
+template <typename T, int fixed_capacity = 1024>
 class RawCanonOutputT : public CanonOutputT<T> {
  public:
   RawCanonOutputT() : CanonOutputT<T>() {
     this->buffer_ = fixed_buffer_;
     this->buffer_len_ = fixed_capacity;
   }
-  virtual ~RawCanonOutputT() {
+  ~RawCanonOutputT() override {
     if (this->buffer_ != fixed_buffer_)
       delete[] this->buffer_;
   }
 
-  void Resize(int sz) override {
+  void Resize(size_t sz) override {
     T* new_buf = new T[sz];
     memcpy(new_buf, this->buffer_,
            sizeof(T) * (this->cur_len_ < sz ? this->cur_len_ : sz));
@@ -176,20 +171,21 @@ class RawCanonOutputT : public CanonOutputT<T> {
 };
 
 // Explicitely instantiate commonly used instatiations.
-extern template class EXPORT_TEMPLATE_DECLARE(URL_EXPORT) CanonOutputT<char>;
-extern template class EXPORT_TEMPLATE_DECLARE(URL_EXPORT)
-    CanonOutputT<base::char16>;
+extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
+    CanonOutputT<char>;
+extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
+    CanonOutputT<char16_t>;
 
 // Normally, all canonicalization output is in narrow characters. We support
 // the templates so it can also be used internally if a wide buffer is
 // required.
 typedef CanonOutputT<char> CanonOutput;
-typedef CanonOutputT<base::char16> CanonOutputW;
+typedef CanonOutputT<char16_t> CanonOutputW;
 
-template<int fixed_capacity>
+template <int fixed_capacity>
 class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
-template<int fixed_capacity>
-class RawCanonOutputW : public RawCanonOutputT<base::char16, fixed_capacity> {};
+template <int fixed_capacity>
+class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {};
 
 // Character set converter ----------------------------------------------------
 //
@@ -199,7 +195,7 @@ class RawCanonOutputW : public RawCanonOutputT<base::char16, fixed_capacity> {};
 //
 // Embedders will want to see the unit test for the ICU version.
 
-class URL_EXPORT CharsetConverter {
+class COMPONENT_EXPORT(URL) CharsetConverter {
  public:
   CharsetConverter() {}
   virtual ~CharsetConverter() {}
@@ -215,7 +211,7 @@ class URL_EXPORT CharsetConverter {
   // decimal, (such as "&#20320;") with escaping of the ampersand, number
   // sign, and semicolon (in the previous example it would be
   // "%26%2320320%3B"). This rule is based on what IE does in this situation.
-  virtual void ConvertFromUTF16(const base::char16* input,
+  virtual void ConvertFromUTF16(const char16_t* input,
                                 int input_len,
                                 CanonOutput* output) = 0;
 };
@@ -266,17 +262,18 @@ enum SchemeType {
 // If |input| contained both removable whitespace and a raw `<` character,
 // |potentially_dangling_markup| will be set to `true`. Otherwise, it will be
 // left untouched.
-URL_EXPORT const char* RemoveURLWhitespace(const char* input,
-                                           int input_len,
-                                           CanonOutputT<char>* buffer,
-                                           int* output_len,
-                                           bool* potentially_dangling_markup);
-URL_EXPORT const base::char16* RemoveURLWhitespace(
-    const base::char16* input,
-    int input_len,
-    CanonOutputT<base::char16>* buffer,
-    int* output_len,
-    bool* potentially_dangling_markup);
+COMPONENT_EXPORT(URL)
+const char* RemoveURLWhitespace(const char* input,
+                                int input_len,
+                                CanonOutputT<char>* buffer,
+                                int* output_len,
+                                bool* potentially_dangling_markup);
+COMPONENT_EXPORT(URL)
+const char16_t* RemoveURLWhitespace(const char16_t* input,
+                                    int input_len,
+                                    CanonOutputT<char16_t>* buffer,
+                                    int* output_len,
+                                    bool* potentially_dangling_markup);
 
 // IDN ------------------------------------------------------------------------
 
@@ -289,14 +286,13 @@ URL_EXPORT const base::char16* RemoveURLWhitespace(
 // the length of the output will be set to the length of the new host name.
 //
 // On error, returns false. The output in this case is undefined.
-URL_EXPORT bool IDNToASCII(const base::char16* src,
-                           int src_len,
-                           CanonOutputW* output);
+COMPONENT_EXPORT(URL)
+bool IDNToASCII(std::u16string_view src, CanonOutputW* output);
 
 // Piece-by-piece canonicalizers ----------------------------------------------
 //
 // These individual canonicalizers append the canonicalized versions of the
-// corresponding URL component to the given std::string. The spec and the
+// corresponding URL component to the given CanonOutput. The spec and the
 // previously-identified range of that component are the input. The range of
 // the canonicalized component will be written to the output component.
 //
@@ -317,14 +313,16 @@ URL_EXPORT bool IDNToASCII(const base::char16* src,
 // URLs.
 //
 // The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool CanonicalizeScheme(const char* spec,
-                                   const Component& scheme,
-                                   CanonOutput* output,
-                                   Component* out_scheme);
-URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,
-                                   const Component& scheme,
-                                   CanonOutput* output,
-                                   Component* out_scheme);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeScheme(const char* spec,
+                        const Component& scheme,
+                        CanonOutput* output,
+                        Component* out_scheme);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeScheme(const char16_t* spec,
+                        const Component& scheme,
+                        CanonOutput* output,
+                        Component* out_scheme);
 
 // User info: username/password. If present, this will add the delimiters so
 // the output will be "<username>:<password>@" or "<username>@". Empty
@@ -336,20 +334,22 @@ URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,
 // is legal as long as the two components don't overlap.
 //
 // The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,
-                                     const Component& username,
-                                     const char* password_source,
-                                     const Component& password,
-                                     CanonOutput* output,
-                                     Component* out_username,
-                                     Component* out_password);
-URL_EXPORT bool CanonicalizeUserInfo(const base::char16* username_source,
-                                     const Component& username,
-                                     const base::char16* password_source,
-                                     const Component& password,
-                                     CanonOutput* output,
-                                     Component* out_username,
-                                     Component* out_password);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeUserInfo(const char* username_source,
+                          const Component& username,
+                          const char* password_source,
+                          const Component& password,
+                          CanonOutput* output,
+                          Component* out_username,
+                          Component* out_password);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeUserInfo(const char16_t* username_source,
+                          const Component& username,
+                          const char16_t* password_source,
+                          const Component& password,
+                          CanonOutput* output,
+                          Component* out_username,
+                          Component* out_password);
 
 // This structure holds detailed state exported from the IP/Host canonicalizers.
 // Additional fields may be added as callers require them.
@@ -361,16 +361,16 @@ struct CanonHostInfo {
 
   // This field summarizes how the input was classified by the canonicalizer.
   enum Family {
-    NEUTRAL,   // - Doesn't resemble an IP address. As far as the IP
-               //   canonicalizer is concerned, it should be treated as a
-               //   hostname.
-    BROKEN,    // - Almost an IP, but was not canonicalized. This could be an
-               //   IPv4 address where truncation occurred, or something
-               //   containing the special characters :[] which did not parse
-               //   as an IPv6 address. Never attempt to connect to this
-               //   address, because it might actually succeed!
-    IPV4,      // - Successfully canonicalized as an IPv4 address.
-    IPV6,      // - Successfully canonicalized as an IPv6 address.
+    NEUTRAL,  // - Doesn't resemble an IP address. As far as the IP
+              //   canonicalizer is concerned, it should be treated as a
+              //   hostname.
+    BROKEN,   // - Almost an IP, but was not canonicalized. This could be an
+              //   IPv4 address where truncation occurred, or something
+              //   containing the special characters :[] which did not parse
+              //   as an IPv6 address. Never attempt to connect to this
+              //   address, because it might actually succeed!
+    IPV4,     // - Successfully canonicalized as an IPv4 address.
+    IPV6,     // - Successfully canonicalized as an IPv6 address.
   };
   Family family;
 
@@ -396,32 +396,35 @@ struct CanonHostInfo {
   }
 };
 
-
 // Host.
 //
 // The 8-bit version requires UTF-8 encoding. Use this version when you only
 // need to know whether canonicalization succeeded.
-URL_EXPORT bool CanonicalizeHost(const char* spec,
-                                 const Component& host,
-                                 CanonOutput* output,
-                                 Component* out_host);
-URL_EXPORT bool CanonicalizeHost(const base::char16* spec,
-                                 const Component& host,
-                                 CanonOutput* output,
-                                 Component* out_host);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHost(const char* spec,
+                      const Component& host,
+                      CanonOutput* output,
+                      Component* out_host);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHost(const char16_t* spec,
+                      const Component& host,
+                      CanonOutput* output,
+                      Component* out_host);
 
 // Extended version of CanonicalizeHost, which returns additional information.
 // Use this when you need to know whether the hostname was an IP address.
 // A successful return is indicated by host_info->family != BROKEN. See the
 // definition of CanonHostInfo above for details.
-URL_EXPORT void CanonicalizeHostVerbose(const char* spec,
-                                        const Component& host,
-                                        CanonOutput* output,
-                                        CanonHostInfo* host_info);
-URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec,
-                                        const Component& host,
-                                        CanonOutput* output,
-                                        CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeHostVerbose(const char* spec,
+                             const Component& host,
+                             CanonOutput* output,
+                             CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeHostVerbose(const char16_t* spec,
+                             const Component& host,
+                             CanonOutput* output,
+                             CanonHostInfo* host_info);
 
 // Canonicalizes a string according to the host canonicalization rules. Unlike
 // CanonicalizeHost, this will not check for IP addresses which can change the
@@ -443,12 +446,14 @@ URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec,
 // Returns true if the host was valid. This function will treat a 0-length
 // host as valid (because it's designed to be used for substrings) while the
 // full version above will mark empty hosts as broken.
-URL_EXPORT bool CanonicalizeHostSubstring(const char* spec,
-                                          const Component& host,
-                                          CanonOutput* output);
-URL_EXPORT bool CanonicalizeHostSubstring(const base::char16* spec,
-                                          const Component& host,
-                                          CanonOutput* output);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHostSubstring(const char* spec,
+                               const Component& host,
+                               CanonOutput* output);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHostSubstring(const char16_t* spec,
+                               const Component& host,
+                               CanonOutput* output);
 
 // IP addresses.
 //
@@ -460,34 +465,39 @@ URL_EXPORT bool CanonicalizeHostSubstring(const base::char16* spec,
 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that
 // the input is unescaped and name-prepped, etc. It should not normally be
 // necessary or wise to call this directly.
-URL_EXPORT void CanonicalizeIPAddress(const char* spec,
-                                      const Component& host,
-                                      CanonOutput* output,
-                                      CanonHostInfo* host_info);
-URL_EXPORT void CanonicalizeIPAddress(const base::char16* spec,
-                                      const Component& host,
-                                      CanonOutput* output,
-                                      CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeIPAddress(const char* spec,
+                           const Component& host,
+                           CanonOutput* output,
+                           CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeIPAddress(const char16_t* spec,
+                           const Component& host,
+                           CanonOutput* output,
+                           CanonHostInfo* host_info);
 
 // Port: this function will add the colon for the port if a port is present.
 // The caller can pass PORT_UNSPECIFIED as the
 // default_port_for_scheme argument if there is no default port.
 //
 // The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool CanonicalizePort(const char* spec,
-                                 const Component& port,
-                                 int default_port_for_scheme,
-                                 CanonOutput* output,
-                                 Component* out_port);
-URL_EXPORT bool CanonicalizePort(const base::char16* spec,
-                                 const Component& port,
-                                 int default_port_for_scheme,
-                                 CanonOutput* output,
-                                 Component* out_port);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePort(const char* spec,
+                      const Component& port,
+                      int default_port_for_scheme,
+                      CanonOutput* output,
+                      Component* out_port);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePort(const char16_t* spec,
+                      const Component& port,
+                      int default_port_for_scheme,
+                      CanonOutput* output,
+                      Component* out_port);
 
 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
-// if the scheme is unknown.
-URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len);
+// if the scheme is unknown. Based on https://url.spec.whatwg.org/#default-port
+COMPONENT_EXPORT(URL)
+int DefaultPortForScheme(const char* scheme, int scheme_len);
 
 // Path. If the input does not begin in a slash (including if the input is
 // empty), we'll prepend a slash to the path to make it canonical.
@@ -498,14 +508,29 @@ URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len);
 // an issue. Somebody giving us an 8-bit path is responsible for generating
 // the path that the server expects (we'll escape high-bit characters), so
 // if something is invalid, it's their problem.
-URL_EXPORT bool CanonicalizePath(const char* spec,
-                                 const Component& path,
-                                 CanonOutput* output,
-                                 Component* out_path);
-URL_EXPORT bool CanonicalizePath(const base::char16* spec,
-                                 const Component& path,
-                                 CanonOutput* output,
-                                 Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePath(const char* spec,
+                      const Component& path,
+                      CanonOutput* output,
+                      Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePath(const char16_t* spec,
+                      const Component& path,
+                      CanonOutput* output,
+                      Component* out_path);
+
+// Like CanonicalizePath(), but does not assume that its operating on the
+// entire path.  It therefore does not prepend a slash, etc.
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char* spec,
+                             const Component& path,
+                             CanonOutput* output,
+                             Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char16_t* spec,
+                             const Component& path,
+                             CanonOutput* output,
+                             Component* out_path);
 
 // Canonicalizes the input as a file path. This is like CanonicalizePath except
 // that it also handles Windows drive specs. For example, the path can begin
@@ -513,14 +538,16 @@ URL_EXPORT bool CanonicalizePath(const base::char16* spec,
 // The string will be appended to |*output| and |*out_path| will be updated.
 //
 // The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool FileCanonicalizePath(const char* spec,
-                                     const Component& path,
-                                     CanonOutput* output,
-                                     Component* out_path);
-URL_EXPORT bool FileCanonicalizePath(const base::char16* spec,
-                                     const Component& path,
-                                     CanonOutput* output,
-                                     Component* out_path);
+COMPONENT_EXPORT(URL)
+bool FileCanonicalizePath(const char* spec,
+                          const Component& path,
+                          CanonOutput* output,
+                          Component* out_path);
+COMPONENT_EXPORT(URL)
+bool FileCanonicalizePath(const char16_t* spec,
+                          const Component& path,
+                          CanonOutput* output,
+                          Component* out_path);
 
 // Query: Prepends the ? if needed.
 //
@@ -534,16 +561,18 @@ URL_EXPORT bool FileCanonicalizePath(const base::char16* spec,
 // if necessary, for ASCII input, no conversions are necessary.
 //
 // The converter can be NULL. In this case, the output encoding will be UTF-8.
-URL_EXPORT void CanonicalizeQuery(const char* spec,
-                                  const Component& query,
-                                  CharsetConverter* converter,
-                                  CanonOutput* output,
-                                  Component* out_query);
-URL_EXPORT void CanonicalizeQuery(const base::char16* spec,
-                                  const Component& query,
-                                  CharsetConverter* converter,
-                                  CanonOutput* output,
-                                  Component* out_query);
+COMPONENT_EXPORT(URL)
+void CanonicalizeQuery(const char* spec,
+                       const Component& query,
+                       CharsetConverter* converter,
+                       CanonOutput* output,
+                       Component* out_query);
+COMPONENT_EXPORT(URL)
+void CanonicalizeQuery(const char16_t* spec,
+                       const Component& query,
+                       CharsetConverter* converter,
+                       CanonOutput* output,
+                       Component* out_query);
 
 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
 // canonicalizer that does not produce ASCII output). The output is
@@ -551,14 +580,16 @@ URL_EXPORT void CanonicalizeQuery(const base::char16* spec,
 //
 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
 // the "Unicode replacement character" for the confusing bits and copy the rest.
-URL_EXPORT void CanonicalizeRef(const char* spec,
-                                const Component& path,
-                                CanonOutput* output,
-                                Component* out_path);
-URL_EXPORT void CanonicalizeRef(const base::char16* spec,
-                                const Component& path,
-                                CanonOutput* output,
-                                Component* out_path);
+COMPONENT_EXPORT(URL)
+void CanonicalizeRef(const char* spec,
+                     const Component& path,
+                     CanonOutput* output,
+                     Component* out_path);
+COMPONENT_EXPORT(URL)
+void CanonicalizeRef(const char16_t* spec,
+                     const Component& path,
+                     CanonOutput* output,
+                     Component* out_path);
 
 // Full canonicalizer ---------------------------------------------------------
 //
@@ -571,77 +602,100 @@ URL_EXPORT void CanonicalizeRef(const base::char16* spec,
 // The 8-bit versions require UTF-8 encoding.
 
 // Use for standard URLs with authorities and paths.
-URL_EXPORT bool CanonicalizeStandardURL(const char* spec,
-                                        int spec_len,
-                                        const Parsed& parsed,
-                                        SchemeType scheme_type,
-                                        CharsetConverter* query_converter,
-                                        CanonOutput* output,
-                                        Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeStandardURL(const base::char16* spec,
-                                        int spec_len,
-                                        const Parsed& parsed,
-                                        SchemeType scheme_type,
-                                        CharsetConverter* query_converter,
-                                        CanonOutput* output,
-                                        Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeStandardURL(const char* spec,
+                             int spec_len,
+                             const Parsed& parsed,
+                             SchemeType scheme_type,
+                             CharsetConverter* query_converter,
+                             CanonOutput* output,
+                             Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeStandardURL(const char16_t* spec,
+                             int spec_len,
+                             const Parsed& parsed,
+                             SchemeType scheme_type,
+                             CharsetConverter* query_converter,
+                             CanonOutput* output,
+                             Parsed* new_parsed);
 
 // Use for file URLs.
-URL_EXPORT bool CanonicalizeFileURL(const char* spec,
-                                    int spec_len,
-                                    const Parsed& parsed,
-                                    CharsetConverter* query_converter,
-                                    CanonOutput* output,
-                                    Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeFileURL(const base::char16* spec,
-                                    int spec_len,
-                                    const Parsed& parsed,
-                                    CharsetConverter* query_converter,
-                                    CanonOutput* output,
-                                    Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileURL(const char* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CharsetConverter* query_converter,
+                         CanonOutput* output,
+                         Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileURL(const char16_t* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CharsetConverter* query_converter,
+                         CanonOutput* output,
+                         Parsed* new_parsed);
 
 // Use for filesystem URLs.
-URL_EXPORT bool CanonicalizeFileSystemURL(const char* spec,
-                                          int spec_len,
-                                          const Parsed& parsed,
-                                          CharsetConverter* query_converter,
-                                          CanonOutput* output,
-                                          Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeFileSystemURL(const base::char16* spec,
-                                          int spec_len,
-                                          const Parsed& parsed,
-                                          CharsetConverter* query_converter,
-                                          CanonOutput* output,
-                                          Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileSystemURL(const char* spec,
+                               int spec_len,
+                               const Parsed& parsed,
+                               CharsetConverter* query_converter,
+                               CanonOutput* output,
+                               Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileSystemURL(const char16_t* spec,
+                               int spec_len,
+                               const Parsed& parsed,
+                               CharsetConverter* query_converter,
+                               CanonOutput* output,
+                               Parsed* new_parsed);
 
 // Use for path URLs such as javascript. This does not modify the path in any
 // way, for example, by escaping it.
-URL_EXPORT bool CanonicalizePathURL(const char* spec,
-                                    int spec_len,
-                                    const Parsed& parsed,
-                                    CanonOutput* output,
-                                    Parsed* new_parsed);
-URL_EXPORT bool CanonicalizePathURL(const base::char16* spec,
-                                    int spec_len,
-                                    const Parsed& parsed,
-                                    CanonOutput* output,
-                                    Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePathURL(const char* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CanonOutput* output,
+                         Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePathURL(const char16_t* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CanonOutput* output,
+                         Parsed* new_parsed);
+
+// Use to canonicalize just the path component of a "path" URL; e.g. the
+// path of a javascript URL.
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char* source,
+                             const Component& component,
+                             CanonOutput* output,
+                             Component* new_component);
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char16_t* source,
+                             const Component& component,
+                             CanonOutput* output,
+                             Component* new_component);
 
 // Use for mailto URLs. This "canonicalizes" the URL into a path and query
 // component. It does not attempt to merge "to" fields. It uses UTF-8 for
 // the query encoding if there is a query. This is because a mailto URL is
 // really intended for an external mail program, and the encoding of a page,
 // etc. which would influence a query encoding normally are irrelevant.
-URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,
-                                      int spec_len,
-                                      const Parsed& parsed,
-                                      CanonOutput* output,
-                                      Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,
-                                      int spec_len,
-                                      const Parsed& parsed,
-                                      CanonOutput* output,
-                                      Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeMailtoURL(const char* spec,
+                           int spec_len,
+                           const Parsed& parsed,
+                           CanonOutput* output,
+                           Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeMailtoURL(const char16_t* spec,
+                           int spec_len,
+                           const Parsed& parsed,
+                           CanonOutput* output,
+                           Parsed* new_parsed);
 
 // Part replacer --------------------------------------------------------------
 
@@ -658,21 +712,20 @@ URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,
 // This structures does not own any data. It is the caller's responsibility to
 // ensure that the data the pointers point to stays in scope and is not
 // modified.
-template<typename CHAR>
+template <typename CHAR>
 struct URLComponentSource {
   // Constructor normally used by callers wishing to replace components. This
   // will make them all NULL, which is no replacement. The caller would then
   // override the components they want to replace.
   URLComponentSource()
-      : scheme(NULL),
-        username(NULL),
-        password(NULL),
-        host(NULL),
-        port(NULL),
-        path(NULL),
-        query(NULL),
-        ref(NULL) {
-  }
+      : scheme(nullptr),
+        username(nullptr),
+        password(nullptr),
+        host(nullptr),
+        port(nullptr),
+        path(nullptr),
+        query(nullptr),
+        ref(nullptr) {}
 
   // Constructor normally used internally to initialize all the components to
   // point to the same spec.
@@ -684,17 +737,32 @@ struct URLComponentSource {
         port(default_value),
         path(default_value),
         query(default_value),
-        ref(default_value) {
-  }
-
-  const CHAR* scheme;
-  const CHAR* username;
-  const CHAR* password;
-  const CHAR* host;
-  const CHAR* port;
-  const CHAR* path;
-  const CHAR* query;
-  const CHAR* ref;
+        ref(default_value) {}
+
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* scheme;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* username;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* password;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* host;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* port;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* path;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* query;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* ref;
 };
 
 // This structure encapsulates information on modifying a URL. Each component
@@ -707,11 +775,10 @@ struct URLComponentSource {
 // IN SCOPE BY THE CALLER for as long as this object exists!
 //
 // Prefer the 8-bit replacement version if possible since it is more efficient.
-template<typename CHAR>
+template <typename CHAR>
 class Replacements {
  public:
-  Replacements() {
-  }
+  Replacements() {}
 
   // Scheme
   void SetScheme(const CHAR* s, const Component& comp) {
@@ -826,78 +893,86 @@ class Replacements {
 };
 
 // The base must be an 8-bit canonical URL.
-URL_EXPORT bool ReplaceStandardURL(const char* base,
-                                   const Parsed& base_parsed,
-                                   const Replacements<char>& replacements,
-                                   SchemeType scheme_type,
-                                   CharsetConverter* query_converter,
-                                   CanonOutput* output,
-                                   Parsed* new_parsed);
-URL_EXPORT bool ReplaceStandardURL(
-    const char* base,
-    const Parsed& base_parsed,
-    const Replacements<base::char16>& replacements,
-    SchemeType scheme_type,
-    CharsetConverter* query_converter,
-    CanonOutput* output,
-    Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceStandardURL(const char* base,
+                        const Parsed& base_parsed,
+                        const Replacements<char>& replacements,
+                        SchemeType scheme_type,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceStandardURL(const char* base,
+                        const Parsed& base_parsed,
+                        const Replacements<char16_t>& replacements,
+                        SchemeType scheme_type,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* new_parsed);
 
 // Filesystem URLs can only have the path, query, or ref replaced.
 // All other components will be ignored.
-URL_EXPORT bool ReplaceFileSystemURL(const char* base,
-                                     const Parsed& base_parsed,
-                                     const Replacements<char>& replacements,
-                                     CharsetConverter* query_converter,
-                                     CanonOutput* output,
-                                     Parsed* new_parsed);
-URL_EXPORT bool ReplaceFileSystemURL(
-    const char* base,
-    const Parsed& base_parsed,
-    const Replacements<base::char16>& replacements,
-    CharsetConverter* query_converter,
-    CanonOutput* output,
-    Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileSystemURL(const char* base,
+                          const Parsed& base_parsed,
+                          const Replacements<char>& replacements,
+                          CharsetConverter* query_converter,
+                          CanonOutput* output,
+                          Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileSystemURL(const char* base,
+                          const Parsed& base_parsed,
+                          const Replacements<char16_t>& replacements,
+                          CharsetConverter* query_converter,
+                          CanonOutput* output,
+                          Parsed* new_parsed);
 
 // Replacing some parts of a file URL is not permitted. Everything except
 // the host, path, query, and ref will be ignored.
-URL_EXPORT bool ReplaceFileURL(const char* base,
-                               const Parsed& base_parsed,
-                               const Replacements<char>& replacements,
-                               CharsetConverter* query_converter,
-                               CanonOutput* output,
-                               Parsed* new_parsed);
-URL_EXPORT bool ReplaceFileURL(const char* base,
-                               const Parsed& base_parsed,
-                               const Replacements<base::char16>& replacements,
-                               CharsetConverter* query_converter,
-                               CanonOutput* output,
-                               Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char>& replacements,
+                    CharsetConverter* query_converter,
+                    CanonOutput* output,
+                    Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char16_t>& replacements,
+                    CharsetConverter* query_converter,
+                    CanonOutput* output,
+                    Parsed* new_parsed);
 
 // Path URLs can only have the scheme and path replaced. All other components
 // will be ignored.
-URL_EXPORT bool ReplacePathURL(const char* base,
-                               const Parsed& base_parsed,
-                               const Replacements<char>& replacements,
-                               CanonOutput* output,
-                               Parsed* new_parsed);
-URL_EXPORT bool ReplacePathURL(const char* base,
-                               const Parsed& base_parsed,
-                               const Replacements<base::char16>& replacements,
-                               CanonOutput* output,
-                               Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplacePathURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char>& replacements,
+                    CanonOutput* output,
+                    Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplacePathURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char16_t>& replacements,
+                    CanonOutput* output,
+                    Parsed* new_parsed);
 
 // Mailto URLs can only have the scheme, path, and query replaced.
 // All other components will be ignored.
-URL_EXPORT bool ReplaceMailtoURL(const char* base,
-                                 const Parsed& base_parsed,
-                                 const Replacements<char>& replacements,
-                                 CanonOutput* output,
-                                 Parsed* new_parsed);
-URL_EXPORT bool ReplaceMailtoURL(const char* base,
-                                 const Parsed& base_parsed,
-                                 const Replacements<base::char16>& replacements,
-                                 CanonOutput* output,
-                                 Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceMailtoURL(const char* base,
+                      const Parsed& base_parsed,
+                      const Replacements<char>& replacements,
+                      CanonOutput* output,
+                      Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceMailtoURL(const char* base,
+                      const Parsed& base_parsed,
+                      const Replacements<char16_t>& replacements,
+                      CanonOutput* output,
+                      Parsed* new_parsed);
 
 // Relative URL ---------------------------------------------------------------
 
@@ -912,20 +987,22 @@ URL_EXPORT bool ReplaceMailtoURL(const char* base,
 // not). Failure means that the combination of URLs doesn't make any sense.
 //
 // The base URL should always be canonical, therefore is ASCII.
-URL_EXPORT bool IsRelativeURL(const char* base,
-                              const Parsed& base_parsed,
-                              const char* fragment,
-                              int fragment_len,
-                              bool is_base_hierarchical,
-                              bool* is_relative,
-                              Component* relative_component);
-URL_EXPORT bool IsRelativeURL(const char* base,
-                              const Parsed& base_parsed,
-                              const base::char16* fragment,
-                              int fragment_len,
-                              bool is_base_hierarchical,
-                              bool* is_relative,
-                              Component* relative_component);
+COMPONENT_EXPORT(URL)
+bool IsRelativeURL(const char* base,
+                   const Parsed& base_parsed,
+                   const char* fragment,
+                   int fragment_len,
+                   bool is_base_hierarchical,
+                   bool* is_relative,
+                   Component* relative_component);
+COMPONENT_EXPORT(URL)
+bool IsRelativeURL(const char* base,
+                   const Parsed& base_parsed,
+                   const char16_t* fragment,
+                   int fragment_len,
+                   bool is_base_hierarchical,
+                   bool* is_relative,
+                   Component* relative_component);
 
 // Given a canonical parsed source URL, a URL fragment known to be relative,
 // and the identified relevant portion of the relative URL (computed by
@@ -945,22 +1022,24 @@ URL_EXPORT bool IsRelativeURL(const char* base,
 // Returns true on success. On failure, the output will be "something
 // reasonable" that will be consistent and valid, just probably not what
 // was intended by the web page author or caller.
-URL_EXPORT bool ResolveRelativeURL(const char* base_url,
-                                   const Parsed& base_parsed,
-                                   bool base_is_file,
-                                   const char* relative_url,
-                                   const Component& relative_component,
-                                   CharsetConverter* query_converter,
-                                   CanonOutput* output,
-                                   Parsed* out_parsed);
-URL_EXPORT bool ResolveRelativeURL(const char* base_url,
-                                   const Parsed& base_parsed,
-                                   bool base_is_file,
-                                   const base::char16* relative_url,
-                                   const Component& relative_component,
-                                   CharsetConverter* query_converter,
-                                   CanonOutput* output,
-                                   Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelativeURL(const char* base_url,
+                        const Parsed& base_parsed,
+                        bool base_is_file,
+                        const char* relative_url,
+                        const Component& relative_component,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelativeURL(const char* base_url,
+                        const Parsed& base_parsed,
+                        bool base_is_file,
+                        const char16_t* relative_url,
+                        const Component& relative_component,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* out_parsed);
 
 }  // namespace url