-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stdlib.h>
#include <string.h>
+#include <string_view>
+
+#include "base/component_export.h"
#include "base/export_template.h"
-#include "base/strings/string16.h"
+#include "base/memory/raw_ptr_exclusion.h"
+#include "base/numerics/clamped_math.h"
#include "url/third_party/mozilla/url_parse.h"
-#include "url/url_export.h"
namespace url {
// resize function that is called when the existing buffer is not big enough.
// The derived class is then in charge of setting up our buffer which we will
// manage.
-template<typename T>
+template <typename T>
class CanonOutputT {
public:
- CanonOutputT() : buffer_(NULL), buffer_len_(0), cur_len_(0) {
- }
- virtual ~CanonOutputT() {
- }
+ CanonOutputT() = default;
+ virtual ~CanonOutputT() = default;
// Implemented to resize the buffer. This function should update the buffer
// pointer to point to the new buffer, and any old data up to |cur_len_| in
// the buffer must be copied over.
//
// The new size |sz| must be larger than buffer_len_.
- virtual void Resize(int sz) = 0;
+ virtual void Resize(size_t sz) = 0;
// Accessor for returning a character at a given position. The input offset
// must be in the valid range.
- inline T at(int offset) const {
- return buffer_[offset];
- }
+ inline T at(size_t offset) const { return buffer_[offset]; }
// Sets the character at the given position. The given position MUST be less
// than the length().
- inline void set(int offset, T ch) {
- buffer_[offset] = ch;
- }
+ inline void set(size_t offset, T ch) { buffer_[offset] = ch; }
// Returns the number of characters currently in the buffer.
- inline int length() const {
- return cur_len_;
- }
+ inline size_t length() const { return cur_len_; }
// Returns the current capacity of the buffer. The length() is the number of
// characters that have been declared to be written, but the capacity() is
// the number that can be written without reallocation. If the caller must
// write many characters at once, it can make sure there is enough capacity,
// write the data, then use set_size() to declare the new length().
- int capacity() const {
- return buffer_len_;
+ size_t capacity() const { return buffer_len_; }
+
+ // Returns the contents of the buffer as a string_view.
+ std::basic_string_view<T> view() const {
+ return std::basic_string_view<T>(data(), length());
}
// Called by the user of this class to get the output. The output will NOT
// be NULL-terminated. Call length() to get the
// length.
- const T* data() const {
- return buffer_;
- }
- T* data() {
- return buffer_;
- }
+ const T* data() const { return buffer_; }
+ T* data() { return buffer_; }
// Shortens the URL to the new length. Used for "backing up" when processing
// relative paths. This can also be used if an external function writes a lot
// to declare the new length.
//
// This MUST NOT be used to expand the size of the buffer beyond capacity().
- void set_length(int new_len) {
- cur_len_ = new_len;
- }
+ void set_length(size_t new_len) { cur_len_ = new_len; }
// This is the most performance critical function, since it is called for
// every character.
}
// Appends the given string to the output.
- void Append(const T* str, int str_len) {
- if (cur_len_ + str_len > buffer_len_) {
- if (!Grow(cur_len_ + str_len - buffer_len_))
+ void Append(const T* str, size_t str_len) {
+ if (str_len > buffer_len_ - cur_len_) {
+ if (!Grow(str_len - (buffer_len_ - cur_len_)))
return;
}
- for (int i = 0; i < str_len; i++)
- buffer_[cur_len_ + i] = str[i];
+ memcpy(buffer_ + cur_len_, str, str_len * sizeof(T));
cur_len_ += str_len;
}
- void ReserveSizeIfNeeded(int estimated_size) {
+ void Append(std::basic_string_view<T> str) { Append(str.data(), str.size()); }
+
+ void ReserveSizeIfNeeded(size_t estimated_size) {
// Reserve a bit extra to account for escaped chars.
if (estimated_size > buffer_len_)
- Resize(estimated_size + 8);
+ Resize((base::ClampedNumeric<size_t>(estimated_size) + 8).RawValue());
}
protected:
// Grows the given buffer so that it can fit at least |min_additional|
// characters. Returns true if the buffer could be resized, false on OOM.
- bool Grow(int min_additional) {
- static const int kMinBufferLen = 16;
- int new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
+ bool Grow(size_t min_additional) {
+ static const size_t kMinBufferLen = 16;
+ size_t new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
do {
if (new_len >= (1 << 30)) // Prevent overflow below.
return false;
return true;
}
- T* buffer_;
- int buffer_len_;
+ // `buffer_` is not a raw_ptr<...> for performance reasons (based on analysis
+ // of sampling profiler data).
+ RAW_PTR_EXCLUSION T* buffer_ = nullptr;
+ size_t buffer_len_ = 0;
// Used characters in the buffer.
- int cur_len_;
+ size_t cur_len_ = 0;
};
// Simple implementation of the CanonOutput using new[]. This class
// also supports a static buffer so if it is allocated on the stack, most
// URLs can be canonicalized with no heap allocations.
-template<typename T, int fixed_capacity = 1024>
+template <typename T, int fixed_capacity = 1024>
class RawCanonOutputT : public CanonOutputT<T> {
public:
RawCanonOutputT() : CanonOutputT<T>() {
this->buffer_ = fixed_buffer_;
this->buffer_len_ = fixed_capacity;
}
- virtual ~RawCanonOutputT() {
+ ~RawCanonOutputT() override {
if (this->buffer_ != fixed_buffer_)
delete[] this->buffer_;
}
- void Resize(int sz) override {
+ void Resize(size_t sz) override {
T* new_buf = new T[sz];
memcpy(new_buf, this->buffer_,
sizeof(T) * (this->cur_len_ < sz ? this->cur_len_ : sz));
};
// Explicitely instantiate commonly used instatiations.
-extern template class EXPORT_TEMPLATE_DECLARE(URL_EXPORT) CanonOutputT<char>;
-extern template class EXPORT_TEMPLATE_DECLARE(URL_EXPORT)
- CanonOutputT<base::char16>;
+extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
+ CanonOutputT<char>;
+extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
+ CanonOutputT<char16_t>;
// Normally, all canonicalization output is in narrow characters. We support
// the templates so it can also be used internally if a wide buffer is
// required.
typedef CanonOutputT<char> CanonOutput;
-typedef CanonOutputT<base::char16> CanonOutputW;
+typedef CanonOutputT<char16_t> CanonOutputW;
-template<int fixed_capacity>
+template <int fixed_capacity>
class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
-template<int fixed_capacity>
-class RawCanonOutputW : public RawCanonOutputT<base::char16, fixed_capacity> {};
+template <int fixed_capacity>
+class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {};
// Character set converter ----------------------------------------------------
//
//
// Embedders will want to see the unit test for the ICU version.
-class URL_EXPORT CharsetConverter {
+class COMPONENT_EXPORT(URL) CharsetConverter {
public:
CharsetConverter() {}
virtual ~CharsetConverter() {}
// decimal, (such as "你") with escaping of the ampersand, number
// sign, and semicolon (in the previous example it would be
// "%26%2320320%3B"). This rule is based on what IE does in this situation.
- virtual void ConvertFromUTF16(const base::char16* input,
+ virtual void ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) = 0;
};
// If |input| contained both removable whitespace and a raw `<` character,
// |potentially_dangling_markup| will be set to `true`. Otherwise, it will be
// left untouched.
-URL_EXPORT const char* RemoveURLWhitespace(const char* input,
- int input_len,
- CanonOutputT<char>* buffer,
- int* output_len,
- bool* potentially_dangling_markup);
-URL_EXPORT const base::char16* RemoveURLWhitespace(
- const base::char16* input,
- int input_len,
- CanonOutputT<base::char16>* buffer,
- int* output_len,
- bool* potentially_dangling_markup);
+COMPONENT_EXPORT(URL)
+const char* RemoveURLWhitespace(const char* input,
+ int input_len,
+ CanonOutputT<char>* buffer,
+ int* output_len,
+ bool* potentially_dangling_markup);
+COMPONENT_EXPORT(URL)
+const char16_t* RemoveURLWhitespace(const char16_t* input,
+ int input_len,
+ CanonOutputT<char16_t>* buffer,
+ int* output_len,
+ bool* potentially_dangling_markup);
// IDN ------------------------------------------------------------------------
// the length of the output will be set to the length of the new host name.
//
// On error, returns false. The output in this case is undefined.
-URL_EXPORT bool IDNToASCII(const base::char16* src,
- int src_len,
- CanonOutputW* output);
+COMPONENT_EXPORT(URL)
+bool IDNToASCII(std::u16string_view src, CanonOutputW* output);
// Piece-by-piece canonicalizers ----------------------------------------------
//
// These individual canonicalizers append the canonicalized versions of the
-// corresponding URL component to the given std::string. The spec and the
+// corresponding URL component to the given CanonOutput. The spec and the
// previously-identified range of that component are the input. The range of
// the canonicalized component will be written to the output component.
//
// URLs.
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool CanonicalizeScheme(const char* spec,
- const Component& scheme,
- CanonOutput* output,
- Component* out_scheme);
-URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,
- const Component& scheme,
- CanonOutput* output,
- Component* out_scheme);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeScheme(const char* spec,
+ const Component& scheme,
+ CanonOutput* output,
+ Component* out_scheme);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeScheme(const char16_t* spec,
+ const Component& scheme,
+ CanonOutput* output,
+ Component* out_scheme);
// User info: username/password. If present, this will add the delimiters so
// the output will be "<username>:<password>@" or "<username>@". Empty
// is legal as long as the two components don't overlap.
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,
- const Component& username,
- const char* password_source,
- const Component& password,
- CanonOutput* output,
- Component* out_username,
- Component* out_password);
-URL_EXPORT bool CanonicalizeUserInfo(const base::char16* username_source,
- const Component& username,
- const base::char16* password_source,
- const Component& password,
- CanonOutput* output,
- Component* out_username,
- Component* out_password);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeUserInfo(const char* username_source,
+ const Component& username,
+ const char* password_source,
+ const Component& password,
+ CanonOutput* output,
+ Component* out_username,
+ Component* out_password);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeUserInfo(const char16_t* username_source,
+ const Component& username,
+ const char16_t* password_source,
+ const Component& password,
+ CanonOutput* output,
+ Component* out_username,
+ Component* out_password);
// This structure holds detailed state exported from the IP/Host canonicalizers.
// Additional fields may be added as callers require them.
// This field summarizes how the input was classified by the canonicalizer.
enum Family {
- NEUTRAL, // - Doesn't resemble an IP address. As far as the IP
- // canonicalizer is concerned, it should be treated as a
- // hostname.
- BROKEN, // - Almost an IP, but was not canonicalized. This could be an
- // IPv4 address where truncation occurred, or something
- // containing the special characters :[] which did not parse
- // as an IPv6 address. Never attempt to connect to this
- // address, because it might actually succeed!
- IPV4, // - Successfully canonicalized as an IPv4 address.
- IPV6, // - Successfully canonicalized as an IPv6 address.
+ NEUTRAL, // - Doesn't resemble an IP address. As far as the IP
+ // canonicalizer is concerned, it should be treated as a
+ // hostname.
+ BROKEN, // - Almost an IP, but was not canonicalized. This could be an
+ // IPv4 address where truncation occurred, or something
+ // containing the special characters :[] which did not parse
+ // as an IPv6 address. Never attempt to connect to this
+ // address, because it might actually succeed!
+ IPV4, // - Successfully canonicalized as an IPv4 address.
+ IPV6, // - Successfully canonicalized as an IPv6 address.
};
Family family;
}
};
-
// Host.
//
// The 8-bit version requires UTF-8 encoding. Use this version when you only
// need to know whether canonicalization succeeded.
-URL_EXPORT bool CanonicalizeHost(const char* spec,
- const Component& host,
- CanonOutput* output,
- Component* out_host);
-URL_EXPORT bool CanonicalizeHost(const base::char16* spec,
- const Component& host,
- CanonOutput* output,
- Component* out_host);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHost(const char* spec,
+ const Component& host,
+ CanonOutput* output,
+ Component* out_host);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHost(const char16_t* spec,
+ const Component& host,
+ CanonOutput* output,
+ Component* out_host);
// Extended version of CanonicalizeHost, which returns additional information.
// Use this when you need to know whether the hostname was an IP address.
// A successful return is indicated by host_info->family != BROKEN. See the
// definition of CanonHostInfo above for details.
-URL_EXPORT void CanonicalizeHostVerbose(const char* spec,
- const Component& host,
- CanonOutput* output,
- CanonHostInfo* host_info);
-URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec,
- const Component& host,
- CanonOutput* output,
- CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeHostVerbose(const char* spec,
+ const Component& host,
+ CanonOutput* output,
+ CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeHostVerbose(const char16_t* spec,
+ const Component& host,
+ CanonOutput* output,
+ CanonHostInfo* host_info);
// Canonicalizes a string according to the host canonicalization rules. Unlike
// CanonicalizeHost, this will not check for IP addresses which can change the
// Returns true if the host was valid. This function will treat a 0-length
// host as valid (because it's designed to be used for substrings) while the
// full version above will mark empty hosts as broken.
-URL_EXPORT bool CanonicalizeHostSubstring(const char* spec,
- const Component& host,
- CanonOutput* output);
-URL_EXPORT bool CanonicalizeHostSubstring(const base::char16* spec,
- const Component& host,
- CanonOutput* output);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHostSubstring(const char* spec,
+ const Component& host,
+ CanonOutput* output);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHostSubstring(const char16_t* spec,
+ const Component& host,
+ CanonOutput* output);
// IP addresses.
//
// This is called AUTOMATICALLY from the host canonicalizer, which ensures that
// the input is unescaped and name-prepped, etc. It should not normally be
// necessary or wise to call this directly.
-URL_EXPORT void CanonicalizeIPAddress(const char* spec,
- const Component& host,
- CanonOutput* output,
- CanonHostInfo* host_info);
-URL_EXPORT void CanonicalizeIPAddress(const base::char16* spec,
- const Component& host,
- CanonOutput* output,
- CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeIPAddress(const char* spec,
+ const Component& host,
+ CanonOutput* output,
+ CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeIPAddress(const char16_t* spec,
+ const Component& host,
+ CanonOutput* output,
+ CanonHostInfo* host_info);
// Port: this function will add the colon for the port if a port is present.
// The caller can pass PORT_UNSPECIFIED as the
// default_port_for_scheme argument if there is no default port.
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool CanonicalizePort(const char* spec,
- const Component& port,
- int default_port_for_scheme,
- CanonOutput* output,
- Component* out_port);
-URL_EXPORT bool CanonicalizePort(const base::char16* spec,
- const Component& port,
- int default_port_for_scheme,
- CanonOutput* output,
- Component* out_port);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePort(const char* spec,
+ const Component& port,
+ int default_port_for_scheme,
+ CanonOutput* output,
+ Component* out_port);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePort(const char16_t* spec,
+ const Component& port,
+ int default_port_for_scheme,
+ CanonOutput* output,
+ Component* out_port);
// Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
-// if the scheme is unknown.
-URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len);
+// if the scheme is unknown. Based on https://url.spec.whatwg.org/#default-port
+COMPONENT_EXPORT(URL)
+int DefaultPortForScheme(const char* scheme, int scheme_len);
// Path. If the input does not begin in a slash (including if the input is
// empty), we'll prepend a slash to the path to make it canonical.
// an issue. Somebody giving us an 8-bit path is responsible for generating
// the path that the server expects (we'll escape high-bit characters), so
// if something is invalid, it's their problem.
-URL_EXPORT bool CanonicalizePath(const char* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
-URL_EXPORT bool CanonicalizePath(const base::char16* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePath(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePath(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+
+// Like CanonicalizePath(), but does not assume that its operating on the
+// entire path. It therefore does not prepend a slash, etc.
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
// Canonicalizes the input as a file path. This is like CanonicalizePath except
// that it also handles Windows drive specs. For example, the path can begin
// The string will be appended to |*output| and |*out_path| will be updated.
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool FileCanonicalizePath(const char* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
-URL_EXPORT bool FileCanonicalizePath(const base::char16* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
+COMPONENT_EXPORT(URL)
+bool FileCanonicalizePath(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+bool FileCanonicalizePath(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
// Query: Prepends the ? if needed.
//
// if necessary, for ASCII input, no conversions are necessary.
//
// The converter can be NULL. In this case, the output encoding will be UTF-8.
-URL_EXPORT void CanonicalizeQuery(const char* spec,
- const Component& query,
- CharsetConverter* converter,
- CanonOutput* output,
- Component* out_query);
-URL_EXPORT void CanonicalizeQuery(const base::char16* spec,
- const Component& query,
- CharsetConverter* converter,
- CanonOutput* output,
- Component* out_query);
+COMPONENT_EXPORT(URL)
+void CanonicalizeQuery(const char* spec,
+ const Component& query,
+ CharsetConverter* converter,
+ CanonOutput* output,
+ Component* out_query);
+COMPONENT_EXPORT(URL)
+void CanonicalizeQuery(const char16_t* spec,
+ const Component& query,
+ CharsetConverter* converter,
+ CanonOutput* output,
+ Component* out_query);
// Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
// canonicalizer that does not produce ASCII output). The output is
//
// This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
// the "Unicode replacement character" for the confusing bits and copy the rest.
-URL_EXPORT void CanonicalizeRef(const char* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
-URL_EXPORT void CanonicalizeRef(const base::char16* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
+COMPONENT_EXPORT(URL)
+void CanonicalizeRef(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+void CanonicalizeRef(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
// Full canonicalizer ---------------------------------------------------------
//
// The 8-bit versions require UTF-8 encoding.
// Use for standard URLs with authorities and paths.
-URL_EXPORT bool CanonicalizeStandardURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- SchemeType scheme_type,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeStandardURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- SchemeType scheme_type,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeStandardURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ SchemeType scheme_type,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeStandardURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ SchemeType scheme_type,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Use for file URLs.
-URL_EXPORT bool CanonicalizeFileURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeFileURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Use for filesystem URLs.
-URL_EXPORT bool CanonicalizeFileSystemURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeFileSystemURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileSystemURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileSystemURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Use for path URLs such as javascript. This does not modify the path in any
// way, for example, by escaping it.
-URL_EXPORT bool CanonicalizePathURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizePathURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePathURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePathURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CanonOutput* output,
+ Parsed* new_parsed);
+
+// Use to canonicalize just the path component of a "path" URL; e.g. the
+// path of a javascript URL.
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component);
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char16_t* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component);
// Use for mailto URLs. This "canonicalizes" the URL into a path and query
// component. It does not attempt to merge "to" fields. It uses UTF-8 for
// the query encoding if there is a query. This is because a mailto URL is
// really intended for an external mail program, and the encoding of a page,
// etc. which would influence a query encoding normally are irrelevant.
-URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeMailtoURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeMailtoURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Part replacer --------------------------------------------------------------
// This structures does not own any data. It is the caller's responsibility to
// ensure that the data the pointers point to stays in scope and is not
// modified.
-template<typename CHAR>
+template <typename CHAR>
struct URLComponentSource {
// Constructor normally used by callers wishing to replace components. This
// will make them all NULL, which is no replacement. The caller would then
// override the components they want to replace.
URLComponentSource()
- : scheme(NULL),
- username(NULL),
- password(NULL),
- host(NULL),
- port(NULL),
- path(NULL),
- query(NULL),
- ref(NULL) {
- }
+ : scheme(nullptr),
+ username(nullptr),
+ password(nullptr),
+ host(nullptr),
+ port(nullptr),
+ path(nullptr),
+ query(nullptr),
+ ref(nullptr) {}
// Constructor normally used internally to initialize all the components to
// point to the same spec.
port(default_value),
path(default_value),
query(default_value),
- ref(default_value) {
- }
-
- const CHAR* scheme;
- const CHAR* username;
- const CHAR* password;
- const CHAR* host;
- const CHAR* port;
- const CHAR* path;
- const CHAR* query;
- const CHAR* ref;
+ ref(default_value) {}
+
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* scheme;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* username;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* password;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* host;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* port;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* path;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* query;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* ref;
};
// This structure encapsulates information on modifying a URL. Each component
// IN SCOPE BY THE CALLER for as long as this object exists!
//
// Prefer the 8-bit replacement version if possible since it is more efficient.
-template<typename CHAR>
+template <typename CHAR>
class Replacements {
public:
- Replacements() {
- }
+ Replacements() {}
// Scheme
void SetScheme(const CHAR* s, const Component& comp) {
};
// The base must be an 8-bit canonical URL.
-URL_EXPORT bool ReplaceStandardURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- SchemeType scheme_type,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplaceStandardURL(
- const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- SchemeType scheme_type,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceStandardURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ SchemeType scheme_type,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceStandardURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ SchemeType scheme_type,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Filesystem URLs can only have the path, query, or ref replaced.
// All other components will be ignored.
-URL_EXPORT bool ReplaceFileSystemURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplaceFileSystemURL(
- const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileSystemURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileSystemURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Replacing some parts of a file URL is not permitted. Everything except
// the host, path, query, and ref will be ignored.
-URL_EXPORT bool ReplaceFileURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplaceFileURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Path URLs can only have the scheme and path replaced. All other components
// will be ignored.
-URL_EXPORT bool ReplacePathURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplacePathURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplacePathURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplacePathURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Mailto URLs can only have the scheme, path, and query replaced.
// All other components will be ignored.
-URL_EXPORT bool ReplaceMailtoURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplaceMailtoURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceMailtoURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceMailtoURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Relative URL ---------------------------------------------------------------
// not). Failure means that the combination of URLs doesn't make any sense.
//
// The base URL should always be canonical, therefore is ASCII.
-URL_EXPORT bool IsRelativeURL(const char* base,
- const Parsed& base_parsed,
- const char* fragment,
- int fragment_len,
- bool is_base_hierarchical,
- bool* is_relative,
- Component* relative_component);
-URL_EXPORT bool IsRelativeURL(const char* base,
- const Parsed& base_parsed,
- const base::char16* fragment,
- int fragment_len,
- bool is_base_hierarchical,
- bool* is_relative,
- Component* relative_component);
+COMPONENT_EXPORT(URL)
+bool IsRelativeURL(const char* base,
+ const Parsed& base_parsed,
+ const char* fragment,
+ int fragment_len,
+ bool is_base_hierarchical,
+ bool* is_relative,
+ Component* relative_component);
+COMPONENT_EXPORT(URL)
+bool IsRelativeURL(const char* base,
+ const Parsed& base_parsed,
+ const char16_t* fragment,
+ int fragment_len,
+ bool is_base_hierarchical,
+ bool* is_relative,
+ Component* relative_component);
// Given a canonical parsed source URL, a URL fragment known to be relative,
// and the identified relevant portion of the relative URL (computed by
// Returns true on success. On failure, the output will be "something
// reasonable" that will be consistent and valid, just probably not what
// was intended by the web page author or caller.
-URL_EXPORT bool ResolveRelativeURL(const char* base_url,
- const Parsed& base_parsed,
- bool base_is_file,
- const char* relative_url,
- const Component& relative_component,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* out_parsed);
-URL_EXPORT bool ResolveRelativeURL(const char* base_url,
- const Parsed& base_parsed,
- bool base_is_file,
- const base::char16* relative_url,
- const Component& relative_component,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelativeURL(const char* base_url,
+ const Parsed& base_parsed,
+ bool base_is_file,
+ const char* relative_url,
+ const Component& relative_component,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelativeURL(const char* base_url,
+ const Parsed& base_parsed,
+ bool base_is_file,
+ const char16_t* relative_url,
+ const Component& relative_component,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* out_parsed);
} // namespace url