url/url_util.h

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef URL_URL_UTIL_H_
   6 #define URL_URL_UTIL_H_
   7
   8 #include <string>
   9 #include <vector>
  10
  11 #include "base/strings/string16.h"
  12 #include "base/strings/string_piece.h"
  13 #include "url/third_party/mozilla/url_parse.h"
  14 #include "url/url_canon.h"
  15 #include "url/url_constants.h"
  16 #include "url/url_export.h"
  17
  18 namespace url {
  19
  20 // Init ------------------------------------------------------------------------
  21
  22 // Initialization is NOT required; the library is implicitly initialized when
  23 // first used. However, this implicit initialization is NOT threadsafe. If you
  24 // are using this library in a threaded environment and don't have a
  25 // consistent "first call" (an example might be calling Add*Scheme with your
  26 // special application-specific schemes), then you will want to call
  27 // Initialize() before spawning any threads.
  28 //
  29 // It is OK to call this function more than once; subsequent calls will be
  30 // no-ops unless Shutdown() was called in the meantime. This will also be a
  31 // no-op if other calls to the library have forced an initialization beforehand.
  32 URL_EXPORT void Initialize();
  33
  34 // Cleanup is not required, although some strings may leak without it. For
  35 // most user applications, this is fine. If you're using this code in a library
  36 // that may get loaded and unloaded, you'll want to call Shutdown() so that
  37 // your library cleans up properly.
  38 URL_EXPORT void Shutdown();
  39
  40 // Schemes ---------------------------------------------------------------------
  41
  42 // A pair for representing a standard scheme name and the SchemeType for it.
  43 struct URL_EXPORT SchemeWithType {
  44   const char* scheme;  // The scheme name.
  45   SchemeType type;     // The SchemeType that goes with |scheme|.
  46 };
  47
  48 // The following Add*Scheme functions are not threadsafe and cannot be called
  49 // concurrently with any other url_util function. They will assert if the lists
  50 // of schemes have been locked (see LockSchemeRegistries).
  51
  52 // Adds an application-defined scheme to the internal list of "standard-format"
  53 // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
  54 // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
  55 // |scheme_type| is the SchemeType for |new_scheme| (see GetStandardSchemeType).
  56 URL_EXPORT void AddStandardScheme(const char* new_scheme,
  57                                   SchemeType scheme_type);
  58
  59 // Adds an application-defined scheme to the internal list of schemes allowed
  60 // for referrers. IsReferrerScheme() tests membership in this list.
     // NOTE(review): the role of |scheme_type| for referrer schemes is not
     // described in this header -- confirm against the implementation.
  61 URL_EXPORT void AddReferrerScheme(const char* new_scheme,
  62                                   SchemeType scheme_type);
  63
  64 // Adds an application-defined scheme to the list of schemes that do not trigger
  65 // mixed content warnings.
  66 URL_EXPORT void AddSecureScheme(const char* new_scheme);
     // Returns a read-only reference to a vector of scheme names; presumably the
     // list AddSecureScheme() adds to (TODO: confirm against the implementation).
  67 URL_EXPORT const std::vector<std::string>& GetSecureSchemes();
  68
  69 // Adds an application-defined scheme to the list of schemes that normal pages
  70 // cannot link to or access (i.e., with the same security rules as those applied
  71 // to "file" URLs).
  72 URL_EXPORT void AddLocalScheme(const char* new_scheme);
     // Returns a read-only reference to a vector of scheme names; presumably the
     // list AddLocalScheme() adds to (TODO: confirm against the implementation).
  73 URL_EXPORT const std::vector<std::string>& GetLocalSchemes();
  74
  75 // Adds an application-defined scheme to the list of schemes that cause pages
  76 // loaded with them to not have access to pages loaded with any other URL
  77 // scheme.
  78 URL_EXPORT void AddNoAccessScheme(const char* new_scheme);
     // Returns a read-only reference to a vector of scheme names; presumably the
     // list AddNoAccessScheme() adds to (TODO: confirm against the implementation).
  79 URL_EXPORT const std::vector<std::string>& GetNoAccessSchemes();
  80
  81 // Adds an application-defined scheme to the list of schemes that can be sent
  82 // CORS requests.
  83 URL_EXPORT void AddCORSEnabledScheme(const char* new_scheme);
     // Returns a read-only reference to a vector of scheme names; presumably the
     // list AddCORSEnabledScheme() adds to (TODO: confirm against the
     // implementation).
  84 URL_EXPORT const std::vector<std::string>& GetCORSEnabledSchemes();
  85
  86 // Adds an application-defined scheme to the list of web schemes that can be
  87 // used by the web to store data (e.g. cookies, local storage, ...). This is
  88 // to differentiate them from schemes that can store data but are not used on
  89 // the web (e.g. an application's internal schemes) or schemes that are used on
  90 // the web but cannot store data.
  91 URL_EXPORT void AddWebStorageScheme(const char* new_scheme);
     // Returns a read-only reference to a vector of scheme names; presumably the
     // list AddWebStorageScheme() adds to (TODO: confirm against the
     // implementation).
  92 URL_EXPORT const std::vector<std::string>& GetWebStorageSchemes();
  93
  94 // Adds an application-defined scheme to the list of schemes that can bypass the
  95 // Content-Security-Policy (CSP) checks.
  96 URL_EXPORT void AddCSPBypassingScheme(const char* new_scheme);
     // Returns a read-only reference to a vector of scheme names; presumably the
     // list AddCSPBypassingScheme() adds to (TODO: confirm against the
     // implementation).
  97 URL_EXPORT const std::vector<std::string>& GetCSPBypassingSchemes();
  98
  99 // Adds an application-defined scheme to the list of schemes that are strictly
 100 // empty documents, allowing them to commit synchronously.
 101 URL_EXPORT void AddEmptyDocumentScheme(const char* new_scheme);
     // Returns a read-only reference to a vector of scheme names; presumably the
     // list AddEmptyDocumentScheme() adds to (TODO: confirm against the
     // implementation).
 102 URL_EXPORT const std::vector<std::string>& GetEmptyDocumentSchemes();
 103
 104 // Sets a flag to prevent future calls to Add*Scheme from succeeding; once the
     // registries are locked, those calls will assert.
 105 //
 106 // This is designed to help prevent errors for multithreaded applications.
 107 // Normal usage would be to call Add*Scheme for your custom schemes at
 108 // the beginning of program initialization, and then LockSchemeRegistries. This
 109 // prevents future callers from mistakenly calling Add*Scheme when the
 110 // program is running with multiple threads, where such usage would be
 111 // dangerous.
 112 //
 113 // We could have had Add*Scheme use a lock instead, but that would add
 114 // some platform-specific dependencies we don't otherwise have now, and is
 115 // overkill considering the normal usage is so simple.
 116 URL_EXPORT void LockSchemeRegistries();
 117
 118 // Locates the scheme in the given string and places it into |found_scheme|,
 119 // which may be NULL to indicate the caller does not care about the range.
     // |str_len| is the length of |str|; the std::string and base::string16
     // overloads below pass str.data() and str.size() for these two arguments.
 120 //
 121 // Returns whether the given |compare| scheme matches the scheme found in the
 122 // input (if any). The |compare| scheme must be a valid canonical scheme or
 123 // the result of the comparison is undefined.
 124 URL_EXPORT bool FindAndCompareScheme(const char* str,
 125                                      int str_len,
 126                                      const char* compare,
 127                                      Component* found_scheme);
 128 URL_EXPORT bool FindAndCompareScheme(const base::char16* str,
 129                                      int str_len,
 130                                      const char* compare,
 131                                      Component* found_scheme);
 132 inline bool FindAndCompareScheme(const std::string& str,
 133                                  const char* compare,
 134                                  Component* found_scheme) {
 135   return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
 136                               compare, found_scheme);
 137 }
 138 inline bool FindAndCompareScheme(const base::string16& str,
 139                                  const char* compare,
 140                                  Component* found_scheme) {
 141   return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
 142                               compare, found_scheme);
 143 }
 144
 145 // Returns true if the scheme identified by |scheme| within |spec| is in the
 146 // list of known standard-format schemes (see AddStandardScheme).
     // The two overloads differ only in the character type of |spec|.
 147 URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);
 148 URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);
 149
 150 // Returns true if the scheme identified by |scheme| within |spec| is in the
 151 // list of allowed schemes for referrers (see AddReferrerScheme).
 152 URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme);
 153
 154 // Returns true and sets |type| to the SchemeType of the scheme identified by
 155 // |scheme| within |spec| if that scheme is in the list of known
 156 // standard-format schemes (see AddStandardScheme).
     // NOTE(review): the value left in |type| when the scheme is not in the list
     // is not stated here -- confirm against the implementation before relying
     // on it.
 157 URL_EXPORT bool GetStandardSchemeType(const char* spec,
 158                                       const Component& scheme,
 159                                       SchemeType* type);
 160 URL_EXPORT bool GetStandardSchemeType(const base::char16* spec,
 161                                       const Component& scheme,
 162                                       SchemeType* type);
 163
 164 // Hosts  ----------------------------------------------------------------------
 165
 166 // Returns true if the |canonical_host| matches or is in the same domain as the
 167 // given |canonical_domain| string. For example, if the canonicalized hostname
 168 // is "www.google.com", this will return true for "com", "google.com", and
 169 // "www.google.com" domains.
 170 //
 171 // If either of the input StringPieces is empty, the return value is false. The
 172 // input domain should match host canonicalization rules, i.e., it should be
 173 // lowercase except for escape chars.
 174 URL_EXPORT bool DomainIs(base::StringPiece canonical_host,
 175                          base::StringPiece canonical_domain);
 176
 177 // Returns true if |host| is an IP address. Note: this function isn't very
 178 // cheap, as it must re-parse the host to verify.
 179 URL_EXPORT bool HostIsIPAddress(base::StringPiece host);
 180
 181 // URL library wrappers --------------------------------------------------------
 182
 183 // Parses the given spec according to the extracted scheme type. Normal users
 184 // should use the URL object, although this may be useful if performance is
 185 // critical and you don't want to do the heap allocation for the std::string.
 186 //
 187 // As with the Canonicalize* functions, the charset converter can be NULL to
 188 // use UTF-8 (it will be faster in this case).
     //
     // NOTE(review): |trim_path_end| is not described in this header; by its name
     // it presumably controls trimming at the end of the path -- confirm against
     // the implementation.
 189 //
 190 // Returns true if a valid URL was produced, false if not. On failure, the
 191 // output and parsed structures will still be filled and will be consistent,
 192 // but they will not represent a loadable URL.
 193 URL_EXPORT bool Canonicalize(const char* spec,
 194                              int spec_len,
 195                              bool trim_path_end,
 196                              CharsetConverter* charset_converter,
 197                              CanonOutput* output,
 198                              Parsed* output_parsed);
 199 URL_EXPORT bool Canonicalize(const base::char16* spec,
 200                              int spec_len,
 201                              bool trim_path_end,
 202                              CharsetConverter* charset_converter,
 203                              CanonOutput* output,
 204                              Parsed* output_parsed);
 205
 206 // Resolves a potentially relative URL relative to the given parsed base URL.
 207 // The base MUST be valid. The resulting canonical URL and parsed information
 208 // will be placed into the given out variables.
 209 //
 210 // The |relative| input need not be relative. If we discover that it's absolute,
 211 // this will produce a canonical version of that URL. See Canonicalize() for
 212 // more about the charset_converter.
 213 //
 214 // Returns true if the output is valid, false if the input could not produce
 215 // a valid URL.
 216 URL_EXPORT bool ResolveRelative(const char* base_spec,
 217                                 int base_spec_len,
 218                                 const Parsed& base_parsed,
 219                                 const char* relative,
 220                                 int relative_length,
 221                                 CharsetConverter* charset_converter,
 222                                 CanonOutput* output,
 223                                 Parsed* output_parsed);
 224 URL_EXPORT bool ResolveRelative(const char* base_spec,
 225                                 int base_spec_len,
 226                                 const Parsed& base_parsed,
 227                                 const base::char16* relative,
 228                                 int relative_length,
 229                                 CharsetConverter* charset_converter,
 230                                 CanonOutput* output,
 231                                 Parsed* output_parsed);
 232
 233 // Replaces components in the given VALID input URL. The new canonical URL info
 234 // is written to |output| and |out_parsed|. The two overloads differ only in
     // the character type of |replacements|.
 235 //
 236 // Returns true if the resulting URL is valid.
 237 URL_EXPORT bool ReplaceComponents(const char* spec,
 238                                   int spec_len,
 239                                   const Parsed& parsed,
 240                                   const Replacements<char>& replacements,
 241                                   CharsetConverter* charset_converter,
 242                                   CanonOutput* output,
 243                                   Parsed* out_parsed);
 244 URL_EXPORT bool ReplaceComponents(
 245     const char* spec,
 246     int spec_len,
 247     const Parsed& parsed,
 248     const Replacements<base::char16>& replacements,
 249     CharsetConverter* charset_converter,
 250     CanonOutput* output,
 251     Parsed* out_parsed);
 252
 253 // String helper functions -----------------------------------------------------
 254
 255 enum class DecodeURLResult {  // Returned by DecodeURLEscapeSequences().
 256   // Did not contain code points greater than 0x7F.
 257   kAsciiOnly,
 258   // Did UTF-8 decoding only.
 259   kUTF8,
 260   // Did byte-to-Unicode (isomorphic) mapping only.
 261   kIsomorphic,
 262   // Did both UTF-8 decoding and isomorphic decoding.
 263   kMixed,
 264 };
 265
 266 // Unescapes the given string using URL escaping rules. The return value
     // reports which kind of decoding was performed (see DecodeURLResult).
 267 URL_EXPORT DecodeURLResult DecodeURLEscapeSequences(const char* input,
 268                                                     int length,
 269                                                     CanonOutputW* output);
 270
 271 // Escapes the given string as defined by the JS method encodeURIComponent. See
 272 // https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
     // The function returns void, so the escaped text is delivered via |output|.
 273 URL_EXPORT void EncodeURIComponent(const char* input,
 274                                    int length,
 275                                    CanonOutput* output);
 276
 277 }  // namespace url
 278
 279 #endif  // URL_URL_UTIL_H_