glib/guri.c

   1 /* GLIB - Library of useful routines for C programming
   2  * Copyright © 2020 Red Hat, Inc.
   3  *
   4  * SPDX-License-Identifier: LGPL-2.1-or-later
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General
  17  * Public License along with this library; if not, see
  18  * <http://www.gnu.org/licenses/>.
  19  */
  20
  21 #include "config.h"
  22
  23 #include <stdlib.h>
  24 #include <string.h>
  25
  26 #include "glib.h"
  27 #include "glibintl.h"
  28 #include "guriprivate.h"
  29
  30 /**
  31  * SECTION:guri
  32  * @short_description: URI-handling utilities
  33  * @include: glib.h
  34  *
  35  * The #GUri type and related functions can be used to parse URIs into
  36  * their components, and build valid URIs from individual components.
  37  *
  38  * Note that #GUri scope is to help manipulate URIs in various applications,
  39  * following [RFC 3986](https://tools.ietf.org/html/rfc3986). In particular,
  40  * it doesn't intend to cover web browser needs, and doesn't implement the
  41  * [WHATWG URL](https://url.spec.whatwg.org/) standard. No APIs are provided to
  42  * help prevent
  43  * [homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack), so
  44  * #GUri is not suitable for formatting URIs for display to the user for making
  45  * security-sensitive decisions.
  46  *
  47  * ## Relative and absolute URIs # {#relative-absolute-uris}
  48  *
  49  * As defined in [RFC 3986](https://tools.ietf.org/html/rfc3986#section-4), the
  50  * hierarchical nature of URIs means that they can either be ‘relative
  51  * references’ (sometimes referred to as ‘relative URIs’) or ‘URIs’ (for
  52  * clarity, ‘URIs’ are referred to in this documentation as
  53  * ‘absolute URIs’ — although
   54  * [in contrast to RFC 3986](https://tools.ietf.org/html/rfc3986#section-4.3),
  55  * fragment identifiers are always allowed).
  56  *
  57  * Relative references have one or more components of the URI missing. In
  58  * particular, they have no scheme. Any other component, such as hostname,
  59  * query, etc. may be missing, apart from a path, which has to be specified (but
  60  * may be empty). The path may be relative, starting with `./` rather than `/`.
  61  *
  62  * For example, a valid relative reference is `./path?query`,
  63  * `/?query#fragment` or `//example.com`.
  64  *
  65  * Absolute URIs have a scheme specified. Any other components of the URI which
  66  * are missing are specified as explicitly unset in the URI, rather than being
  67  * resolved relative to a base URI using g_uri_parse_relative().
  68  *
  69  * For example, a valid absolute URI is `file:///home/bob` or
  70  * `https://search.com?query=string`.
  71  *
  72  * A #GUri instance is always an absolute URI. A string may be an absolute URI
  73  * or a relative reference; see the documentation for individual functions as to
  74  * what forms they accept.
  75  *
  76  * ## Parsing URIs
  77  *
  78  * The most minimalist APIs for parsing URIs are g_uri_split() and
  79  * g_uri_split_with_user(). These split a URI into its component
  80  * parts, and return the parts; the difference between the two is that
  81  * g_uri_split() treats the ‘userinfo’ component of the URI as a
  82  * single element, while g_uri_split_with_user() can (depending on the
  83  * #GUriFlags you pass) treat it as containing a username, password,
  84  * and authentication parameters. Alternatively, g_uri_split_network()
  85  * can be used when you are only interested in the components that are
  86  * needed to initiate a network connection to the service (scheme,
  87  * host, and port).
  88  *
  89  * g_uri_parse() is similar to g_uri_split(), but instead of returning
  90  * individual strings, it returns a #GUri structure (and it requires
  91  * that the URI be an absolute URI).
  92  *
  93  * g_uri_resolve_relative() and g_uri_parse_relative() allow you to
  94  * resolve a relative URI relative to a base URI.
  95  * g_uri_resolve_relative() takes two strings and returns a string,
  96  * and g_uri_parse_relative() takes a #GUri and a string and returns a
  97  * #GUri.
  98  *
  99  * All of the parsing functions take a #GUriFlags argument describing
 100  * exactly how to parse the URI; see the documentation for that type
 101  * for more details on the specific flags that you can pass. If you
 102  * need to choose different flags based on the type of URI, you can
 103  * use g_uri_peek_scheme() on the URI string to check the scheme
 104  * first, and use that to decide what flags to parse it with.
 105  *
 106  * For example, you might want to use %G_URI_PARAMS_WWW_FORM when parsing the
 107  * params for a web URI, so compare the result of g_uri_peek_scheme() against
 108  * `http` and `https`.
 109  *
 110  * ## Building URIs
 111  *
 112  * g_uri_join() and g_uri_join_with_user() can be used to construct
 113  * valid URI strings from a set of component strings. They are the
 114  * inverse of g_uri_split() and g_uri_split_with_user().
 115  *
 116  * Similarly, g_uri_build() and g_uri_build_with_user() can be used to
 117  * construct a #GUri from a set of component strings.
 118  *
 119  * As with the parsing functions, the building functions take a
 120  * #GUriFlags argument. In particular, it is important to keep in mind
 121  * whether the URI components you are using are already `%`-encoded. If so,
 122  * you must pass the %G_URI_FLAGS_ENCODED flag.
 123  *
 124  * ## `file://` URIs
 125  *
 126  * Note that Windows and Unix both define special rules for parsing
 127  * `file://` URIs (involving non-UTF-8 character sets on Unix, and the
 128  * interpretation of path separators on Windows). #GUri does not
 129  * implement these rules. Use g_filename_from_uri() and
 130  * g_filename_to_uri() if you want to properly convert between
 131  * `file://` URIs and local filenames.
 132  *
 133  * ## URI Equality
 134  *
 135  * Note that there is no `g_uri_equal ()` function, because comparing
 136  * URIs usefully requires scheme-specific knowledge that #GUri does
 137  * not have. #GUri can help with normalization if you use the various
  138  * encoded #GUriFlags as well as %G_URI_FLAGS_SCHEME_NORMALIZE; however,
 139  * it is not comprehensive.
 140  * For example, `data:,foo` and `data:;base64,Zm9v` resolve to the same
 141  * thing according to the `data:` URI specification which GLib does not
 142  * handle.
 143  *
 144  * Since: 2.66
 145  */
 146
 147 /**
 148  * GUri:
 149  *
 150  * A parsed absolute URI.
 151  *
 152  * Since #GUri only represents absolute URIs, all #GUris will have a
 153  * URI scheme, so g_uri_get_scheme() will always return a non-%NULL
 154  * answer. Likewise, by definition, all URIs have a path component, so
 155  * g_uri_get_path() will always return a non-%NULL string (which may be empty).
 156  *
 157  * If the URI string has an
 158  * [‘authority’ component](https://tools.ietf.org/html/rfc3986#section-3) (that
 159  * is, if the scheme is followed by `://` rather than just `:`), then the
 160  * #GUri will contain a hostname, and possibly a port and ‘userinfo’.
 161  * Additionally, depending on how the #GUri was constructed/parsed (for example,
 162  * using the %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS flags),
 163  * the userinfo may be split out into a username, password, and
 164  * additional authorization-related parameters.
 165  *
 166  * Normally, the components of a #GUri will have all `%`-encoded
 167  * characters decoded. However, if you construct/parse a #GUri with
 168  * %G_URI_FLAGS_ENCODED, then the `%`-encoding will be preserved instead in
 169  * the userinfo, path, and query fields (and in the host field if also
 170  * created with %G_URI_FLAGS_NON_DNS). In particular, this is necessary if
 171  * the URI may contain binary data or non-UTF-8 text, or if decoding
 172  * the components might change the interpretation of the URI.
 173  *
 174  * For example, with the encoded flag:
 175  *
 176  * |[<!-- language="C" -->
 177  *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_ENCODED, &err);
 178  *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue");
 179  * ]|
 180  *
 181  * While the default `%`-decoding behaviour would give:
 182  *
 183  * |[<!-- language="C" -->
 184  *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_NONE, &err);
 185  *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http://host/path?param=value");
 186  * ]|
 187  *
 188  * During decoding, if an invalid UTF-8 string is encountered, parsing will fail
 189  * with an error indicating the bad string location:
 190  *
 191  * |[<!-- language="C" -->
 192  *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fbad%3D%00alue", G_URI_FLAGS_NONE, &err);
 193  *   g_assert_error (err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY);
 194  * ]|
 195  *
 196  * You should pass %G_URI_FLAGS_ENCODED or %G_URI_FLAGS_ENCODED_QUERY if you
 197  * need to handle that case manually. In particular, if the query string
 198  * contains `=` characters that are `%`-encoded, you should let
  199  * g_uri_parse_params() do the decoding of the query, so it is decoded only once.
 200  *
 201  * #GUri is immutable once constructed, and can safely be accessed from
 202  * multiple threads. Its reference counting is atomic.
 203  *
 204  * Since: 2.66
 205  */
 206 struct _GUri {
 207   gchar     *scheme;
 208   gchar     *userinfo;
 209   gchar     *host;
 210   gint       port;
 211   gchar     *path;
 212   gchar     *query;
 213   gchar     *fragment;
 214
 215   gchar     *user;
 216   gchar     *password;
 217   gchar     *auth_params;
 218
 219   GUriFlags  flags;
 220 };
 221
 222 /**
 223  * g_uri_ref: (skip)
 224  * @uri: a #GUri
 225  *
 226  * Increments the reference count of @uri by one.
 227  *
 228  * Returns: @uri
 229  *
 230  * Since: 2.66
 231  */
 232 GUri *
 233 g_uri_ref (GUri *uri)
 234 {
 235   g_return_val_if_fail (uri != NULL, NULL);
 236
 237   return g_atomic_rc_box_acquire (uri);
 238 }
 239
 240 static void
 241 g_uri_clear (GUri *uri)
 242 {
 243   g_free (uri->scheme);
 244   g_free (uri->userinfo);
 245   g_free (uri->host);
 246   g_free (uri->path);
 247   g_free (uri->query);
 248   g_free (uri->fragment);
 249   g_free (uri->user);
 250   g_free (uri->password);
 251   g_free (uri->auth_params);
 252 }
 253
 254 /**
 255  * g_uri_unref: (skip)
 256  * @uri: a #GUri
 257  *
 258  * Atomically decrements the reference count of @uri by one.
 259  *
 260  * When the reference count reaches zero, the resources allocated by
 261  * @uri are freed
 262  *
 263  * Since: 2.66
 264  */
 265 void
 266 g_uri_unref (GUri *uri)
 267 {
 268   g_return_if_fail (uri != NULL);
 269
 270   g_atomic_rc_box_release_full (uri, (GDestroyNotify)g_uri_clear);
 271 }
 272
 273 static gboolean
 274 g_uri_char_is_unreserved (gchar ch)
 275 {
 276   if (g_ascii_isalnum (ch))
 277     return TRUE;
 278   return ch == '-' || ch == '.' || ch == '_' || ch == '~';
 279 }
 280
 281 #define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
 282 #define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
 283
 284 static gssize
 285 uri_decoder (gchar       **out,
 286              const gchar  *illegal_chars,
 287              const gchar  *start,
 288              gsize         length,
 289              gboolean      just_normalize,
 290              gboolean      www_form,
 291              GUriFlags     flags,
 292              GUriError     parse_error,
 293              GError      **error)
 294 {
 295   gchar c;
 296   GString *decoded;
 297   const gchar *invalid, *s, *end;
 298   gssize len;
 299
 300   if (!(flags & G_URI_FLAGS_ENCODED))
 301     just_normalize = FALSE;
 302
 303   decoded = g_string_sized_new (length + 1);
 304   for (s = start, end = s + length; s < end; s++)
 305     {
 306       if (*s == '%')
 307         {
 308           if (s + 2 >= end ||
 309               !g_ascii_isxdigit (s[1]) ||
 310               !g_ascii_isxdigit (s[2]))
 311             {
 312               /* % followed by non-hex or the end of the string; this is an error */
 313               if (!(flags & G_URI_FLAGS_PARSE_RELAXED))
 314                 {
 315                   g_set_error_literal (error, G_URI_ERROR, parse_error,
 316                                        /* xgettext: no-c-format */
 317                                        _("Invalid %-encoding in URI"));
 318                   g_string_free (decoded, TRUE);
 319                   return -1;
 320                 }
 321
 322               /* In non-strict mode, just let it through; we *don't*
 323                * fix it to "%25", since that might change the way that
 324                * the URI's owner would interpret it.
 325                */
 326               g_string_append_c (decoded, *s);
 327               continue;
 328             }
 329
 330           c = HEXCHAR (s);
 331           if (illegal_chars && strchr (illegal_chars, c))
 332             {
 333               g_set_error_literal (error, G_URI_ERROR, parse_error,
 334                                    _("Illegal character in URI"));
 335               g_string_free (decoded, TRUE);
 336               return -1;
 337             }
 338           if (just_normalize && !g_uri_char_is_unreserved (c))
 339             {
 340               /* Leave the % sequence there but normalize it. */
 341               g_string_append_c (decoded, *s);
 342               g_string_append_c (decoded, g_ascii_toupper (s[1]));
 343               g_string_append_c (decoded, g_ascii_toupper (s[2]));
 344               s += 2;
 345             }
 346           else
 347             {
 348               g_string_append_c (decoded, c);
 349               s += 2;
 350             }
 351         }
 352       else if (www_form && *s == '+')
 353         g_string_append_c (decoded, ' ');
 354       /* Normalize any illegal characters. */
 355       else if (just_normalize && (!g_ascii_isgraph (*s)))
 356         g_string_append_printf (decoded, "%%%02X", (guchar)*s);
 357       else
 358         g_string_append_c (decoded, *s);
 359     }
 360
 361   len = decoded->len;
 362   g_assert (len >= 0);
 363
 364   if (!(flags & G_URI_FLAGS_ENCODED) &&
 365       !g_utf8_validate (decoded->str, len, &invalid))
 366     {
 367       g_set_error_literal (error, G_URI_ERROR, parse_error,
 368                            _("Non-UTF-8 characters in URI"));
 369       g_string_free (decoded, TRUE);
 370       return -1;
 371     }
 372
 373   if (out)
 374     *out = g_string_free (decoded, FALSE);
 375   else
 376     g_string_free (decoded, TRUE);
 377
 378   return len;
 379 }
 380
 381 static gboolean
 382 uri_decode (gchar       **out,
 383             const gchar  *illegal_chars,
 384             const gchar  *start,
 385             gsize         length,
 386             gboolean      www_form,
 387             GUriFlags     flags,
 388             GUriError     parse_error,
 389             GError      **error)
 390 {
 391   return uri_decoder (out, illegal_chars, start, length, FALSE, www_form, flags,
 392                       parse_error, error) != -1;
 393 }
 394
 395 static gboolean
 396 uri_normalize (gchar       **out,
 397                const gchar  *start,
 398                gsize         length,
 399                GUriFlags     flags,
 400                GUriError     parse_error,
 401                GError      **error)
 402 {
 403   return uri_decoder (out, NULL, start, length, TRUE, FALSE, flags,
 404                       parse_error, error) != -1;
 405 }
 406
 407 static gboolean
 408 is_valid (guchar       c,
 409           const gchar *reserved_chars_allowed)
 410 {
 411   if (g_uri_char_is_unreserved (c))
 412     return TRUE;
 413
 414   if (reserved_chars_allowed && strchr (reserved_chars_allowed, c))
 415     return TRUE;
 416
 417   return FALSE;
 418 }
 419
 420 void
 421 _uri_encoder (GString      *out,
 422               const guchar *start,
 423               gsize         length,
 424               const gchar  *reserved_chars_allowed,
 425               gboolean      allow_utf8)
 426 {
 427   static const gchar hex[] = "0123456789ABCDEF";
 428   const guchar *p = start;
 429   const guchar *end = p + length;
 430
 431   while (p < end)
 432     {
 433       gunichar multibyte_utf8_char = 0;
 434
 435       if (allow_utf8 && *p >= 0x80)
 436         multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p);
 437
 438       if (multibyte_utf8_char > 0 &&
 439           multibyte_utf8_char != (gunichar) -1 && multibyte_utf8_char != (gunichar) -2)
 440         {
 441           gint len = g_utf8_skip [*p];
 442           g_string_append_len (out, (gchar *)p, len);
 443           p += len;
 444         }
 445       else if (is_valid (*p, reserved_chars_allowed))
 446         {
 447           g_string_append_c (out, *p);
 448           p++;
 449         }
 450       else
 451         {
 452           g_string_append_c (out, '%');
 453           g_string_append_c (out, hex[*p >> 4]);
 454           g_string_append_c (out, hex[*p & 0xf]);
 455           p++;
 456         }
 457     }
 458 }
 459
 460 /* Parse the IP-literal construction from RFC 6874 (which extends RFC 3986 to
 461  * support IPv6 zone identifiers.
 462  *
 463  * Currently, IP versions beyond 6 (i.e. the IPvFuture rule) are unsupported.
 464  * There’s no point supporting them until (a) they exist and (b) the rest of the
 465  * stack (notably, sockets) supports them.
 466  *
 467  * Rules:
 468  *
 469  * IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture  ) "]"
 470  *
 471  * ZoneID = 1*( unreserved / pct-encoded )
 472  *
 473  * IPv6addrz = IPv6address "%25" ZoneID
 474  *
 475  * If %G_URI_FLAGS_PARSE_RELAXED is specified, this function also accepts:
 476  *
 477  * IPv6addrz = IPv6address "%" ZoneID
 478  */
 479 static gboolean
 480 parse_ip_literal (const gchar  *start,
 481                   gsize         length,
 482                   GUriFlags     flags,
 483                   gchar       **out,
 484                   GError      **error)
 485 {
 486   gchar *pct, *zone_id = NULL;
 487   gchar *addr = NULL;
 488   gsize addr_length = 0;
 489   gsize zone_id_length = 0;
 490   gchar *decoded_zone_id = NULL;
 491
 492   if (start[length - 1] != ']')
 493     goto bad_ipv6_literal;
 494
 495   /* Drop the square brackets */
 496   addr = g_strndup (start + 1, length - 2);
 497   addr_length = length - 2;
 498
 499   /* If there's an IPv6 scope ID, split out the zone. */
 500   pct = strchr (addr, '%');
 501   if (pct != NULL)
 502     {
 503       *pct = '\0';
 504
 505       if (addr_length - (pct - addr) >= 4 &&
 506           *(pct + 1) == '2' && *(pct + 2) == '5')
 507         {
 508           zone_id = pct + 3;
 509           zone_id_length = addr_length - (zone_id - addr);
 510         }
 511       else if (flags & G_URI_FLAGS_PARSE_RELAXED &&
 512                addr_length - (pct - addr) >= 2)
 513         {
 514           zone_id = pct + 1;
 515           zone_id_length = addr_length - (zone_id - addr);
 516         }
 517       else
 518         goto bad_ipv6_literal;
 519
 520       g_assert (zone_id_length >= 1);
 521     }
 522
 523   /* addr must be an IPv6 address */
 524   if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
 525     goto bad_ipv6_literal;
 526
 527   /* Zone ID must be valid. It can contain %-encoded characters. */
 528   if (zone_id != NULL &&
 529       !uri_decode (&decoded_zone_id, NULL, zone_id, zone_id_length, FALSE,
 530                    flags, G_URI_ERROR_BAD_HOST, NULL))
 531     goto bad_ipv6_literal;
 532
 533   /* Success */
 534   if (out != NULL && decoded_zone_id != NULL)
 535     *out = g_strconcat (addr, "%", decoded_zone_id, NULL);
 536   else if (out != NULL)
 537     *out = g_steal_pointer (&addr);
 538
 539   g_free (addr);
 540   g_free (decoded_zone_id);
 541
 542   return TRUE;
 543
 544 bad_ipv6_literal:
 545   g_free (addr);
 546   g_free (decoded_zone_id);
 547   g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
 548                _("Invalid IPv6 address ‘%.*s’ in URI"),
 549                (gint)length, start);
 550
 551   return FALSE;
 552 }
 553
 554 static gboolean
 555 parse_host (const gchar  *start,
 556             gsize         length,
 557             GUriFlags     flags,
 558             gchar       **out,
 559             GError      **error)
 560 {
 561   gchar *decoded = NULL, *host;
 562   gchar *addr = NULL;
 563
 564   if (*start == '[')
 565     {
 566       if (!parse_ip_literal (start, length, flags, &host, error))
 567         return FALSE;
 568       goto ok;
 569     }
 570
 571   if (g_ascii_isdigit (*start))
 572     {
 573       addr = g_strndup (start, length);
 574       if (g_hostname_is_ip_address (addr))
 575         {
 576           host = addr;
 577           goto ok;
 578         }
 579       g_free (addr);
 580     }
 581
 582   if (flags & G_URI_FLAGS_NON_DNS)
 583     {
 584       if (!uri_normalize (&decoded, start, length, flags,
 585                           G_URI_ERROR_BAD_HOST, error))
 586         return FALSE;
 587       host = g_steal_pointer (&decoded);
 588       goto ok;
 589     }
 590
 591   flags &= ~G_URI_FLAGS_ENCODED;
 592   if (!uri_decode (&decoded, NULL, start, length, FALSE, flags,
 593                    G_URI_ERROR_BAD_HOST, error))
 594     return FALSE;
 595
 596   /* You're not allowed to %-encode an IP address, so if it wasn't
 597    * one before, it better not be one now.
 598    */
 599   if (g_hostname_is_ip_address (decoded))
 600     {
 601       g_free (decoded);
 602       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
 603                    _("Illegal encoded IP address ‘%.*s’ in URI"),
 604                    (gint)length, start);
 605       return FALSE;
 606     }
 607
 608   if (g_hostname_is_non_ascii (decoded))
 609     {
 610       host = g_hostname_to_ascii (decoded);
 611       if (host == NULL)
 612         {
 613           g_free (decoded);
 614           g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
 615                        _("Illegal internationalized hostname ‘%.*s’ in URI"),
 616                        (gint) length, start);
 617           return FALSE;
 618         }
 619     }
 620   else
 621     {
 622       host = g_steal_pointer (&decoded);
 623     }
 624
 625  ok:
 626   if (out)
 627     *out = g_steal_pointer (&host);
 628   g_free (host);
 629   g_free (decoded);
 630
 631   return TRUE;
 632 }
 633
 634 static gboolean
 635 parse_port (const gchar  *start,
 636             gsize         length,
 637             gint         *out,
 638             GError      **error)
 639 {
 640   gchar *end;
 641   gulong parsed_port;
 642
 643   /* strtoul() allows leading + or -, so we have to check this first. */
 644   if (!g_ascii_isdigit (*start))
 645     {
 646       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
 647                    _("Could not parse port ‘%.*s’ in URI"),
 648                    (gint)length, start);
 649       return FALSE;
 650     }
 651
 652   /* We know that *(start + length) is either '\0' or a non-numeric
 653    * character, so strtoul() won't scan beyond it.
 654    */
 655   parsed_port = strtoul (start, &end, 10);
 656   if (end != start + length)
 657     {
 658       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
 659                    _("Could not parse port ‘%.*s’ in URI"),
 660                    (gint)length, start);
 661       return FALSE;
 662     }
 663   else if (parsed_port > 65535)
 664     {
 665       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
 666                    _("Port ‘%.*s’ in URI is out of range"),
 667                    (gint)length, start);
 668       return FALSE;
 669     }
 670
 671   if (out)
 672     *out = parsed_port;
 673   return TRUE;
 674 }
 675
 676 static gboolean
 677 parse_userinfo (const gchar  *start,
 678                 gsize         length,
 679                 GUriFlags     flags,
 680                 gchar       **user,
 681                 gchar       **password,
 682                 gchar       **auth_params,
 683                 GError      **error)
 684 {
 685   const gchar *user_end = NULL, *password_end = NULL, *auth_params_end;
 686
 687   auth_params_end = start + length;
 688   if (flags & G_URI_FLAGS_HAS_AUTH_PARAMS)
 689     password_end = memchr (start, ';', auth_params_end - start);
 690   if (!password_end)
 691     password_end = auth_params_end;
 692   if (flags & G_URI_FLAGS_HAS_PASSWORD)
 693     user_end = memchr (start, ':', password_end - start);
 694   if (!user_end)
 695     user_end = password_end;
 696
 697   if (!uri_normalize (user, start, user_end - start, flags,
 698                       G_URI_ERROR_BAD_USER, error))
 699     return FALSE;
 700
 701   if (*user_end == ':')
 702     {
 703       start = user_end + 1;
 704       if (!uri_normalize (password, start, password_end - start, flags,
 705                           G_URI_ERROR_BAD_PASSWORD, error))
 706         {
 707           if (user)
 708             g_clear_pointer (user, g_free);
 709           return FALSE;
 710         }
 711     }
 712   else if (password)
 713     *password = NULL;
 714
 715   if (*password_end == ';')
 716     {
 717       start = password_end + 1;
 718       if (!uri_normalize (auth_params, start, auth_params_end - start, flags,
 719                           G_URI_ERROR_BAD_AUTH_PARAMS, error))
 720         {
 721           if (user)
 722             g_clear_pointer (user, g_free);
 723           if (password)
 724             g_clear_pointer (password, g_free);
 725           return FALSE;
 726         }
 727     }
 728   else if (auth_params)
 729     *auth_params = NULL;
 730
 731   return TRUE;
 732 }
 733
 734 static gchar *
 735 uri_cleanup (const gchar *uri_string)
 736 {
 737   GString *copy;
 738   const gchar *end;
 739
 740   /* Skip leading whitespace */
 741   while (g_ascii_isspace (*uri_string))
 742     uri_string++;
 743
 744   /* Ignore trailing whitespace */
 745   end = uri_string + strlen (uri_string);
 746   while (end > uri_string && g_ascii_isspace (*(end - 1)))
 747     end--;
 748
 749   /* Copy the rest, encoding unencoded spaces and stripping other whitespace */
 750   copy = g_string_sized_new (end - uri_string);
 751   while (uri_string < end)
 752     {
 753       if (*uri_string == ' ')
 754         g_string_append (copy, "%20");
 755       else if (g_ascii_isspace (*uri_string))
 756         ;
 757       else
 758         g_string_append_c (copy, *uri_string);
 759       uri_string++;
 760     }
 761
 762   return g_string_free (copy, FALSE);
 763 }
 764
 765 static gboolean
 766 should_normalize_empty_path (const char *scheme)
 767 {
 768   const char * const schemes[] = { "https", "http", "wss", "ws" };
 769   gsize i;
 770   for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
 771     {
 772       if (!strcmp (schemes[i], scheme))
 773         return TRUE;
 774     }
 775   return FALSE;
 776 }
 777
 778 static int
 779 normalize_port (const char *scheme,
 780                 int         port)
 781 {
 782   const char *default_schemes[3] = { NULL };
 783   int i;
 784
 785   switch (port)
 786     {
 787     case 21:
 788       default_schemes[0] = "ftp";
 789       break;
 790     case 80:
 791       default_schemes[0] = "http";
 792       default_schemes[1] = "ws";
 793       break;
 794     case 443:
 795       default_schemes[0] = "https";
 796       default_schemes[1] = "wss";
 797       break;
 798     default:
 799       break;
 800     }
 801
 802   for (i = 0; default_schemes[i]; ++i)
 803     {
 804       if (!strcmp (scheme, default_schemes[i]))
 805         return -1;
 806     }
 807
 808   return port;
 809 }
 810
 811 static int
 812 default_scheme_port (const char *scheme)
 813 {
 814   if (strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0)
 815     return 80;
 816
 817   if (strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0)
 818     return 443;
 819
 820   if (strcmp (scheme, "ftp") == 0)
 821     return 21;
 822
 823   return -1;
 824 }
 825
 826 static gboolean
 827 g_uri_split_internal (const gchar  *uri_string,
 828                       GUriFlags     flags,
 829                       gchar       **scheme,
 830                       gchar       **userinfo,
 831                       gchar       **user,
 832                       gchar       **password,
 833                       gchar       **auth_params,
 834                       gchar       **host,
 835                       gint         *port,
 836                       gchar       **path,
 837                       gchar       **query,
 838                       gchar       **fragment,
 839                       GError      **error)
 840 {
 841   const gchar *end, *colon, *at, *path_start, *semi, *question;
 842   const gchar *p, *bracket, *hostend;
 843   gchar *cleaned_uri_string = NULL;
 844   gchar *normalized_scheme = NULL;
 845
 846   if (scheme)
 847     *scheme = NULL;
 848   if (userinfo)
 849     *userinfo = NULL;
 850   if (user)
 851     *user = NULL;
 852   if (password)
 853     *password = NULL;
 854   if (auth_params)
 855     *auth_params = NULL;
 856   if (host)
 857     *host = NULL;
 858   if (port)
 859     *port = -1;
 860   if (path)
 861     *path = NULL;
 862   if (query)
 863     *query = NULL;
 864   if (fragment)
 865     *fragment = NULL;
 866
 867   if ((flags & G_URI_FLAGS_PARSE_RELAXED) && strpbrk (uri_string, " \t\n\r"))
 868     {
 869       cleaned_uri_string = uri_cleanup (uri_string);
 870       uri_string = cleaned_uri_string;
 871     }
 872
 873   /* Find scheme */
 874   p = uri_string;
 875   while (*p && (g_ascii_isalpha (*p) ||
 876                (p > uri_string && (g_ascii_isdigit (*p) ||
 877                                    *p == '.' || *p == '+' || *p == '-'))))
 878     p++;
 879
 880   if (p > uri_string && *p == ':')
 881     {
 882       normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
 883       if (scheme)
 884         *scheme = g_steal_pointer (&normalized_scheme);
 885       p++;
 886     }
 887   else
 888     {
 889       if (scheme)
 890         *scheme = NULL;
 891       p = uri_string;
 892     }
 893
 894   /* Check for authority */
 895   if (strncmp (p, "//", 2) == 0)
 896     {
 897       p += 2;
 898
 899       path_start = p + strcspn (p, "/?#");
 900       at = memchr (p, '@', path_start - p);
 901       if (at)
 902         {
 903           if (flags & G_URI_FLAGS_PARSE_RELAXED)
 904             {
 905               gchar *next_at;
 906
 907               /* Any "@"s in the userinfo must be %-encoded, but
 908                * people get this wrong sometimes. Since "@"s in the
 909                * hostname are unlikely (and also wrong anyway), assume
 910                * that if there are extra "@"s, they belong in the
 911                * userinfo.
 912                */
 913               do
 914                 {
 915                   next_at = memchr (at + 1, '@', path_start - (at + 1));
 916                   if (next_at)
 917                     at = next_at;
 918                 }
 919               while (next_at);
 920             }
 921
 922           if (user || password || auth_params ||
 923               (flags & (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS)))
 924             {
 925               if (!parse_userinfo (p, at - p, flags,
 926                                    user, password, auth_params,
 927                                    error))
 928                 goto fail;
 929             }
 930
 931           if (!uri_normalize (userinfo, p, at - p, flags,
 932                               G_URI_ERROR_BAD_USER, error))
 933             goto fail;
 934
 935           p = at + 1;
 936         }
 937
 938       if (flags & G_URI_FLAGS_PARSE_RELAXED)
 939         {
 940           semi = strchr (p, ';');
 941           if (semi && semi < path_start)
 942             {
 943               /* Technically, semicolons are allowed in the "host"
 944                * production, but no one ever does this, and some
 945                * schemes mistakenly use semicolon as a delimiter
 946                * marking the start of the path. We have to check this
 947                * after checking for userinfo though, because a
 948                * semicolon before the "@" must be part of the
 949                * userinfo.
 950                */
 951               path_start = semi;
 952             }
 953         }
 954
 955       /* Find host and port. The host may be a bracket-delimited IPv6
 956        * address, in which case the colon delimiting the port must come
 957        * (immediately) after the close bracket.
 958        */
 959       if (*p == '[')
 960         {
 961           bracket = memchr (p, ']', path_start - p);
 962           if (bracket && *(bracket + 1) == ':')
 963             colon = bracket + 1;
 964           else
 965             colon = NULL;
 966         }
 967       else
 968         colon = memchr (p, ':', path_start - p);
 969
 970       hostend = colon ? colon : path_start;
 971       if (!parse_host (p, hostend - p, flags, host, error))
 972         goto fail;
 973
 974       if (colon && colon != path_start - 1)
 975         {
 976           p = colon + 1;
 977           if (!parse_port (p, path_start - p, port, error))
 978             goto fail;
 979         }
 980
 981       p = path_start;
 982     }
 983
 984   /* Find fragment. */
 985   end = p + strcspn (p, "#");
 986   if (*end == '#')
 987     {
 988       if (!uri_normalize (fragment, end + 1, strlen (end + 1),
 989                           flags | (flags & G_URI_FLAGS_ENCODED_FRAGMENT ? G_URI_FLAGS_ENCODED : 0),
 990                           G_URI_ERROR_BAD_FRAGMENT, error))
 991         goto fail;
 992     }
 993
 994   /* Find query */
 995   question = memchr (p, '?', end - p);
 996   if (question)
 997     {
 998       if (!uri_normalize (query, question + 1, end - (question + 1),
 999                           flags | (flags & G_URI_FLAGS_ENCODED_QUERY ? G_URI_FLAGS_ENCODED : 0),
1000                           G_URI_ERROR_BAD_QUERY, error))
1001         goto fail;
1002       end = question;
1003     }
1004
1005   if (!uri_normalize (path, p, end - p,
1006                       flags | (flags & G_URI_FLAGS_ENCODED_PATH ? G_URI_FLAGS_ENCODED : 0),
1007                       G_URI_ERROR_BAD_PATH, error))
1008     goto fail;
1009
1010   /* Scheme-based normalization */
1011   if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
1012     {
1013       const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
1014
1015       if (should_normalize_empty_path (scheme_str) && path && !**path)
1016         {
1017           g_free (*path);
1018           *path = g_strdup ("/");
1019         }
1020
1021       if (port && *port == -1)
1022         *port = default_scheme_port (scheme_str);
1023     }
1024
1025   g_free (normalized_scheme);
1026   g_free (cleaned_uri_string);
1027   return TRUE;
1028
1029  fail:
1030   if (scheme)
1031     g_clear_pointer (scheme, g_free);
1032   if (userinfo)
1033     g_clear_pointer (userinfo, g_free);
1034   if (host)
1035     g_clear_pointer (host, g_free);
1036   if (port)
1037     *port = -1;
1038   if (path)
1039     g_clear_pointer (path, g_free);
1040   if (query)
1041     g_clear_pointer (query, g_free);
1042   if (fragment)
1043     g_clear_pointer (fragment, g_free);
1044
1045   g_free (normalized_scheme);
1046   g_free (cleaned_uri_string);
1047   return FALSE;
1048 }
1049
1050 /**
1051  * g_uri_split:
1052  * @uri_ref: a string containing a relative or absolute URI
1053  * @flags: flags for parsing @uri_ref
1054  * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1055  *    the scheme (converted to lowercase), or %NULL
1056  * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains
1057  *    the userinfo, or %NULL
1058  * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1059  *    host, or %NULL
1060  * @port: (out) (optional) (transfer full): on return, contains the
1061  *    port, or `-1`
1062  * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1063  *    path
1064  * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1065  *    query, or %NULL
1066  * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1067  *    the fragment, or %NULL
1068  * @error: #GError for error reporting, or %NULL to ignore.
1069  *
1070  * Parses @uri_ref (which can be an
1071  * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1072  * returns the pieces. Any component that doesn't appear in @uri_ref will be
1073  * returned as %NULL (but note that all URIs always have a path component,
1074  * though it may be the empty string).
1075  *
1076  * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in
1077  * @uri_ref will remain encoded in the output strings. (If not,
1078  * then all such characters will be decoded.) Note that decoding will
1079  * only work if the URI components are ASCII or UTF-8, so you will
1080  * need to use %G_URI_FLAGS_ENCODED if they are not.
1081  *
1082  * Note that the %G_URI_FLAGS_HAS_PASSWORD and
1083  * %G_URI_FLAGS_HAS_AUTH_PARAMS @flags are ignored by g_uri_split(),
1084  * since it always returns only the full userinfo; use
1085  * g_uri_split_with_user() if you want it split up.
1086  *
1087  * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1088  *   on error.
1089  *
1090  * Since: 2.66
1091  */
1092 gboolean
1093 g_uri_split (const gchar  *uri_ref,
1094              GUriFlags     flags,
1095              gchar       **scheme,
1096              gchar       **userinfo,
1097              gchar       **host,
1098              gint         *port,
1099              gchar       **path,
1100              gchar       **query,
1101              gchar       **fragment,
1102              GError      **error)
1103 {
1104   g_return_val_if_fail (uri_ref != NULL, FALSE);
1105   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1106
1107   return g_uri_split_internal (uri_ref, flags,
1108                                scheme, userinfo, NULL, NULL, NULL,
1109                                host, port, path, query, fragment,
1110                                error);
1111 }
1112
1113 /**
1114  * g_uri_split_with_user:
1115  * @uri_ref: a string containing a relative or absolute URI
1116  * @flags: flags for parsing @uri_ref
1117  * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1118  *    the scheme (converted to lowercase), or %NULL
1119  * @user: (out) (nullable) (optional) (transfer full): on return, contains
1120  *    the user, or %NULL
1121  * @password: (out) (nullable) (optional) (transfer full): on return, contains
1122  *    the password, or %NULL
1123  * @auth_params: (out) (nullable) (optional) (transfer full): on return, contains
1124  *    the auth_params, or %NULL
1125  * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1126  *    host, or %NULL
1127  * @port: (out) (optional) (transfer full): on return, contains the
1128  *    port, or `-1`
1129  * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1130  *    path
1131  * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1132  *    query, or %NULL
1133  * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1134  *    the fragment, or %NULL
1135  * @error: #GError for error reporting, or %NULL to ignore.
1136  *
1137  * Parses @uri_ref (which can be an
1138  * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1139  * returns the pieces. Any component that doesn't appear in @uri_ref will be
1140  * returned as %NULL (but note that all URIs always have a path component,
1141  * though it may be the empty string).
1142  *
1143  * See g_uri_split(), and the definition of #GUriFlags, for more
1144  * information on the effect of @flags. Note that @password will only
1145  * be parsed out if @flags contains %G_URI_FLAGS_HAS_PASSWORD, and
1146  * @auth_params will only be parsed out if @flags contains
1147  * %G_URI_FLAGS_HAS_AUTH_PARAMS.
1148  *
1149  * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1150  *   on error.
1151  *
1152  * Since: 2.66
1153  */
1154 gboolean
1155 g_uri_split_with_user (const gchar  *uri_ref,
1156                        GUriFlags     flags,
1157                        gchar       **scheme,
1158                        gchar       **user,
1159                        gchar       **password,
1160                        gchar       **auth_params,
1161                        gchar       **host,
1162                        gint         *port,
1163                        gchar       **path,
1164                        gchar       **query,
1165                        gchar       **fragment,
1166                        GError      **error)
1167 {
1168   g_return_val_if_fail (uri_ref != NULL, FALSE);
1169   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1170
1171   return g_uri_split_internal (uri_ref, flags,
1172                                scheme, NULL, user, password, auth_params,
1173                                host, port, path, query, fragment,
1174                                error);
1175 }
1176
1177
1178 /**
1179  * g_uri_split_network:
1180  * @uri_string: a string containing an absolute URI
1181  * @flags: flags for parsing @uri_string
1182  * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1183  *    the scheme (converted to lowercase), or %NULL
1184  * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1185  *    host, or %NULL
1186  * @port: (out) (optional) (transfer full): on return, contains the
1187  *    port, or `-1`
1188  * @error: #GError for error reporting, or %NULL to ignore.
1189  *
1190  * Parses @uri_string (which must be an [absolute URI][relative-absolute-uris])
1191  * according to @flags, and returns the pieces relevant to connecting to a host.
1192  * See the documentation for g_uri_split() for more details; this is
1193  * mostly a wrapper around that function with simpler arguments.
1194  * However, it will return an error if @uri_string is a relative URI,
1195  * or does not contain a hostname component.
1196  *
1197  * Returns: (skip): %TRUE if @uri_string parsed successfully,
1198  *   %FALSE on error.
1199  *
1200  * Since: 2.66
1201  */
1202 gboolean
1203 g_uri_split_network (const gchar  *uri_string,
1204                      GUriFlags     flags,
1205                      gchar       **scheme,
1206                      gchar       **host,
1207                      gint         *port,
1208                      GError      **error)
1209 {
1210   gchar *my_scheme = NULL, *my_host = NULL;
1211
1212   g_return_val_if_fail (uri_string != NULL, FALSE);
1213   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1214
1215   if (!g_uri_split_internal (uri_string, flags,
1216                              &my_scheme, NULL, NULL, NULL, NULL,
1217                              &my_host, port, NULL, NULL, NULL,
1218                              error))
1219     return FALSE;
1220
1221   if (!my_scheme || !my_host)
1222     {
1223       if (!my_scheme)
1224         {
1225           g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1226                        _("URI ‘%s’ is not an absolute URI"),
1227                        uri_string);
1228         }
1229       else
1230         {
1231           g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
1232                        _("URI ‘%s’ has no host component"),
1233                        uri_string);
1234         }
1235       g_free (my_scheme);
1236       g_free (my_host);
1237
1238       return FALSE;
1239     }
1240
1241   if (scheme)
1242     *scheme = g_steal_pointer (&my_scheme);
1243   if (host)
1244     *host = g_steal_pointer (&my_host);
1245
1246   g_free (my_scheme);
1247   g_free (my_host);
1248
1249   return TRUE;
1250 }
1251
1252 /**
1253  * g_uri_is_valid:
1254  * @uri_string: a string containing an absolute URI
1255  * @flags: flags for parsing @uri_string
1256  * @error: #GError for error reporting, or %NULL to ignore.
1257  *
1258  * Parses @uri_string according to @flags, to determine whether it is a valid
1259  * [absolute URI][relative-absolute-uris], i.e. it does not need to be resolved
1260  * relative to another URI using g_uri_parse_relative().
1261  *
1262  * If it’s not a valid URI, an error is returned explaining how it’s invalid.
1263  *
1264  * See g_uri_split(), and the definition of #GUriFlags, for more
1265  * information on the effect of @flags.
1266  *
1267  * Returns: %TRUE if @uri_string is a valid absolute URI, %FALSE on error.
1268  *
1269  * Since: 2.66
1270  */
1271 gboolean
1272 g_uri_is_valid (const gchar  *uri_string,
1273                 GUriFlags     flags,
1274                 GError      **error)
1275 {
1276   gchar *my_scheme = NULL;
1277
1278   g_return_val_if_fail (uri_string != NULL, FALSE);
1279   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1280
1281   if (!g_uri_split_internal (uri_string, flags,
1282                              &my_scheme, NULL, NULL, NULL, NULL,
1283                              NULL, NULL, NULL, NULL, NULL,
1284                              error))
1285     return FALSE;
1286
1287   if (!my_scheme)
1288     {
1289       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1290                    _("URI ‘%s’ is not an absolute URI"),
1291                    uri_string);
1292       return FALSE;
1293     }
1294
1295   g_free (my_scheme);
1296
1297   return TRUE;
1298 }
1299
1300
/* Moves @out back over the most recently written path segment: it steps
 * backwards until it lands on a '/' or reaches @start, and returns the
 * new position. If @out is already at @start it is returned unchanged.
 */
static gchar *
rewind_last_segment (gchar *start,
                     gchar *out)
{
  while (out > start)
    {
      out--;
      if (*out == '/')
        break;
    }

  return out;
}

/* In-place implementation of the "Remove Dot Segments" algorithm from
 * section 5.2.4 of RFC 3986; @path is rewritten with its "." and ".."
 * segments resolved.
 *
 * See https://tools.ietf.org/html/rfc3986#section-5.2.4
 */
static void
remove_dot_segments (gchar *path)
{
  /* Reading and writing share one buffer. That is safe because the write
   * cursor only moves forward when the read cursor does, and the read
   * cursor never moves backwards, so the write cursor never overtakes it. */
  gchar *src = path;
  gchar *dst = path;

  /* Nothing to do (and nothing is written) for an empty path. */
  if (*path == '\0')
    return;

  while (*src != '\0')
    {
      /* Step A: a leading "../" or "./" is simply dropped. */
      if (strncmp (src, "../", 3) == 0)
        {
          src += 3;
          continue;
        }
      if (strncmp (src, "./", 2) == 0)
        {
          src += 2;
          continue;
        }

      /* Step B: a leading "/./", or a trailing "/." where "." is the whole
       * segment, collapses to "/". */
      if (strncmp (src, "/./", 3) == 0)
        {
          src += 2;
          continue;
        }
      if (strcmp (src, "/.") == 0)
        {
          src[1] = '\0';
          continue;
        }

      /* Step C: a leading "/../", or a trailing "/.." where ".." is the
       * whole segment, collapses to "/" and also discards the last segment
       * written so far (with its preceding "/", if there is one). */
      if (strncmp (src, "/../", 4) == 0)
        {
          src += 3;
          dst = rewind_last_segment (path, dst);
          continue;
        }
      if (strcmp (src, "/..") == 0)
        {
          src[1] = '\0';
          dst = rewind_last_segment (path, dst);
          continue;
        }

      /* Step D: if all that is left is "." or "..", discard it. */
      if (strcmp (src, "..") == 0 || strcmp (src, ".") == 0)
        {
          src[0] = '\0';
          continue;
        }

      /* Step E: copy one segment across. The first character (the leading
       * "/", if any) is always taken, then everything up to, but not
       * including, the next "/" or the end of the input. */
      do
        *dst++ = *src++;
      while (*src != '\0' && *src != '/');
    }

  *dst = '\0';
}
1389
1390 /**
1391  * g_uri_parse:
1392  * @uri_string: a string representing an absolute URI
1393  * @flags: flags describing how to parse @uri_string
1394  * @error: #GError for error reporting, or %NULL to ignore.
1395  *
1396  * Parses @uri_string according to @flags. If the result is not a
1397  * valid [absolute URI][relative-absolute-uris], it will be discarded, and an
1398  * error returned.
1399  *
1400  * Return value: (transfer full): a new #GUri, or NULL on error.
1401  *
1402  * Since: 2.66
1403  */
1404 GUri *
1405 g_uri_parse (const gchar  *uri_string,
1406              GUriFlags     flags,
1407              GError      **error)
1408 {
1409   g_return_val_if_fail (uri_string != NULL, NULL);
1410   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1411
1412   return g_uri_parse_relative (NULL, uri_string, flags, error);
1413 }
1414
1415 /**
1416  * g_uri_parse_relative:
1417  * @base_uri: (nullable) (transfer none): a base absolute URI
1418  * @uri_ref: a string representing a relative or absolute URI
1419  * @flags: flags describing how to parse @uri_ref
1420  * @error: #GError for error reporting, or %NULL to ignore.
1421  *
1422  * Parses @uri_ref according to @flags and, if it is a
1423  * [relative URI][relative-absolute-uris], resolves it relative to @base_uri.
1424  * If the result is not a valid absolute URI, it will be discarded, and an error
1425  * returned.
1426  *
1427  * Return value: (transfer full): a new #GUri, or NULL on error.
1428  *
1429  * Since: 2.66
1430  */
1431 GUri *
1432 g_uri_parse_relative (GUri         *base_uri,
1433                       const gchar  *uri_ref,
1434                       GUriFlags     flags,
1435                       GError      **error)
1436 {
1437   GUri *uri = NULL;
1438
1439   g_return_val_if_fail (uri_ref != NULL, NULL);
1440   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1441   g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL);
1442
1443   /* Use GUri struct to construct the return value: there is no guarantee it is
1444    * actually correct within the function body. */
1445   uri = g_atomic_rc_box_new0 (GUri);
1446   uri->flags = flags;
1447
1448   if (!g_uri_split_internal (uri_ref, flags,
1449                              &uri->scheme, &uri->userinfo,
1450                              &uri->user, &uri->password, &uri->auth_params,
1451                              &uri->host, &uri->port,
1452                              &uri->path, &uri->query, &uri->fragment,
1453                              error))
1454     {
1455       g_uri_unref (uri);
1456       return NULL;
1457     }
1458
1459   if (!uri->scheme && !base_uri)
1460     {
1461       g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
1462                            _("URI is not absolute, and no base URI was provided"));
1463       g_uri_unref (uri);
1464       return NULL;
1465     }
1466
1467   if (base_uri)
1468     {
1469       /* This is section 5.2.2 of RFC 3986, except that we're doing
1470        * it in place in @uri rather than copying from R to T.
1471        *
1472        * See https://tools.ietf.org/html/rfc3986#section-5.2.2
1473        */
1474       if (uri->scheme)
1475         remove_dot_segments (uri->path);
1476       else
1477         {
1478           uri->scheme = g_strdup (base_uri->scheme);
1479           if (uri->host)
1480             remove_dot_segments (uri->path);
1481           else
1482             {
1483               if (!*uri->path)
1484                 {
1485                   g_free (uri->path);
1486                   uri->path = g_strdup (base_uri->path);
1487                   if (!uri->query)
1488                     uri->query = g_strdup (base_uri->query);
1489                 }
1490               else
1491                 {
1492                   if (*uri->path == '/')
1493                     remove_dot_segments (uri->path);
1494                   else
1495                     {
1496                       gchar *newpath, *last;
1497
1498                       last = strrchr (base_uri->path, '/');
1499                       if (last)
1500                         {
1501                           newpath = g_strdup_printf ("%.*s/%s",
1502                                                      (gint)(last - base_uri->path),
1503                                                      base_uri->path,
1504                                                      uri->path);
1505                         }
1506                       else
1507                         newpath = g_strdup_printf ("/%s", uri->path);
1508
1509                       g_free (uri->path);
1510                       uri->path = g_steal_pointer (&newpath);
1511
1512                       remove_dot_segments (uri->path);
1513                     }
1514                 }
1515
1516               uri->userinfo = g_strdup (base_uri->userinfo);
1517               uri->user = g_strdup (base_uri->user);
1518               uri->password = g_strdup (base_uri->password);
1519               uri->auth_params = g_strdup (base_uri->auth_params);
1520               uri->host = g_strdup (base_uri->host);
1521               uri->port = base_uri->port;
1522             }
1523         }
1524
1525       /* Scheme normalization couldn't have been done earlier
1526        * as the relative URI may not have had a scheme */
1527       if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
1528         {
1529           if (should_normalize_empty_path (uri->scheme) && !*uri->path)
1530             {
1531               g_free (uri->path);
1532               uri->path = g_strdup ("/");
1533             }
1534
1535           uri->port = normalize_port (uri->scheme, uri->port);
1536         }
1537     }
1538   else
1539     {
1540       remove_dot_segments (uri->path);
1541     }
1542
1543   return g_steal_pointer (&uri);
1544 }
1545
1546 /**
1547  * g_uri_resolve_relative:
1548  * @base_uri_string: (nullable): a string representing a base URI
1549  * @uri_ref: a string representing a relative or absolute URI
1550  * @flags: flags describing how to parse @uri_ref
1551  * @error: #GError for error reporting, or %NULL to ignore.
1552  *
1553  * Parses @uri_ref according to @flags and, if it is a
1554  * [relative URI][relative-absolute-uris], resolves it relative to
1555  * @base_uri_string. If the result is not a valid absolute URI, it will be
1556  * discarded, and an error returned.
1557  *
1558  * (If @base_uri_string is %NULL, this just returns @uri_ref, or
1559  * %NULL if @uri_ref is invalid or not absolute.)
1560  *
1561  * Return value: (transfer full): the resolved URI string,
1562  * or NULL on error.
1563  *
1564  * Since: 2.66
1565  */
1566 gchar *
1567 g_uri_resolve_relative (const gchar  *base_uri_string,
1568                         const gchar  *uri_ref,
1569                         GUriFlags     flags,
1570                         GError      **error)
1571 {
1572   GUri *base_uri, *resolved_uri;
1573   gchar *resolved_uri_string;
1574
1575   g_return_val_if_fail (uri_ref != NULL, NULL);
1576   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1577
1578   flags |= G_URI_FLAGS_ENCODED;
1579
1580   if (base_uri_string)
1581     {
1582       base_uri = g_uri_parse (base_uri_string, flags, error);
1583       if (!base_uri)
1584         return NULL;
1585     }
1586   else
1587     base_uri = NULL;
1588
1589   resolved_uri = g_uri_parse_relative (base_uri, uri_ref, flags, error);
1590   if (base_uri)
1591     g_uri_unref (base_uri);
1592   if (!resolved_uri)
1593     return NULL;
1594
1595   resolved_uri_string = g_uri_to_string (resolved_uri);
1596   g_uri_unref (resolved_uri);
1597   return g_steal_pointer (&resolved_uri_string);
1598 }
1599
/* Character sets that g_uri_join_internal() passes to
 * g_string_append_uri_escaped() as the allowed-characters argument, one
 * per URI component.
 *
 * userinfo as a whole can contain sub-delims + ":", but split-out
 * user can't contain ":" or ";", and split-out password can't contain
 * ";".
 */
#define USERINFO_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
#define USER_ALLOWED_CHARS "!$&'()*+,="
#define PASSWORD_ALLOWED_CHARS "!$&'()*+,=:"
#define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS
/* Used for hosts that contain a ':' and are IP addresses (written in
 * square brackets by g_uri_join_internal()). */
#define IP_ADDR_ALLOWED_CHARS ":"
#define HOST_ALLOWED_CHARS G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
#define PATH_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
/* Query and fragment: the path set plus "?" (adjacent string literals
 * are concatenated). */
#define QUERY_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
#define FRAGMENT_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1613
1614 static gchar *
1615 g_uri_join_internal (GUriFlags    flags,
1616                      const gchar *scheme,
1617                      gboolean     userinfo,
1618                      const gchar *user,
1619                      const gchar *password,
1620                      const gchar *auth_params,
1621                      const gchar *host,
1622                      gint         port,
1623                      const gchar *path,
1624                      const gchar *query,
1625                      const gchar *fragment)
1626 {
1627   gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
1628   GString *str;
1629   char *normalized_scheme = NULL;
1630
1631   /* Restrictions on path prefixes. See:
1632    * https://tools.ietf.org/html/rfc3986#section-3
1633    */
1634   g_return_val_if_fail (path != NULL, NULL);
1635   g_return_val_if_fail (host == NULL || (path[0] == '\0' || path[0] == '/'), NULL);
1636   g_return_val_if_fail (host != NULL || (path[0] != '/' || path[1] != '/'), NULL);
1637
1638   /* Arbitrarily chosen default size which should handle most average length
1639    * URIs. This should avoid a few reallocations of the buffer in most cases.
1640    * It’s 1B shorter than a power of two, since GString will add a
1641    * nul-terminator byte. */
1642   str = g_string_sized_new (127);
1643
1644   if (scheme)
1645     {
1646       g_string_append (str, scheme);
1647       g_string_append_c (str, ':');
1648     }
1649
1650   if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && scheme && ((host && port != -1) || path[0] == '\0'))
1651     normalized_scheme = g_ascii_strdown (scheme, -1);
1652
1653   if (host)
1654     {
1655       g_string_append (str, "//");
1656
1657       if (user)
1658         {
1659           if (encoded)
1660             g_string_append (str, user);
1661           else
1662             {
1663               if (userinfo)
1664                 g_string_append_uri_escaped (str, user, USERINFO_ALLOWED_CHARS, TRUE);
1665               else
1666                 /* Encode ':' and ';' regardless of whether we have a
1667                  * password or auth params, since it may be parsed later
1668                  * under the assumption that it does.
1669                  */
1670                 g_string_append_uri_escaped (str, user, USER_ALLOWED_CHARS, TRUE);
1671             }
1672
1673           if (password)
1674             {
1675               g_string_append_c (str, ':');
1676               if (encoded)
1677                 g_string_append (str, password);
1678               else
1679                 g_string_append_uri_escaped (str, password,
1680                                              PASSWORD_ALLOWED_CHARS, TRUE);
1681             }
1682
1683           if (auth_params)
1684             {
1685               g_string_append_c (str, ';');
1686               if (encoded)
1687                 g_string_append (str, auth_params);
1688               else
1689                 g_string_append_uri_escaped (str, auth_params,
1690                                              AUTH_PARAMS_ALLOWED_CHARS, TRUE);
1691             }
1692
1693           g_string_append_c (str, '@');
1694         }
1695
1696       if (strchr (host, ':') && g_hostname_is_ip_address (host))
1697         {
1698           g_string_append_c (str, '[');
1699           if (encoded)
1700             g_string_append (str, host);
1701           else
1702             g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
1703           g_string_append_c (str, ']');
1704         }
1705       else
1706         {
1707           if (encoded)
1708             g_string_append (str, host);
1709           else
1710             g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
1711         }
1712
1713       if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
1714         g_string_append_printf (str, ":%d", port);
1715     }
1716
1717   if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
1718     g_string_append (str, "/");
1719   else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
1720     g_string_append (str, path);
1721   else
1722     g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
1723
1724   g_free (normalized_scheme);
1725
1726   if (query)
1727     {
1728       g_string_append_c (str, '?');
1729       if (encoded || flags & G_URI_FLAGS_ENCODED_QUERY)
1730         g_string_append (str, query);
1731       else
1732         g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
1733     }
1734   if (fragment)
1735     {
1736       g_string_append_c (str, '#');
1737       if (encoded || flags & G_URI_FLAGS_ENCODED_FRAGMENT)
1738         g_string_append (str, fragment);
1739       else
1740         g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
1741     }
1742
1743   return g_string_free (str, FALSE);
1744 }
1745
1746 /**
1747  * g_uri_join:
1748  * @flags: flags describing how to build the URI string
1749  * @scheme: (nullable): the URI scheme, or %NULL
1750  * @userinfo: (nullable): the userinfo component, or %NULL
1751  * @host: (nullable): the host component, or %NULL
1752  * @port: the port, or `-1`
1753  * @path: (not nullable): the path component
1754  * @query: (nullable): the query component, or %NULL
1755  * @fragment: (nullable): the fragment, or %NULL
1756  *
1757  * Joins the given components together according to @flags to create
1758  * an absolute URI string. @path may not be %NULL (though it may be the empty
1759  * string).
1760  *
1761  * When @host is present, @path must either be empty or begin with a slash (`/`)
1762  * character. When @host is not present, @path cannot begin with two slash
 * characters (`//`). See
1764  * [RFC 3986, section 3](https://tools.ietf.org/html/rfc3986#section-3).
1765  *
1766  * See also g_uri_join_with_user(), which allows specifying the
1767  * components of the ‘userinfo’ separately.
1768  *
1769  * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1770  * in @flags.
1771  *
1772  * Return value: (not nullable) (transfer full): an absolute URI string
1773  *
1774  * Since: 2.66
1775  */
1776 gchar *
1777 g_uri_join (GUriFlags    flags,
1778             const gchar *scheme,
1779             const gchar *userinfo,
1780             const gchar *host,
1781             gint         port,
1782             const gchar *path,
1783             const gchar *query,
1784             const gchar *fragment)
1785 {
1786   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1787   g_return_val_if_fail (path != NULL, NULL);
1788
1789   return g_uri_join_internal (flags,
1790                               scheme,
1791                               TRUE, userinfo, NULL, NULL,
1792                               host,
1793                               port,
1794                               path,
1795                               query,
1796                               fragment);
1797 }
1798
1799 /**
1800  * g_uri_join_with_user:
1801  * @flags: flags describing how to build the URI string
1802  * @scheme: (nullable): the URI scheme, or %NULL
1803  * @user: (nullable): the user component of the userinfo, or %NULL
1804  * @password: (nullable): the password component of the userinfo, or
1805  *   %NULL
1806  * @auth_params: (nullable): the auth params of the userinfo, or
1807  *   %NULL
1808  * @host: (nullable): the host component, or %NULL
1809  * @port: the port, or `-1`
1810  * @path: (not nullable): the path component
1811  * @query: (nullable): the query component, or %NULL
1812  * @fragment: (nullable): the fragment, or %NULL
1813  *
1814  * Joins the given components together according to @flags to create
1815  * an absolute URI string. @path may not be %NULL (though it may be the empty
1816  * string).
1817  *
1818  * In contrast to g_uri_join(), this allows specifying the components
1819  * of the ‘userinfo’ separately. It otherwise behaves the same.
1820  *
1821  * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1822  * in @flags.
1823  *
1824  * Return value: (not nullable) (transfer full): an absolute URI string
1825  *
1826  * Since: 2.66
1827  */
1828 gchar *
1829 g_uri_join_with_user (GUriFlags    flags,
1830                       const gchar *scheme,
1831                       const gchar *user,
1832                       const gchar *password,
1833                       const gchar *auth_params,
1834                       const gchar *host,
1835                       gint         port,
1836                       const gchar *path,
1837                       const gchar *query,
1838                       const gchar *fragment)
1839 {
1840   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1841   g_return_val_if_fail (path != NULL, NULL);
1842
1843   return g_uri_join_internal (flags,
1844                               scheme,
1845                               FALSE, user, password, auth_params,
1846                               host,
1847                               port,
1848                               path,
1849                               query,
1850                               fragment);
1851 }
1852
1853 /**
1854  * g_uri_build:
1855  * @flags: flags describing how to build the #GUri
1856  * @scheme: (not nullable): the URI scheme
1857  * @userinfo: (nullable): the userinfo component, or %NULL
1858  * @host: (nullable): the host component, or %NULL
1859  * @port: the port, or `-1`
1860  * @path: (not nullable): the path component
1861  * @query: (nullable): the query component, or %NULL
1862  * @fragment: (nullable): the fragment, or %NULL
1863  *
1864  * Creates a new #GUri from the given components according to @flags.
1865  *
1866  * See also g_uri_build_with_user(), which allows specifying the
1867  * components of the "userinfo" separately.
1868  *
1869  * Return value: (not nullable) (transfer full): a new #GUri
1870  *
1871  * Since: 2.66
1872  */
1873 GUri *
1874 g_uri_build (GUriFlags    flags,
1875              const gchar *scheme,
1876              const gchar *userinfo,
1877              const gchar *host,
1878              gint         port,
1879              const gchar *path,
1880              const gchar *query,
1881              const gchar *fragment)
1882 {
1883   GUri *uri;
1884
1885   g_return_val_if_fail (scheme != NULL, NULL);
1886   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1887   g_return_val_if_fail (path != NULL, NULL);
1888
1889   uri = g_atomic_rc_box_new0 (GUri);
1890   uri->flags = flags;
1891   uri->scheme = g_ascii_strdown (scheme, -1);
1892   uri->userinfo = g_strdup (userinfo);
1893   uri->host = g_strdup (host);
1894   uri->port = port;
1895   uri->path = g_strdup (path);
1896   uri->query = g_strdup (query);
1897   uri->fragment = g_strdup (fragment);
1898
1899   return g_steal_pointer (&uri);
1900 }
1901
1902 /**
1903  * g_uri_build_with_user:
1904  * @flags: flags describing how to build the #GUri
1905  * @scheme: (not nullable): the URI scheme
1906  * @user: (nullable): the user component of the userinfo, or %NULL
1907  * @password: (nullable): the password component of the userinfo, or %NULL
1908  * @auth_params: (nullable): the auth params of the userinfo, or %NULL
1909  * @host: (nullable): the host component, or %NULL
1910  * @port: the port, or `-1`
1911  * @path: (not nullable): the path component
1912  * @query: (nullable): the query component, or %NULL
1913  * @fragment: (nullable): the fragment, or %NULL
1914  *
1915  * Creates a new #GUri from the given components according to @flags
1916  * (%G_URI_FLAGS_HAS_PASSWORD is added unconditionally). The @flags must be
1917  * coherent with the passed values, in particular use `%`-encoded values with
1918  * %G_URI_FLAGS_ENCODED.
1919  *
1920  * In contrast to g_uri_build(), this allows specifying the components
1921  * of the ‘userinfo’ field separately. Note that @user must be non-%NULL
1922  * if either @password or @auth_params is non-%NULL.
1923  *
1924  * Return value: (not nullable) (transfer full): a new #GUri
1925  *
1926  * Since: 2.66
1927  */
1928 GUri *
1929 g_uri_build_with_user (GUriFlags    flags,
1930                        const gchar *scheme,
1931                        const gchar *user,
1932                        const gchar *password,
1933                        const gchar *auth_params,
1934                        const gchar *host,
1935                        gint         port,
1936                        const gchar *path,
1937                        const gchar *query,
1938                        const gchar *fragment)
1939 {
1940   GUri *uri;
1941   GString *userinfo;
1942
1943   g_return_val_if_fail (scheme != NULL, NULL);
1944   g_return_val_if_fail (password == NULL || user != NULL, NULL);
1945   g_return_val_if_fail (auth_params == NULL || user != NULL, NULL);
1946   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1947   g_return_val_if_fail (path != NULL, NULL);
1948
1949   uri = g_atomic_rc_box_new0 (GUri);
1950   uri->flags = flags | G_URI_FLAGS_HAS_PASSWORD;
1951   uri->scheme = g_ascii_strdown (scheme, -1);
1952   uri->user = g_strdup (user);
1953   uri->password = g_strdup (password);
1954   uri->auth_params = g_strdup (auth_params);
1955   uri->host = g_strdup (host);
1956   uri->port = port;
1957   uri->path = g_strdup (path);
1958   uri->query = g_strdup (query);
1959   uri->fragment = g_strdup (fragment);
1960
1961   if (user)
1962     {
1963       userinfo = g_string_new (user);
1964       if (password)
1965         {
1966           g_string_append_c (userinfo, ':');
1967           g_string_append (userinfo, uri->password);
1968         }
1969       if (auth_params)
1970         {
1971           g_string_append_c (userinfo, ';');
1972           g_string_append (userinfo, uri->auth_params);
1973         }
1974       uri->userinfo = g_string_free (userinfo, FALSE);
1975     }
1976
1977   return g_steal_pointer (&uri);
1978 }
1979
1980 /**
1981  * g_uri_to_string:
1982  * @uri: a #GUri
1983  *
1984  * Returns a string representing @uri.
1985  *
1986  * This is not guaranteed to return a string which is identical to the
1987  * string that @uri was parsed from. However, if the source URI was
1988  * syntactically correct (according to RFC 3986), and it was parsed
1989  * with %G_URI_FLAGS_ENCODED, then g_uri_to_string() is guaranteed to return
1990  * a string which is at least semantically equivalent to the source
1991  * URI (according to RFC 3986).
1992  *
1993  * If @uri might contain sensitive details, such as authentication parameters,
1994  * or private data in its query string, and the returned string is going to be
1995  * logged, then consider using g_uri_to_string_partial() to redact parts.
1996  *
1997  * Return value: (not nullable) (transfer full): a string representing @uri,
1998  *     which the caller must free.
1999  *
2000  * Since: 2.66
2001  */
2002 gchar *
2003 g_uri_to_string (GUri *uri)
2004 {
2005   g_return_val_if_fail (uri != NULL, NULL);
2006
2007   return g_uri_to_string_partial (uri, G_URI_HIDE_NONE);
2008 }
2009
2010 /**
2011  * g_uri_to_string_partial:
2012  * @uri: a #GUri
2013  * @flags: flags describing what parts of @uri to hide
2014  *
2015  * Returns a string representing @uri, subject to the options in
2016  * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
2017  *
2018  * Return value: (not nullable) (transfer full): a string representing
2019  *     @uri, which the caller must free.
2020  *
2021  * Since: 2.66
2022  */
2023 gchar *
2024 g_uri_to_string_partial (GUri          *uri,
2025                          GUriHideFlags  flags)
2026 {
2027   gboolean hide_user = (flags & G_URI_HIDE_USERINFO);
2028   gboolean hide_password = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD));
2029   gboolean hide_auth_params = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS));
2030   gboolean hide_query = (flags & G_URI_HIDE_QUERY);
2031   gboolean hide_fragment = (flags & G_URI_HIDE_FRAGMENT);
2032
2033   g_return_val_if_fail (uri != NULL, NULL);
2034
2035   if (uri->flags & (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_HAS_AUTH_PARAMS))
2036     {
2037       return g_uri_join_with_user (uri->flags,
2038                                    uri->scheme,
2039                                    hide_user ? NULL : uri->user,
2040                                    hide_password ? NULL : uri->password,
2041                                    hide_auth_params ? NULL : uri->auth_params,
2042                                    uri->host,
2043                                    uri->port,
2044                                    uri->path,
2045                                    hide_query ? NULL : uri->query,
2046                                    hide_fragment ? NULL : uri->fragment);
2047     }
2048
2049   return g_uri_join (uri->flags,
2050                      uri->scheme,
2051                      hide_user ? NULL : uri->userinfo,
2052                      uri->host,
2053                      uri->port,
2054                      uri->path,
2055                      hide_query ? NULL : uri->query,
2056                      hide_fragment ? NULL : uri->fragment);
2057 }
2058
2059 /* This is just a copy of g_str_hash() with g_ascii_toupper() added */
2060 static guint
2061 str_ascii_case_hash (gconstpointer v)
2062 {
2063   const signed char *p;
2064   guint32 h = 5381;
2065
2066   for (p = v; *p != '\0'; p++)
2067     h = (h << 5) + h + g_ascii_toupper (*p);
2068
2069   return h;
2070 }
2071
2072 static gboolean
2073 str_ascii_case_equal (gconstpointer v1,
2074                       gconstpointer v2)
2075 {
2076   const gchar *string1 = v1;
2077   const gchar *string2 = v2;
2078
2079   return g_ascii_strcasecmp (string1, string2) == 0;
2080 }
2081
2082 /**
2083  * GUriParamsIter:
2084  *
2085  * Many URI schemes include one or more attribute/value pairs as part of the URI
2086  * value. For example `scheme://server/path?query=string&is=there` has two
2087  * attributes – `query=string` and `is=there` – in its query part.
2088  *
2089  * A #GUriParamsIter structure represents an iterator that can be used to
2090  * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
2091  * structures are typically allocated on the stack and then initialized with
2092  * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
2093  * for a usage example.
2094  *
2095  * Since: 2.66
2096  */
typedef struct
{
  /* Private layout behind the public GUriParamsIter; the iterator functions
   * cast a GUriParamsIter pointer to this type. */

  /* Flags given to g_uri_params_iter_init(). */
  GUriParamsFlags flags;
  /* Start of the next attribute/value pair still to be parsed. */
  const gchar    *attr;
  /* End of the parameter string (params + length); parsing stops here. */
  const gchar    *end;
  guint8          sep_table[256]; /* 1 = index is a separator; 0 otherwise */
} RealIter;

/* RealIter is accessed through GUriParamsIter storage, so it must occupy
 * exactly the same size and must not need stricter alignment. */
G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
2107
2108 /**
2109  * g_uri_params_iter_init:
2110  * @iter: an uninitialized #GUriParamsIter
2111  * @params: a `%`-encoded string containing `attribute=value`
2112  *   parameters
2113  * @length: the length of @params, or `-1` if it is nul-terminated
2114  * @separators: the separator byte character set between parameters. (usually
2115  *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2116  *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2117  *   anything but ASCII characters. You may pass an empty set, in which case
2118  *   no splitting will occur.
2119  * @flags: flags to modify the way the parameters are handled.
2120  *
2121  * Initializes an attribute/value pair iterator.
2122  *
2123  * The iterator keeps pointers to the @params and @separators arguments, those
2124  * variables must thus outlive the iterator and not be modified during the
2125  * iteration.
2126  *
2127  * If %G_URI_PARAMS_WWW_FORM is passed in @flags, `+` characters in the param
2128  * string will be replaced with spaces in the output. For example, `foo=bar+baz`
2129  * will give attribute `foo` with value `bar baz`. This is commonly used on the
2130  * web (the `https` and `http` schemes only), but is deprecated in favour of
2131  * the equivalent of encoding spaces as `%20`.
2132  *
2133  * Unlike with g_uri_parse_params(), %G_URI_PARAMS_CASE_INSENSITIVE has no
2134  * effect if passed to @flags for g_uri_params_iter_init(). The caller is
2135  * responsible for doing their own case-insensitive comparisons.
2136  *
2137  * |[<!-- language="C" -->
2138  * GUriParamsIter iter;
2139  * GError *error = NULL;
2140  * gchar *unowned_attr, *unowned_value;
2141  *
2142  * g_uri_params_iter_init (&iter, "foo=bar&baz=bar&Foo=frob&baz=bar2", -1, "&", G_URI_PARAMS_NONE);
2143  * while (g_uri_params_iter_next (&iter, &unowned_attr, &unowned_value, &error))
2144  *   {
2145  *     g_autofree gchar *attr = g_steal_pointer (&unowned_attr);
2146  *     g_autofree gchar *value = g_steal_pointer (&unowned_value);
2147  *     // do something with attr and value; this code will be called 4 times
2148  *     // for the params string in this example: once with attr=foo and value=bar,
2149  *     // then with baz/bar, then Foo/frob, then baz/bar2.
2150  *   }
2151  * if (error)
2152  *   // handle parsing error
2153  * ]|
2154  *
2155  * Since: 2.66
2156  */
2157 void
2158 g_uri_params_iter_init (GUriParamsIter *iter,
2159                         const gchar    *params,
2160                         gssize          length,
2161                         const gchar    *separators,
2162                         GUriParamsFlags flags)
2163 {
2164   RealIter *ri = (RealIter *)iter;
2165   const gchar *s;
2166
2167   g_return_if_fail (iter != NULL);
2168   g_return_if_fail (length == 0 || params != NULL);
2169   g_return_if_fail (length >= -1);
2170   g_return_if_fail (separators != NULL);
2171
2172   ri->flags = flags;
2173
2174   if (length == -1)
2175     ri->end = params + strlen (params);
2176   else
2177     ri->end = params + length;
2178
2179   memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
2180   for (s = separators; *s != '\0'; ++s)
2181     ri->sep_table[*(guchar *)s] = TRUE;
2182
2183   ri->attr = params;
2184 }
2185
2186 /**
2187  * g_uri_params_iter_next:
2188  * @iter: an initialized #GUriParamsIter
2189  * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
2190  *     the attribute, or %NULL.
2191  * @value: (out) (nullable) (optional) (transfer full): on return, contains
2192  *     the value, or %NULL.
2193  * @error: #GError for error reporting, or %NULL to ignore.
2194  *
2195  * Advances @iter and retrieves the next attribute/value. %FALSE is returned if
2196  * an error has occurred (in which case @error is set), or if the end of the
2197  * iteration is reached (in which case @attribute and @value are set to %NULL
2198  * and the iterator becomes invalid). If %TRUE is returned,
2199  * g_uri_params_iter_next() may be called again to receive another
2200  * attribute/value pair.
2201  *
2202  * Note that the same @attribute may be returned multiple times, since URIs
2203  * allow repeated attributes.
2204  *
2205  * Returns: %FALSE if the end of the parameters has been reached or an error was
2206  *     encountered. %TRUE otherwise.
2207  *
2208  * Since: 2.66
2209  */
2210 gboolean
2211 g_uri_params_iter_next (GUriParamsIter *iter,
2212                         gchar         **attribute,
2213                         gchar         **value,
2214                         GError        **error)
2215 {
2216   RealIter *ri = (RealIter *)iter;
2217   const gchar *attr_end, *val, *val_end;
2218   gchar *decoded_attr, *decoded_value;
2219   gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM;
2220   GUriFlags decode_flags = G_URI_FLAGS_NONE;
2221
2222   g_return_val_if_fail (iter != NULL, FALSE);
2223   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2224
2225   /* Pre-clear these in case of failure or finishing. */
2226   if (attribute)
2227     *attribute = NULL;
2228   if (value)
2229     *value = NULL;
2230
2231   if (ri->attr >= ri->end)
2232     return FALSE;
2233
2234   if (ri->flags & G_URI_PARAMS_PARSE_RELAXED)
2235     decode_flags |= G_URI_FLAGS_PARSE_RELAXED;
2236
2237   /* Check if each character in @attr is a separator, by indexing by the
2238    * character value into the @sep_table, which has value 1 stored at an
2239    * index if that index is a separator. */
2240   for (val_end = ri->attr; val_end < ri->end; val_end++)
2241     if (ri->sep_table[*(guchar *)val_end])
2242       break;
2243
2244   attr_end = memchr (ri->attr, '=', val_end - ri->attr);
2245   if (!attr_end)
2246     {
2247       g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
2248                            _("Missing ‘=’ and parameter value"));
2249       return FALSE;
2250     }
2251   if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
2252                    www_form, decode_flags, G_URI_ERROR_FAILED, error))
2253     {
2254       return FALSE;
2255     }
2256
2257   val = attr_end + 1;
2258   if (!uri_decode (&decoded_value, NULL, val, val_end - val,
2259                    www_form, decode_flags, G_URI_ERROR_FAILED, error))
2260     {
2261       g_free (decoded_attr);
2262       return FALSE;
2263     }
2264
2265   if (attribute)
2266     *attribute = g_steal_pointer (&decoded_attr);
2267   if (value)
2268     *value = g_steal_pointer (&decoded_value);
2269
2270   g_free (decoded_attr);
2271   g_free (decoded_value);
2272
2273   ri->attr = val_end + 1;
2274   return TRUE;
2275 }
2276
2277 /**
2278  * g_uri_parse_params:
2279  * @params: a `%`-encoded string containing `attribute=value`
2280  *   parameters
2281  * @length: the length of @params, or `-1` if it is nul-terminated
2282  * @separators: the separator byte character set between parameters. (usually
2283  *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2284  *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2285  *   anything but ASCII characters. You may pass an empty set, in which case
2286  *   no splitting will occur.
2287  * @flags: flags to modify the way the parameters are handled.
2288  * @error: #GError for error reporting, or %NULL to ignore.
2289  *
2290  * Many URI schemes include one or more attribute/value pairs as part of the URI
2291  * value. This method can be used to parse them into a hash table. When an
2292  * attribute has multiple occurrences, the last value is the final returned
2293  * value. If you need to handle repeated attributes differently, use
2294  * #GUriParamsIter.
2295  *
2296  * The @params string is assumed to still be `%`-encoded, but the returned
2297  * values will be fully decoded. (Thus it is possible that the returned values
2298  * may contain `=` or @separators, if the value was encoded in the input.)
2299  * Invalid `%`-encoding is treated as with the %G_URI_FLAGS_PARSE_RELAXED
2300  * rules for g_uri_parse(). (However, if @params is the path or query string
2301  * from a #GUri that was parsed without %G_URI_FLAGS_PARSE_RELAXED and
2302  * %G_URI_FLAGS_ENCODED, then you already know that it does not contain any
2303  * invalid encoding.)
2304  *
2305  * %G_URI_PARAMS_WWW_FORM is handled as documented for g_uri_params_iter_init().
2306  *
2307  * If %G_URI_PARAMS_CASE_INSENSITIVE is passed to @flags, attributes will be
2308  * compared case-insensitively, so a params string `attr=123&Attr=456` will only
2309  * return a single attribute–value pair, `Attr=456`. Case will be preserved in
2310  * the returned attributes.
2311  *
2312  * If @params cannot be parsed (for example, it contains two @separators
2313  * characters in a row), then @error is set and %NULL is returned.
2314  *
2315  * Return value: (transfer full) (element-type utf8 utf8):
2316  *     A hash table of attribute/value pairs, with both names and values
2317  *     fully-decoded; or %NULL on error.
2318  *
2319  * Since: 2.66
2320  */
2321 GHashTable *
2322 g_uri_parse_params (const gchar     *params,
2323                     gssize           length,
2324                     const gchar     *separators,
2325                     GUriParamsFlags  flags,
2326                     GError         **error)
2327 {
2328   GHashTable *hash;
2329   GUriParamsIter iter;
2330   gchar *attribute, *value;
2331   GError *err = NULL;
2332
2333   g_return_val_if_fail (length == 0 || params != NULL, NULL);
2334   g_return_val_if_fail (length >= -1, NULL);
2335   g_return_val_if_fail (separators != NULL, NULL);
2336   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2337
2338   if (flags & G_URI_PARAMS_CASE_INSENSITIVE)
2339     {
2340       hash = g_hash_table_new_full (str_ascii_case_hash,
2341                                     str_ascii_case_equal,
2342                                     g_free, g_free);
2343     }
2344   else
2345     {
2346       hash = g_hash_table_new_full (g_str_hash, g_str_equal,
2347                                     g_free, g_free);
2348     }
2349
2350   g_uri_params_iter_init (&iter, params, length, separators, flags);
2351
2352   while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
2353     g_hash_table_insert (hash, attribute, value);
2354
2355   if (err)
2356     {
2357       g_propagate_error (error, g_steal_pointer (&err));
2358       g_hash_table_destroy (hash);
2359       return NULL;
2360     }
2361
2362   return g_steal_pointer (&hash);
2363 }
2364
2365 /**
2366  * g_uri_get_scheme:
2367  * @uri: a #GUri
2368  *
2369  * Gets @uri's scheme. Note that this will always be all-lowercase,
2370  * regardless of the string or strings that @uri was created from.
2371  *
2372  * Return value: (not nullable): @uri's scheme.
2373  *
2374  * Since: 2.66
2375  */
2376 const gchar *
2377 g_uri_get_scheme (GUri *uri)
2378 {
2379   g_return_val_if_fail (uri != NULL, NULL);
2380
2381   return uri->scheme;
2382 }
2383
2384 /**
2385  * g_uri_get_userinfo:
2386  * @uri: a #GUri
2387  *
2388  * Gets @uri's userinfo, which may contain `%`-encoding, depending on
2389  * the flags with which @uri was created.
2390  *
2391  * Return value: (nullable): @uri's userinfo.
2392  *
2393  * Since: 2.66
2394  */
2395 const gchar *
2396 g_uri_get_userinfo (GUri *uri)
2397 {
2398   g_return_val_if_fail (uri != NULL, NULL);
2399
2400   return uri->userinfo;
2401 }
2402
2403 /**
2404  * g_uri_get_user:
2405  * @uri: a #GUri
2406  *
2407  * Gets the ‘username’ component of @uri's userinfo, which may contain
2408  * `%`-encoding, depending on the flags with which @uri was created.
2409  * If @uri was not created with %G_URI_FLAGS_HAS_PASSWORD or
2410  * %G_URI_FLAGS_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo().
2411  *
2412  * Return value: (nullable): @uri's user.
2413  *
2414  * Since: 2.66
2415  */
2416 const gchar *
2417 g_uri_get_user (GUri *uri)
2418 {
2419   g_return_val_if_fail (uri != NULL, NULL);
2420
2421   return uri->user;
2422 }
2423
2424 /**
2425  * g_uri_get_password:
2426  * @uri: a #GUri
2427  *
2428  * Gets @uri's password, which may contain `%`-encoding, depending on
2429  * the flags with which @uri was created. (If @uri was not created
2430  * with %G_URI_FLAGS_HAS_PASSWORD then this will be %NULL.)
2431  *
2432  * Return value: (nullable): @uri's password.
2433  *
2434  * Since: 2.66
2435  */
2436 const gchar *
2437 g_uri_get_password (GUri *uri)
2438 {
2439   g_return_val_if_fail (uri != NULL, NULL);
2440
2441   return uri->password;
2442 }
2443
2444 /**
2445  * g_uri_get_auth_params:
2446  * @uri: a #GUri
2447  *
2448  * Gets @uri's authentication parameters, which may contain
2449  * `%`-encoding, depending on the flags with which @uri was created.
2450  * (If @uri was not created with %G_URI_FLAGS_HAS_AUTH_PARAMS then this will
2451  * be %NULL.)
2452  *
2453  * Depending on the URI scheme, g_uri_parse_params() may be useful for
2454  * further parsing this information.
2455  *
2456  * Return value: (nullable): @uri's authentication parameters.
2457  *
2458  * Since: 2.66
2459  */
2460 const gchar *
2461 g_uri_get_auth_params (GUri *uri)
2462 {
2463   g_return_val_if_fail (uri != NULL, NULL);
2464
2465   return uri->auth_params;
2466 }
2467
2468 /**
2469  * g_uri_get_host:
2470  * @uri: a #GUri
2471  *
2472  * Gets @uri's host. This will never have `%`-encoded characters,
2473  * unless it is non-UTF-8 (which can only be the case if @uri was
2474  * created with %G_URI_FLAGS_NON_DNS).
2475  *
2476  * If @uri contained an IPv6 address literal, this value will be just
2477  * that address, without the brackets around it that are necessary in
2478  * the string form of the URI. Note that in this case there may also
2479  * be a scope ID attached to the address. Eg, `fe80::1234%``em1` (or
2480  * `fe80::1234%``25em1` if the string is still encoded).
2481  *
2482  * Return value: (nullable): @uri's host.
2483  *
2484  * Since: 2.66
2485  */
2486 const gchar *
2487 g_uri_get_host (GUri *uri)
2488 {
2489   g_return_val_if_fail (uri != NULL, NULL);
2490
2491   return uri->host;
2492 }
2493
2494 /**
2495  * g_uri_get_port:
2496  * @uri: a #GUri
2497  *
2498  * Gets @uri's port.
2499  *
2500  * Return value: @uri's port, or `-1` if no port was specified.
2501  *
2502  * Since: 2.66
2503  */
2504 gint
2505 g_uri_get_port (GUri *uri)
2506 {
2507   g_return_val_if_fail (uri != NULL, -1);
2508
2509   if (uri->port == -1 && uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE)
2510     return default_scheme_port (uri->scheme);
2511
2512   return uri->port;
2513 }
2514
2515 /**
2516  * g_uri_get_path:
2517  * @uri: a #GUri
2518  *
2519  * Gets @uri's path, which may contain `%`-encoding, depending on the
2520  * flags with which @uri was created.
2521  *
2522  * Return value: (not nullable): @uri's path.
2523  *
2524  * Since: 2.66
2525  */
2526 const gchar *
2527 g_uri_get_path (GUri *uri)
2528 {
2529   g_return_val_if_fail (uri != NULL, NULL);
2530
2531   return uri->path;
2532 }
2533
2534 /**
2535  * g_uri_get_query:
2536  * @uri: a #GUri
2537  *
2538  * Gets @uri's query, which may contain `%`-encoding, depending on the
2539  * flags with which @uri was created.
2540  *
2541  * For queries consisting of a series of `name=value` parameters,
2542  * #GUriParamsIter or g_uri_parse_params() may be useful.
2543  *
2544  * Return value: (nullable): @uri's query.
2545  *
2546  * Since: 2.66
2547  */
2548 const gchar *
2549 g_uri_get_query (GUri *uri)
2550 {
2551   g_return_val_if_fail (uri != NULL, NULL);
2552
2553   return uri->query;
2554 }
2555
2556 /**
2557  * g_uri_get_fragment:
2558  * @uri: a #GUri
2559  *
2560  * Gets @uri's fragment, which may contain `%`-encoding, depending on
2561  * the flags with which @uri was created.
2562  *
2563  * Return value: (nullable): @uri's fragment.
2564  *
2565  * Since: 2.66
2566  */
2567 const gchar *
2568 g_uri_get_fragment (GUri *uri)
2569 {
2570   g_return_val_if_fail (uri != NULL, NULL);
2571
2572   return uri->fragment;
2573 }
2574
2575
2576 /**
2577  * g_uri_get_flags:
2578  * @uri: a #GUri
2579  *
2580  * Gets @uri's flags set upon construction.
2581  *
2582  * Return value: @uri's flags.
2583  *
2584  * Since: 2.66
2585  **/
2586 GUriFlags
2587 g_uri_get_flags (GUri *uri)
2588 {
2589   g_return_val_if_fail (uri != NULL, G_URI_FLAGS_NONE);
2590
2591   return uri->flags;
2592 }
2593
2594 /**
2595  * g_uri_unescape_segment:
2596  * @escaped_string: (nullable): A string, may be %NULL
2597  * @escaped_string_end: (nullable): Pointer to end of @escaped_string,
2598  *   may be %NULL
2599  * @illegal_characters: (nullable): An optional string of illegal
2600  *   characters not to be allowed, may be %NULL
2601  *
2602  * Unescapes a segment of an escaped string.
2603  *
2604  * If any of the characters in @illegal_characters or the NUL
2605  * character appears as an escaped character in @escaped_string, then
2606  * that is an error and %NULL will be returned. This is useful if you
2607  * want to avoid for instance having a slash being expanded in an
2608  * escaped path element, which might confuse pathname handling.
2609  *
2610  * Note: `NUL` byte is not accepted in the output, in contrast to
2611  * g_uri_unescape_bytes().
2612  *
2613  * Returns: (nullable): an unescaped version of @escaped_string,
2614  * or %NULL on error. The returned string should be freed when no longer
2615  * needed.  As a special case if %NULL is given for @escaped_string, this
2616  * function will return %NULL.
2617  *
2618  * Since: 2.16
2619  **/
2620 gchar *
2621 g_uri_unescape_segment (const gchar *escaped_string,
2622                         const gchar *escaped_string_end,
2623                         const gchar *illegal_characters)
2624 {
2625   gchar *unescaped;
2626   gsize length;
2627   gssize decoded_len;
2628
2629   if (!escaped_string)
2630     return NULL;
2631
2632   if (escaped_string_end)
2633     length = escaped_string_end - escaped_string;
2634   else
2635     length = strlen (escaped_string);
2636
2637   decoded_len = uri_decoder (&unescaped,
2638                              illegal_characters,
2639                              escaped_string, length,
2640                              FALSE, FALSE,
2641                              G_URI_FLAGS_ENCODED,
2642                              0, NULL);
2643   if (decoded_len < 0)
2644     return NULL;
2645
2646   if (memchr (unescaped, '\0', decoded_len))
2647     {
2648       g_free (unescaped);
2649       return NULL;
2650     }
2651
2652   return unescaped;
2653 }
2654
2655 /**
2656  * g_uri_unescape_string:
2657  * @escaped_string: an escaped string to be unescaped.
2658  * @illegal_characters: (nullable): a string of illegal characters
2659  *   not to be allowed, or %NULL.
2660  *
2661  * Unescapes a whole escaped string.
2662  *
2663  * If any of the characters in @illegal_characters or the NUL
2664  * character appears as an escaped character in @escaped_string, then
2665  * that is an error and %NULL will be returned. This is useful if you
2666  * want to avoid for instance having a slash being expanded in an
2667  * escaped path element, which might confuse pathname handling.
2668  *
2669  * Returns: (nullable): an unescaped version of @escaped_string.
2670  * The returned string should be freed when no longer needed.
2671  *
2672  * Since: 2.16
2673  **/
2674 gchar *
2675 g_uri_unescape_string (const gchar *escaped_string,
2676                        const gchar *illegal_characters)
2677 {
2678   return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
2679 }
2680
2681 /**
2682  * g_uri_escape_string:
2683  * @unescaped: the unescaped input string.
2684  * @reserved_chars_allowed: (nullable): a string of reserved
2685  *   characters that are allowed to be used, or %NULL.
2686  * @allow_utf8: %TRUE if the result can include UTF-8 characters.
2687  *
2688  * Escapes a string for use in a URI.
2689  *
2690  * Normally all characters that are not "unreserved" (i.e. ASCII
2691  * alphanumerical characters plus dash, dot, underscore and tilde) are
2692  * escaped. But if you specify characters in @reserved_chars_allowed
2693  * they are not escaped. This is useful for the "reserved" characters
2694  * in the URI specification, since those are allowed unescaped in some
2695  * portions of a URI.
2696  *
2697  * Returns: (not nullable): an escaped version of @unescaped. The
2698  * returned string should be freed when no longer needed.
2699  *
2700  * Since: 2.16
2701  **/
2702 gchar *
2703 g_uri_escape_string (const gchar *unescaped,
2704                      const gchar *reserved_chars_allowed,
2705                      gboolean     allow_utf8)
2706 {
2707   GString *s;
2708
2709   g_return_val_if_fail (unescaped != NULL, NULL);
2710
2711   s = g_string_sized_new (strlen (unescaped) * 1.25);
2712
2713   g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
2714
2715   return g_string_free (s, FALSE);
2716 }
2717
2718 /**
2719  * g_uri_unescape_bytes:
2720  * @escaped_string: A URI-escaped string
2721  * @length: the length (in bytes) of @escaped_string to escape, or `-1` if it
2722  *   is nul-terminated.
2723  * @illegal_characters: (nullable): a string of illegal characters
2724  *   not to be allowed, or %NULL.
2725  * @error: #GError for error reporting, or %NULL to ignore.
2726  *
2727  * Unescapes a segment of an escaped string as binary data.
2728  *
2729  * Note that in contrast to g_uri_unescape_string(), this does allow
2730  * nul bytes to appear in the output.
2731  *
2732  * If any of the characters in @illegal_characters appears as an escaped
2733  * character in @escaped_string, then that is an error and %NULL will be
2734  * returned. This is useful if you want to avoid for instance having a slash
2735  * being expanded in an escaped path element, which might confuse pathname
2736  * handling.
2737  *
2738  * Returns: (transfer full): an unescaped version of @escaped_string
2739  *     or %NULL on error (if decoding failed, using %G_URI_ERROR_FAILED error
2740  *     code). The returned #GBytes should be unreffed when no longer needed.
2741  *
2742  * Since: 2.66
2743  **/
2744 GBytes *
2745 g_uri_unescape_bytes (const gchar *escaped_string,
2746                       gssize       length,
2747                       const char *illegal_characters,
2748                       GError     **error)
2749 {
2750   gchar *buf;
2751   gssize unescaped_length;
2752
2753   g_return_val_if_fail (escaped_string != NULL, NULL);
2754   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
2755
2756   if (length == -1)
2757     length = strlen (escaped_string);
2758
2759   unescaped_length = uri_decoder (&buf,
2760                                   illegal_characters,
2761                                   escaped_string, length,
2762                                   FALSE,
2763                                   FALSE,
2764                                   G_URI_FLAGS_ENCODED,
2765                                   G_URI_ERROR_FAILED, error);
2766   if (unescaped_length == -1)
2767     return NULL;
2768
2769   return g_bytes_new_take (buf, unescaped_length);
2770 }
2771
2772 /**
2773  * g_uri_escape_bytes:
2774  * @unescaped: (array length=length): the unescaped input data.
2775  * @length: the length of @unescaped
2776  * @reserved_chars_allowed: (nullable): a string of reserved
2777  *   characters that are allowed to be used, or %NULL.
2778  *
2779  * Escapes arbitrary data for use in a URI.
2780  *
2781  * Normally all characters that are not ‘unreserved’ (i.e. ASCII
2782  * alphanumerical characters plus dash, dot, underscore and tilde) are
2783  * escaped. But if you specify characters in @reserved_chars_allowed
2784  * they are not escaped. This is useful for the ‘reserved’ characters
2785  * in the URI specification, since those are allowed unescaped in some
2786  * portions of a URI.
2787  *
2788  * Though technically incorrect, this will also allow escaping nul
2789  * bytes as `%``00`.
2790  *
2791  * Returns: (not nullable) (transfer full): an escaped version of @unescaped.
2792  *     The returned string should be freed when no longer needed.
2793  *
2794  * Since: 2.66
2795  */
2796 gchar *
2797 g_uri_escape_bytes (const guint8 *unescaped,
2798                     gsize         length,
2799                     const gchar  *reserved_chars_allowed)
2800 {
2801   GString *string;
2802
2803   g_return_val_if_fail (unescaped != NULL, NULL);
2804
2805   string = g_string_sized_new (length * 1.25);
2806
2807   _uri_encoder (string, unescaped, length,
2808                reserved_chars_allowed, FALSE);
2809
2810   return g_string_free (string, FALSE);
2811 }
2812
2813 static gssize
2814 g_uri_scheme_length (const gchar *uri)
2815 {
2816   const gchar *p;
2817
2818   p = uri;
2819   if (!g_ascii_isalpha (*p))
2820     return -1;
2821   p++;
2822   while (g_ascii_isalnum (*p) || *p == '.' || *p == '+' || *p == '-')
2823     p++;
2824
2825   if (p > uri && *p == ':')
2826     return p - uri;
2827
2828   return -1;
2829 }
2830
2831 /**
2832  * g_uri_parse_scheme:
2833  * @uri: a valid URI.
2834  *
2835  * Gets the scheme portion of a URI string.
2836  * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2837  * as:
2838  * |[
2839  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2840  * ]|
2841  * Common schemes include `file`, `https`, `svn+ssh`, etc.
2842  *
2843  * Returns: (transfer full) (nullable): The ‘scheme’ component of the URI, or
2844  *     %NULL on error. The returned string should be freed when no longer needed.
2845  *
2846  * Since: 2.16
2847  **/
2848 gchar *
2849 g_uri_parse_scheme (const gchar *uri)
2850 {
2851   gssize len;
2852
2853   g_return_val_if_fail (uri != NULL, NULL);
2854
2855   len = g_uri_scheme_length (uri);
2856   return len == -1 ? NULL : g_strndup (uri, len);
2857 }
2858
2859 /**
2860  * g_uri_peek_scheme:
2861  * @uri: a valid URI.
2862  *
2863  * Gets the scheme portion of a URI string.
2864  * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2865  * as:
2866  * |[
2867  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2868  * ]|
2869  * Common schemes include `file`, `https`, `svn+ssh`, etc.
2870  *
2871  * Unlike g_uri_parse_scheme(), the returned scheme is normalized to
2872  * all-lowercase and does not need to be freed.
2873  *
2874  * Returns: (transfer none) (nullable): The ‘scheme’ component of the URI, or
2875  *     %NULL on error. The returned string is normalized to all-lowercase, and
2876  *     interned via g_intern_string(), so it does not need to be freed.
2877  *
2878  * Since: 2.66
2879  **/
2880 const gchar *
2881 g_uri_peek_scheme (const gchar *uri)
2882 {
2883   gssize len;
2884   gchar *lower_scheme;
2885   const gchar *scheme;
2886
2887   g_return_val_if_fail (uri != NULL, NULL);
2888
2889   len = g_uri_scheme_length (uri);
2890   if (len == -1)
2891     return NULL;
2892
2893   lower_scheme = g_ascii_strdown (uri, len);
2894   scheme = g_intern_string (lower_scheme);
2895   g_free (lower_scheme);
2896
2897   return scheme;
2898 }
2899
/* Per GLib's G_DEFINE_QUARK documentation, this expands to the definition
 * of g_uri_error_quark(), which returns the quark for the string
 * "g-uri-quark".
 * NOTE(review): presumably this is the error domain behind G_URI_ERROR and
 * the G_URI_ERROR_* codes used in this file — confirm against the header. */
G_DEFINE_QUARK (g-uri-quark, g_uri_error)