glib/guri.c

   1 /* GLIB - Library of useful routines for C programming
   2  * Copyright © 2020 Red Hat, Inc.
   3  *
   4  * SPDX-License-Identifier: LGPL-2.1-or-later
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General
  17  * Public License along with this library; if not, see
  18  * <http://www.gnu.org/licenses/>.
  19  */
  20
  21 #include "config.h"
  22
  23 #include <stdlib.h>
  24 #include <string.h>
  25
  26 #include "glib.h"
  27 #include "glibintl.h"
  28 #include "glib-private.h"
  29 #include "guriprivate.h"
  30
  31 /**
  32  * SECTION:guri
  33  * @short_description: URI-handling utilities
  34  * @include: glib.h
  35  *
  36  * The #GUri type and related functions can be used to parse URIs into
  37  * their components, and build valid URIs from individual components.
  38  *
  39  * Note that #GUri scope is to help manipulate URIs in various applications,
  40  * following [RFC 3986](https://tools.ietf.org/html/rfc3986). In particular,
  41  * it doesn't intend to cover web browser needs, and doesn't implement the
  42  * [WHATWG URL](https://url.spec.whatwg.org/) standard. No APIs are provided to
  43  * help prevent
  44  * [homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack), so
  45  * #GUri is not suitable for formatting URIs for display to the user for making
  46  * security-sensitive decisions.
  47  *
  48  * ## Relative and absolute URIs # {#relative-absolute-uris}
  49  *
  50  * As defined in [RFC 3986](https://tools.ietf.org/html/rfc3986#section-4), the
  51  * hierarchical nature of URIs means that they can either be ‘relative
  52  * references’ (sometimes referred to as ‘relative URIs’) or ‘URIs’ (for
  53  * clarity, ‘URIs’ are referred to in this documentation as
  54  * ‘absolute URIs’ — although
  55  * [in contrast to RFC 3986](https://tools.ietf.org/html/rfc3986#section-4.3),
  56  * fragment identifiers are always allowed).
  57  *
  58  * Relative references have one or more components of the URI missing. In
  59  * particular, they have no scheme. Any other component, such as hostname,
  60  * query, etc. may be missing, apart from a path, which has to be specified (but
  61  * may be empty). The path may be relative, starting with `./` rather than `/`.
  62  *
  63  * For example, a valid relative reference is `./path?query`,
  64  * `/?query#fragment` or `//example.com`.
  65  *
  66  * Absolute URIs have a scheme specified. Any other components of the URI which
  67  * are missing are specified as explicitly unset in the URI, rather than being
  68  * resolved relative to a base URI using g_uri_parse_relative().
  69  *
  70  * For example, a valid absolute URI is `file:///home/bob` or
  71  * `https://search.com?query=string`.
  72  *
  73  * A #GUri instance is always an absolute URI. A string may be an absolute URI
  74  * or a relative reference; see the documentation for individual functions as to
  75  * what forms they accept.
  76  *
  77  * ## Parsing URIs
  78  *
  79  * The most minimalist APIs for parsing URIs are g_uri_split() and
  80  * g_uri_split_with_user(). These split a URI into its component
  81  * parts, and return the parts; the difference between the two is that
  82  * g_uri_split() treats the ‘userinfo’ component of the URI as a
  83  * single element, while g_uri_split_with_user() can (depending on the
  84  * #GUriFlags you pass) treat it as containing a username, password,
  85  * and authentication parameters. Alternatively, g_uri_split_network()
  86  * can be used when you are only interested in the components that are
  87  * needed to initiate a network connection to the service (scheme,
  88  * host, and port).
  89  *
  90  * g_uri_parse() is similar to g_uri_split(), but instead of returning
  91  * individual strings, it returns a #GUri structure (and it requires
  92  * that the URI be an absolute URI).
  93  *
  94  * g_uri_resolve_relative() and g_uri_parse_relative() allow you to
  95  * resolve a relative URI relative to a base URI.
  96  * g_uri_resolve_relative() takes two strings and returns a string,
  97  * and g_uri_parse_relative() takes a #GUri and a string and returns a
  98  * #GUri.
  99  *
 100  * All of the parsing functions take a #GUriFlags argument describing
 101  * exactly how to parse the URI; see the documentation for that type
 102  * for more details on the specific flags that you can pass. If you
 103  * need to choose different flags based on the type of URI, you can
 104  * use g_uri_peek_scheme() on the URI string to check the scheme
 105  * first, and use that to decide what flags to parse it with.
 106  *
 107  * For example, you might want to use %G_URI_PARAMS_WWW_FORM when parsing the
 108  * params for a web URI, so compare the result of g_uri_peek_scheme() against
 109  * `http` and `https`.
 110  *
 111  * ## Building URIs
 112  *
 113  * g_uri_join() and g_uri_join_with_user() can be used to construct
 114  * valid URI strings from a set of component strings. They are the
 115  * inverse of g_uri_split() and g_uri_split_with_user().
 116  *
 117  * Similarly, g_uri_build() and g_uri_build_with_user() can be used to
 118  * construct a #GUri from a set of component strings.
 119  *
 120  * As with the parsing functions, the building functions take a
 121  * #GUriFlags argument. In particular, it is important to keep in mind
 122  * whether the URI components you are using are already `%`-encoded. If so,
 123  * you must pass the %G_URI_FLAGS_ENCODED flag.
 124  *
 125  * ## `file://` URIs
 126  *
 127  * Note that Windows and Unix both define special rules for parsing
 128  * `file://` URIs (involving non-UTF-8 character sets on Unix, and the
 129  * interpretation of path separators on Windows). #GUri does not
 130  * implement these rules. Use g_filename_from_uri() and
 131  * g_filename_to_uri() if you want to properly convert between
 132  * `file://` URIs and local filenames.
 133  *
 134  * ## URI Equality
 135  *
 136  * Note that there is no `g_uri_equal ()` function, because comparing
 137  * URIs usefully requires scheme-specific knowledge that #GUri does
 138  * not have. #GUri can help with normalization if you use the various
 139  * encoded #GUriFlags as well as %G_URI_FLAGS_SCHEME_NORMALIZE; however,
 140  * it is not comprehensive.
 141  * For example, `data:,foo` and `data:;base64,Zm9v` resolve to the same
 142  * thing according to the `data:` URI specification which GLib does not
 143  * handle.
 144  *
 145  * Since: 2.66
 146  */
 147
 148 /**
 149  * GUri:
 150  *
 151  * A parsed absolute URI.
 152  *
 153  * Since #GUri only represents absolute URIs, all #GUris will have a
 154  * URI scheme, so g_uri_get_scheme() will always return a non-%NULL
 155  * answer. Likewise, by definition, all URIs have a path component, so
 156  * g_uri_get_path() will always return a non-%NULL string (which may be empty).
 157  *
 158  * If the URI string has an
 159  * [‘authority’ component](https://tools.ietf.org/html/rfc3986#section-3) (that
 160  * is, if the scheme is followed by `://` rather than just `:`), then the
 161  * #GUri will contain a hostname, and possibly a port and ‘userinfo’.
 162  * Additionally, depending on how the #GUri was constructed/parsed (for example,
 163  * using the %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS flags),
 164  * the userinfo may be split out into a username, password, and
 165  * additional authorization-related parameters.
 166  *
 167  * Normally, the components of a #GUri will have all `%`-encoded
 168  * characters decoded. However, if you construct/parse a #GUri with
 169  * %G_URI_FLAGS_ENCODED, then the `%`-encoding will be preserved instead in
 170  * the userinfo, path, and query fields (and in the host field if also
 171  * created with %G_URI_FLAGS_NON_DNS). In particular, this is necessary if
 172  * the URI may contain binary data or non-UTF-8 text, or if decoding
 173  * the components might change the interpretation of the URI.
 174  *
 175  * For example, with the encoded flag:
 176  *
 177  * |[<!-- language="C" -->
 178  *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_ENCODED, &err);
 179  *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue");
 180  * ]|
 181  *
 182  * While the default `%`-decoding behaviour would give:
 183  *
 184  * |[<!-- language="C" -->
 185  *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_NONE, &err);
 186  *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http://host/path?param=value");
 187  * ]|
 188  *
 189  * During decoding, if an invalid UTF-8 string is encountered, parsing will fail
 190  * with an error indicating the bad string location:
 191  *
 192  * |[<!-- language="C" -->
 193  *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fbad%3D%00alue", G_URI_FLAGS_NONE, &err);
 194  *   g_assert_error (err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY);
 195  * ]|
 196  *
 197  * You should pass %G_URI_FLAGS_ENCODED or %G_URI_FLAGS_ENCODED_QUERY if you
 198  * need to handle that case manually. In particular, if the query string
 199  * contains `=` characters that are `%`-encoded, you should let
 200  * g_uri_parse_params() decode the query, so that it is only decoded once.
 201  *
 202  * #GUri is immutable once constructed, and can safely be accessed from
 203  * multiple threads. Its reference counting is atomic.
 204  *
 205  * Since: 2.66
 206  */
 207 struct _GUri {
 208   gchar     *scheme;
 209   gchar     *userinfo;
 210   gchar     *host;
 211   gint       port;
 212   gchar     *path;
 213   gchar     *query;
 214   gchar     *fragment;
 215
 216   gchar     *user;
 217   gchar     *password;
 218   gchar     *auth_params;
 219
 220   GUriFlags  flags;
 221 };
 222
 223 /**
 224  * g_uri_ref: (skip)
 225  * @uri: a #GUri
 226  *
 227  * Increments the reference count of @uri by one.
 228  *
 229  * Returns: @uri
 230  *
 231  * Since: 2.66
 232  */
 233 GUri *
 234 g_uri_ref (GUri *uri)
 235 {
 236   g_return_val_if_fail (uri != NULL, NULL);
 237
 238   return g_atomic_rc_box_acquire (uri);
 239 }
 240
 241 static void
 242 g_uri_clear (GUri *uri)
 243 {
 244   g_free (uri->scheme);
 245   g_free (uri->userinfo);
 246   g_free (uri->host);
 247   g_free (uri->path);
 248   g_free (uri->query);
 249   g_free (uri->fragment);
 250   g_free (uri->user);
 251   g_free (uri->password);
 252   g_free (uri->auth_params);
 253 }
 254
 255 /**
 256  * g_uri_unref: (skip)
 257  * @uri: a #GUri
 258  *
 259  * Atomically decrements the reference count of @uri by one.
 260  *
 261  * When the reference count reaches zero, the resources allocated by
 262  * @uri are freed
 263  *
 264  * Since: 2.66
 265  */
 266 void
 267 g_uri_unref (GUri *uri)
 268 {
 269   g_return_if_fail (uri != NULL);
 270
 271   g_atomic_rc_box_release_full (uri, (GDestroyNotify)g_uri_clear);
 272 }
 273
 274 static gboolean
 275 g_uri_char_is_unreserved (gchar ch)
 276 {
 277   if (g_ascii_isalnum (ch))
 278     return TRUE;
 279   return ch == '-' || ch == '.' || ch == '_' || ch == '~';
 280 }
 281
 282 #define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
 283 #define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
 284
 285 static gssize
 286 uri_decoder (gchar       **out,
 287              const gchar  *illegal_chars,
 288              const gchar  *start,
 289              gsize         length,
 290              gboolean      just_normalize,
 291              gboolean      www_form,
 292              GUriFlags     flags,
 293              GUriError     parse_error,
 294              GError      **error)
 295 {
 296   gchar c;
 297   GString *decoded;
 298   const gchar *invalid, *s, *end;
 299   gssize len;
 300
 301   if (!(flags & G_URI_FLAGS_ENCODED))
 302     just_normalize = FALSE;
 303
 304   decoded = g_string_sized_new (length + 1);
 305   for (s = start, end = s + length; s < end; s++)
 306     {
 307       if (*s == '%')
 308         {
 309           if (s + 2 >= end ||
 310               !g_ascii_isxdigit (s[1]) ||
 311               !g_ascii_isxdigit (s[2]))
 312             {
 313               /* % followed by non-hex or the end of the string; this is an error */
 314               if (!(flags & G_URI_FLAGS_PARSE_RELAXED))
 315                 {
 316                   g_set_error_literal (error, G_URI_ERROR, parse_error,
 317                                        /* xgettext: no-c-format */
 318                                        _("Invalid %-encoding in URI"));
 319                   g_string_free (decoded, TRUE);
 320                   return -1;
 321                 }
 322
 323               /* In non-strict mode, just let it through; we *don't*
 324                * fix it to "%25", since that might change the way that
 325                * the URI's owner would interpret it.
 326                */
 327               g_string_append_c (decoded, *s);
 328               continue;
 329             }
 330
 331           c = HEXCHAR (s);
 332           if (illegal_chars && strchr (illegal_chars, c))
 333             {
 334               g_set_error_literal (error, G_URI_ERROR, parse_error,
 335                                    _("Illegal character in URI"));
 336               g_string_free (decoded, TRUE);
 337               return -1;
 338             }
 339           if (just_normalize && !g_uri_char_is_unreserved (c))
 340             {
 341               /* Leave the % sequence there but normalize it. */
 342               g_string_append_c (decoded, *s);
 343               g_string_append_c (decoded, g_ascii_toupper (s[1]));
 344               g_string_append_c (decoded, g_ascii_toupper (s[2]));
 345               s += 2;
 346             }
 347           else
 348             {
 349               g_string_append_c (decoded, c);
 350               s += 2;
 351             }
 352         }
 353       else if (www_form && *s == '+')
 354         g_string_append_c (decoded, ' ');
 355       /* Normalize any illegal characters. */
 356       else if (just_normalize && (!g_ascii_isgraph (*s)))
 357         g_string_append_printf (decoded, "%%%02X", (guchar)*s);
 358       else
 359         g_string_append_c (decoded, *s);
 360     }
 361
 362   len = decoded->len;
 363   g_assert (len >= 0);
 364
 365   if (!(flags & G_URI_FLAGS_ENCODED) &&
 366       !g_utf8_validate (decoded->str, len, &invalid))
 367     {
 368       g_set_error_literal (error, G_URI_ERROR, parse_error,
 369                            _("Non-UTF-8 characters in URI"));
 370       g_string_free (decoded, TRUE);
 371       return -1;
 372     }
 373
 374   if (out)
 375     *out = g_string_free (decoded, FALSE);
 376   else
 377     g_string_free (decoded, TRUE);
 378
 379   return len;
 380 }
 381
 382 static gboolean
 383 uri_decode (gchar       **out,
 384             const gchar  *illegal_chars,
 385             const gchar  *start,
 386             gsize         length,
 387             gboolean      www_form,
 388             GUriFlags     flags,
 389             GUriError     parse_error,
 390             GError      **error)
 391 {
 392   return uri_decoder (out, illegal_chars, start, length, FALSE, www_form, flags,
 393                       parse_error, error) != -1;
 394 }
 395
 396 static gboolean
 397 uri_normalize (gchar       **out,
 398                const gchar  *start,
 399                gsize         length,
 400                GUriFlags     flags,
 401                GUriError     parse_error,
 402                GError      **error)
 403 {
 404   return uri_decoder (out, NULL, start, length, TRUE, FALSE, flags,
 405                       parse_error, error) != -1;
 406 }
 407
 408 static gboolean
 409 is_valid (guchar       c,
 410           const gchar *reserved_chars_allowed)
 411 {
 412   if (g_uri_char_is_unreserved (c))
 413     return TRUE;
 414
 415   if (reserved_chars_allowed && strchr (reserved_chars_allowed, c))
 416     return TRUE;
 417
 418   return FALSE;
 419 }
 420
 421 void
 422 _uri_encoder (GString      *out,
 423               const guchar *start,
 424               gsize         length,
 425               const gchar  *reserved_chars_allowed,
 426               gboolean      allow_utf8)
 427 {
 428   static const gchar hex[] = "0123456789ABCDEF";
 429   const guchar *p = start;
 430   const guchar *end = p + length;
 431
 432   while (p < end)
 433     {
 434       gunichar multibyte_utf8_char = 0;
 435
 436       if (allow_utf8 && *p >= 0x80)
 437         multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p);
 438
 439       if (multibyte_utf8_char > 0 &&
 440           multibyte_utf8_char != (gunichar) -1 && multibyte_utf8_char != (gunichar) -2)
 441         {
 442           gint len = g_utf8_skip [*p];
 443           g_string_append_len (out, (gchar *)p, len);
 444           p += len;
 445         }
 446       else if (is_valid (*p, reserved_chars_allowed))
 447         {
 448           g_string_append_c (out, *p);
 449           p++;
 450         }
 451       else
 452         {
 453           g_string_append_c (out, '%');
 454           g_string_append_c (out, hex[*p >> 4]);
 455           g_string_append_c (out, hex[*p & 0xf]);
 456           p++;
 457         }
 458     }
 459 }
 460
 461 /* Parse the IP-literal construction from RFC 6874 (which extends RFC 3986 to
 462  * support IPv6 zone identifiers.
 463  *
 464  * Currently, IP versions beyond 6 (i.e. the IPvFuture rule) are unsupported.
 465  * There’s no point supporting them until (a) they exist and (b) the rest of the
 466  * stack (notably, sockets) supports them.
 467  *
 468  * Rules:
 469  *
 470  * IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture  ) "]"
 471  *
 472  * ZoneID = 1*( unreserved / pct-encoded )
 473  *
 474  * IPv6addrz = IPv6address "%25" ZoneID
 475  *
 476  * If %G_URI_FLAGS_PARSE_RELAXED is specified, this function also accepts:
 477  *
 478  * IPv6addrz = IPv6address "%" ZoneID
 479  */
 480 static gboolean
 481 parse_ip_literal (const gchar  *start,
 482                   gsize         length,
 483                   GUriFlags     flags,
 484                   gchar       **out,
 485                   GError      **error)
 486 {
 487   gchar *pct, *zone_id = NULL;
 488   gchar *addr = NULL;
 489   gsize addr_length = 0;
 490   gsize zone_id_length = 0;
 491   gchar *decoded_zone_id = NULL;
 492
 493   if (start[length - 1] != ']')
 494     goto bad_ipv6_literal;
 495
 496   /* Drop the square brackets */
 497   addr = g_strndup (start + 1, length - 2);
 498   addr_length = length - 2;
 499
 500   /* If there's an IPv6 scope ID, split out the zone. */
 501   pct = strchr (addr, '%');
 502   if (pct != NULL)
 503     {
 504       *pct = '\0';
 505
 506       if (addr_length - (pct - addr) >= 4 &&
 507           *(pct + 1) == '2' && *(pct + 2) == '5')
 508         {
 509           zone_id = pct + 3;
 510           zone_id_length = addr_length - (zone_id - addr);
 511         }
 512       else if (flags & G_URI_FLAGS_PARSE_RELAXED &&
 513                addr_length - (pct - addr) >= 2)
 514         {
 515           zone_id = pct + 1;
 516           zone_id_length = addr_length - (zone_id - addr);
 517         }
 518       else
 519         goto bad_ipv6_literal;
 520
 521       g_assert (zone_id_length >= 1);
 522     }
 523
 524   /* addr must be an IPv6 address */
 525   if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
 526     goto bad_ipv6_literal;
 527
 528   /* Zone ID must be valid. It can contain %-encoded characters. */
 529   if (zone_id != NULL &&
 530       !uri_decode (&decoded_zone_id, NULL, zone_id, zone_id_length, FALSE,
 531                    flags, G_URI_ERROR_BAD_HOST, NULL))
 532     goto bad_ipv6_literal;
 533
 534   /* Success */
 535   if (out != NULL && decoded_zone_id != NULL)
 536     *out = g_strconcat (addr, "%", decoded_zone_id, NULL);
 537   else if (out != NULL)
 538     *out = g_steal_pointer (&addr);
 539
 540   g_free (addr);
 541   g_free (decoded_zone_id);
 542
 543   return TRUE;
 544
 545 bad_ipv6_literal:
 546   g_free (addr);
 547   g_free (decoded_zone_id);
 548   g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
 549                _("Invalid IPv6 address ‘%.*s’ in URI"),
 550                (gint)length, start);
 551
 552   return FALSE;
 553 }
 554
 555 static gboolean
 556 parse_host (const gchar  *start,
 557             gsize         length,
 558             GUriFlags     flags,
 559             gchar       **out,
 560             GError      **error)
 561 {
 562   gchar *decoded = NULL, *host;
 563   gchar *addr = NULL;
 564
 565   if (*start == '[')
 566     {
 567       if (!parse_ip_literal (start, length, flags, &host, error))
 568         return FALSE;
 569       goto ok;
 570     }
 571
 572   if (g_ascii_isdigit (*start))
 573     {
 574       addr = g_strndup (start, length);
 575       if (g_hostname_is_ip_address (addr))
 576         {
 577           host = addr;
 578           goto ok;
 579         }
 580       g_free (addr);
 581     }
 582
 583   if (flags & G_URI_FLAGS_NON_DNS)
 584     {
 585       if (!uri_normalize (&decoded, start, length, flags,
 586                           G_URI_ERROR_BAD_HOST, error))
 587         return FALSE;
 588       host = g_steal_pointer (&decoded);
 589       goto ok;
 590     }
 591
 592   flags &= ~G_URI_FLAGS_ENCODED;
 593   if (!uri_decode (&decoded, NULL, start, length, FALSE, flags,
 594                    G_URI_ERROR_BAD_HOST, error))
 595     return FALSE;
 596
 597   /* You're not allowed to %-encode an IP address, so if it wasn't
 598    * one before, it better not be one now.
 599    */
 600   if (g_hostname_is_ip_address (decoded))
 601     {
 602       g_free (decoded);
 603       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
 604                    _("Illegal encoded IP address ‘%.*s’ in URI"),
 605                    (gint)length, start);
 606       return FALSE;
 607     }
 608
 609   if (g_hostname_is_non_ascii (decoded))
 610     {
 611       host = g_hostname_to_ascii (decoded);
 612       if (host == NULL)
 613         {
 614           g_free (decoded);
 615           g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
 616                        _("Illegal internationalized hostname ‘%.*s’ in URI"),
 617                        (gint) length, start);
 618           return FALSE;
 619         }
 620     }
 621   else
 622     {
 623       host = g_steal_pointer (&decoded);
 624     }
 625
 626  ok:
 627   if (out)
 628     *out = g_steal_pointer (&host);
 629   g_free (host);
 630   g_free (decoded);
 631
 632   return TRUE;
 633 }
 634
 635 static gboolean
 636 parse_port (const gchar  *start,
 637             gsize         length,
 638             gint         *out,
 639             GError      **error)
 640 {
 641   gchar *end;
 642   gulong parsed_port;
 643
 644   /* strtoul() allows leading + or -, so we have to check this first. */
 645   if (!g_ascii_isdigit (*start))
 646     {
 647       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
 648                    _("Could not parse port ‘%.*s’ in URI"),
 649                    (gint)length, start);
 650       return FALSE;
 651     }
 652
 653   /* We know that *(start + length) is either '\0' or a non-numeric
 654    * character, so strtoul() won't scan beyond it.
 655    */
 656   parsed_port = strtoul (start, &end, 10);
 657   if (end != start + length)
 658     {
 659       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
 660                    _("Could not parse port ‘%.*s’ in URI"),
 661                    (gint)length, start);
 662       return FALSE;
 663     }
 664   else if (parsed_port > 65535)
 665     {
 666       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
 667                    _("Port ‘%.*s’ in URI is out of range"),
 668                    (gint)length, start);
 669       return FALSE;
 670     }
 671
 672   if (out)
 673     *out = parsed_port;
 674   return TRUE;
 675 }
 676
 677 static gboolean
 678 parse_userinfo (const gchar  *start,
 679                 gsize         length,
 680                 GUriFlags     flags,
 681                 gchar       **user,
 682                 gchar       **password,
 683                 gchar       **auth_params,
 684                 GError      **error)
 685 {
 686   const gchar *user_end = NULL, *password_end = NULL, *auth_params_end;
 687
 688   auth_params_end = start + length;
 689   if (flags & G_URI_FLAGS_HAS_AUTH_PARAMS)
 690     password_end = memchr (start, ';', auth_params_end - start);
 691   if (!password_end)
 692     password_end = auth_params_end;
 693   if (flags & G_URI_FLAGS_HAS_PASSWORD)
 694     user_end = memchr (start, ':', password_end - start);
 695   if (!user_end)
 696     user_end = password_end;
 697
 698   if (!uri_normalize (user, start, user_end - start, flags,
 699                       G_URI_ERROR_BAD_USER, error))
 700     return FALSE;
 701
 702   if (*user_end == ':')
 703     {
 704       start = user_end + 1;
 705       if (!uri_normalize (password, start, password_end - start, flags,
 706                           G_URI_ERROR_BAD_PASSWORD, error))
 707         {
 708           if (user)
 709             g_clear_pointer (user, g_free);
 710           return FALSE;
 711         }
 712     }
 713   else if (password)
 714     *password = NULL;
 715
 716   if (*password_end == ';')
 717     {
 718       start = password_end + 1;
 719       if (!uri_normalize (auth_params, start, auth_params_end - start, flags,
 720                           G_URI_ERROR_BAD_AUTH_PARAMS, error))
 721         {
 722           if (user)
 723             g_clear_pointer (user, g_free);
 724           if (password)
 725             g_clear_pointer (password, g_free);
 726           return FALSE;
 727         }
 728     }
 729   else if (auth_params)
 730     *auth_params = NULL;
 731
 732   return TRUE;
 733 }
 734
 735 static gchar *
 736 uri_cleanup (const gchar *uri_string)
 737 {
 738   GString *copy;
 739   const gchar *end;
 740
 741   /* Skip leading whitespace */
 742   while (g_ascii_isspace (*uri_string))
 743     uri_string++;
 744
 745   /* Ignore trailing whitespace */
 746   end = uri_string + strlen (uri_string);
 747   while (end > uri_string && g_ascii_isspace (*(end - 1)))
 748     end--;
 749
 750   /* Copy the rest, encoding unencoded spaces and stripping other whitespace */
 751   copy = g_string_sized_new (end - uri_string);
 752   while (uri_string < end)
 753     {
 754       if (*uri_string == ' ')
 755         g_string_append (copy, "%20");
 756       else if (g_ascii_isspace (*uri_string))
 757         ;
 758       else
 759         g_string_append_c (copy, *uri_string);
 760       uri_string++;
 761     }
 762
 763   return g_string_free (copy, FALSE);
 764 }
 765
 766 static gboolean
 767 should_normalize_empty_path (const char *scheme)
 768 {
 769   const char * const schemes[] = { "https", "http", "wss", "ws" };
 770   gsize i;
 771   for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
 772     {
 773       if (!strcmp (schemes[i], scheme))
 774         return TRUE;
 775     }
 776   return FALSE;
 777 }
 778
 779 static int
 780 normalize_port (const char *scheme,
 781                 int         port)
 782 {
 783   const char *default_schemes[3] = { NULL };
 784   int i;
 785
 786   switch (port)
 787     {
 788     case 21:
 789       default_schemes[0] = "ftp";
 790       break;
 791     case 80:
 792       default_schemes[0] = "http";
 793       default_schemes[1] = "ws";
 794       break;
 795     case 443:
 796       default_schemes[0] = "https";
 797       default_schemes[1] = "wss";
 798       break;
 799     default:
 800       break;
 801     }
 802
 803   for (i = 0; default_schemes[i]; ++i)
 804     {
 805       if (!strcmp (scheme, default_schemes[i]))
 806         return -1;
 807     }
 808
 809   return port;
 810 }
 811
 812 int
 813 g_uri_get_default_scheme_port (const char *scheme)
 814 {
 815   if (strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0)
 816     return 80;
 817
 818   if (strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0)
 819     return 443;
 820
 821   if (strcmp (scheme, "ftp") == 0)
 822     return 21;
 823
 824   if (strstr (scheme, "socks") == scheme)
 825     return 1080;
 826
 827   return -1;
 828 }
 829
 830 static gboolean
 831 g_uri_split_internal (const gchar  *uri_string,
 832                       GUriFlags     flags,
 833                       gchar       **scheme,
 834                       gchar       **userinfo,
 835                       gchar       **user,
 836                       gchar       **password,
 837                       gchar       **auth_params,
 838                       gchar       **host,
 839                       gint         *port,
 840                       gchar       **path,
 841                       gchar       **query,
 842                       gchar       **fragment,
 843                       GError      **error)
 844 {
 845   const gchar *end, *colon, *at, *path_start, *semi, *question;
 846   const gchar *p, *bracket, *hostend;
 847   gchar *cleaned_uri_string = NULL;
 848   gchar *normalized_scheme = NULL;
 849
 850   if (scheme)
 851     *scheme = NULL;
 852   if (userinfo)
 853     *userinfo = NULL;
 854   if (user)
 855     *user = NULL;
 856   if (password)
 857     *password = NULL;
 858   if (auth_params)
 859     *auth_params = NULL;
 860   if (host)
 861     *host = NULL;
 862   if (port)
 863     *port = -1;
 864   if (path)
 865     *path = NULL;
 866   if (query)
 867     *query = NULL;
 868   if (fragment)
 869     *fragment = NULL;
 870
 871   if ((flags & G_URI_FLAGS_PARSE_RELAXED) && strpbrk (uri_string, " \t\n\r"))
 872     {
 873       cleaned_uri_string = uri_cleanup (uri_string);
 874       uri_string = cleaned_uri_string;
 875     }
 876
 877   /* Find scheme */
 878   p = uri_string;
 879   while (*p && (g_ascii_isalpha (*p) ||
 880                (p > uri_string && (g_ascii_isdigit (*p) ||
 881                                    *p == '.' || *p == '+' || *p == '-'))))
 882     p++;
 883
 884   if (p > uri_string && *p == ':')
 885     {
 886       normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
 887       if (scheme)
 888         *scheme = g_steal_pointer (&normalized_scheme);
 889       p++;
 890     }
 891   else
 892     {
 893       if (scheme)
 894         *scheme = NULL;
 895       p = uri_string;
 896     }
 897
 898   /* Check for authority */
 899   if (strncmp (p, "//", 2) == 0)
 900     {
 901       p += 2;
 902
 903       path_start = p + strcspn (p, "/?#");
 904       at = memchr (p, '@', path_start - p);
 905       if (at)
 906         {
 907           if (flags & G_URI_FLAGS_PARSE_RELAXED)
 908             {
 909               gchar *next_at;
 910
 911               /* Any "@"s in the userinfo must be %-encoded, but
 912                * people get this wrong sometimes. Since "@"s in the
 913                * hostname are unlikely (and also wrong anyway), assume
 914                * that if there are extra "@"s, they belong in the
 915                * userinfo.
 916                */
 917               do
 918                 {
 919                   next_at = memchr (at + 1, '@', path_start - (at + 1));
 920                   if (next_at)
 921                     at = next_at;
 922                 }
 923               while (next_at);
 924             }
 925
 926           if (user || password || auth_params ||
 927               (flags & (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS)))
 928             {
 929               if (!parse_userinfo (p, at - p, flags,
 930                                    user, password, auth_params,
 931                                    error))
 932                 goto fail;
 933             }
 934
 935           if (!uri_normalize (userinfo, p, at - p, flags,
 936                               G_URI_ERROR_BAD_USER, error))
 937             goto fail;
 938
 939           p = at + 1;
 940         }
 941
 942       if (flags & G_URI_FLAGS_PARSE_RELAXED)
 943         {
 944           semi = strchr (p, ';');
 945           if (semi && semi < path_start)
 946             {
 947               /* Technically, semicolons are allowed in the "host"
 948                * production, but no one ever does this, and some
 949                * schemes mistakenly use semicolon as a delimiter
 950                * marking the start of the path. We have to check this
 951                * after checking for userinfo though, because a
 952                * semicolon before the "@" must be part of the
 953                * userinfo.
 954                */
 955               path_start = semi;
 956             }
 957         }
 958
 959       /* Find host and port. The host may be a bracket-delimited IPv6
 960        * address, in which case the colon delimiting the port must come
 961        * (immediately) after the close bracket.
 962        */
 963       if (*p == '[')
 964         {
 965           bracket = memchr (p, ']', path_start - p);
 966           if (bracket && *(bracket + 1) == ':')
 967             colon = bracket + 1;
 968           else
 969             colon = NULL;
 970         }
 971       else
 972         colon = memchr (p, ':', path_start - p);
 973
 974       hostend = colon ? colon : path_start;
 975       if (!parse_host (p, hostend - p, flags, host, error))
 976         goto fail;
 977
 978       if (colon && colon != path_start - 1)
 979         {
 980           p = colon + 1;
 981           if (!parse_port (p, path_start - p, port, error))
 982             goto fail;
 983         }
 984
 985       p = path_start;
 986     }
 987
 988   /* Find fragment. */
 989   end = p + strcspn (p, "#");
 990   if (*end == '#')
 991     {
 992       if (!uri_normalize (fragment, end + 1, strlen (end + 1),
 993                           flags | (flags & G_URI_FLAGS_ENCODED_FRAGMENT ? G_URI_FLAGS_ENCODED : 0),
 994                           G_URI_ERROR_BAD_FRAGMENT, error))
 995         goto fail;
 996     }
 997
 998   /* Find query */
 999   question = memchr (p, '?', end - p);
1000   if (question)
1001     {
1002       if (!uri_normalize (query, question + 1, end - (question + 1),
1003                           flags | (flags & G_URI_FLAGS_ENCODED_QUERY ? G_URI_FLAGS_ENCODED : 0),
1004                           G_URI_ERROR_BAD_QUERY, error))
1005         goto fail;
1006       end = question;
1007     }
1008
1009   if (!uri_normalize (path, p, end - p,
1010                       flags | (flags & G_URI_FLAGS_ENCODED_PATH ? G_URI_FLAGS_ENCODED : 0),
1011                       G_URI_ERROR_BAD_PATH, error))
1012     goto fail;
1013
1014   /* Scheme-based normalization */
1015   if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
1016     {
1017       const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
1018
1019       if (should_normalize_empty_path (scheme_str) && path && !**path)
1020         {
1021           g_free (*path);
1022           *path = g_strdup ("/");
1023         }
1024
1025       if (port && *port == -1)
1026         *port = g_uri_get_default_scheme_port (scheme_str);
1027     }
1028
1029   g_free (normalized_scheme);
1030   g_free (cleaned_uri_string);
1031   return TRUE;
1032
1033  fail:
1034   if (scheme)
1035     g_clear_pointer (scheme, g_free);
1036   if (userinfo)
1037     g_clear_pointer (userinfo, g_free);
1038   if (host)
1039     g_clear_pointer (host, g_free);
1040   if (port)
1041     *port = -1;
1042   if (path)
1043     g_clear_pointer (path, g_free);
1044   if (query)
1045     g_clear_pointer (query, g_free);
1046   if (fragment)
1047     g_clear_pointer (fragment, g_free);
1048
1049   g_free (normalized_scheme);
1050   g_free (cleaned_uri_string);
1051   return FALSE;
1052 }
1053
1054 /**
1055  * g_uri_split:
1056  * @uri_ref: a string containing a relative or absolute URI
1057  * @flags: flags for parsing @uri_ref
1058  * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1059  *    the scheme (converted to lowercase), or %NULL
1060  * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains
1061  *    the userinfo, or %NULL
1062  * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1063  *    host, or %NULL
1064  * @port: (out) (optional) (transfer full): on return, contains the
1065  *    port, or `-1`
1066  * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1067  *    path
1068  * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1069  *    query, or %NULL
1070  * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1071  *    the fragment, or %NULL
1072  * @error: #GError for error reporting, or %NULL to ignore.
1073  *
1074  * Parses @uri_ref (which can be an
1075  * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1076  * returns the pieces. Any component that doesn't appear in @uri_ref will be
1077  * returned as %NULL (but note that all URIs always have a path component,
1078  * though it may be the empty string).
1079  *
1080  * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in
1081  * @uri_ref will remain encoded in the output strings. (If not,
1082  * then all such characters will be decoded.) Note that decoding will
1083  * only work if the URI components are ASCII or UTF-8, so you will
1084  * need to use %G_URI_FLAGS_ENCODED if they are not.
1085  *
1086  * Note that the %G_URI_FLAGS_HAS_PASSWORD and
1087  * %G_URI_FLAGS_HAS_AUTH_PARAMS @flags are ignored by g_uri_split(),
1088  * since it always returns only the full userinfo; use
1089  * g_uri_split_with_user() if you want it split up.
1090  *
1091  * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1092  *   on error.
1093  *
1094  * Since: 2.66
1095  */
1096 gboolean
1097 g_uri_split (const gchar  *uri_ref,
1098              GUriFlags     flags,
1099              gchar       **scheme,
1100              gchar       **userinfo,
1101              gchar       **host,
1102              gint         *port,
1103              gchar       **path,
1104              gchar       **query,
1105              gchar       **fragment,
1106              GError      **error)
1107 {
1108   g_return_val_if_fail (uri_ref != NULL, FALSE);
1109   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1110
1111   return g_uri_split_internal (uri_ref, flags,
1112                                scheme, userinfo, NULL, NULL, NULL,
1113                                host, port, path, query, fragment,
1114                                error);
1115 }
1116
1117 /**
1118  * g_uri_split_with_user:
1119  * @uri_ref: a string containing a relative or absolute URI
1120  * @flags: flags for parsing @uri_ref
1121  * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1122  *    the scheme (converted to lowercase), or %NULL
1123  * @user: (out) (nullable) (optional) (transfer full): on return, contains
1124  *    the user, or %NULL
1125  * @password: (out) (nullable) (optional) (transfer full): on return, contains
1126  *    the password, or %NULL
1127  * @auth_params: (out) (nullable) (optional) (transfer full): on return, contains
1128  *    the auth_params, or %NULL
1129  * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1130  *    host, or %NULL
1131  * @port: (out) (optional) (transfer full): on return, contains the
1132  *    port, or `-1`
1133  * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1134  *    path
1135  * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1136  *    query, or %NULL
1137  * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1138  *    the fragment, or %NULL
1139  * @error: #GError for error reporting, or %NULL to ignore.
1140  *
1141  * Parses @uri_ref (which can be an
1142  * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1143  * returns the pieces. Any component that doesn't appear in @uri_ref will be
1144  * returned as %NULL (but note that all URIs always have a path component,
1145  * though it may be the empty string).
1146  *
1147  * See g_uri_split(), and the definition of #GUriFlags, for more
1148  * information on the effect of @flags. Note that @password will only
1149  * be parsed out if @flags contains %G_URI_FLAGS_HAS_PASSWORD, and
1150  * @auth_params will only be parsed out if @flags contains
1151  * %G_URI_FLAGS_HAS_AUTH_PARAMS.
1152  *
1153  * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1154  *   on error.
1155  *
1156  * Since: 2.66
1157  */
1158 gboolean
1159 g_uri_split_with_user (const gchar  *uri_ref,
1160                        GUriFlags     flags,
1161                        gchar       **scheme,
1162                        gchar       **user,
1163                        gchar       **password,
1164                        gchar       **auth_params,
1165                        gchar       **host,
1166                        gint         *port,
1167                        gchar       **path,
1168                        gchar       **query,
1169                        gchar       **fragment,
1170                        GError      **error)
1171 {
1172   g_return_val_if_fail (uri_ref != NULL, FALSE);
1173   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1174
1175   return g_uri_split_internal (uri_ref, flags,
1176                                scheme, NULL, user, password, auth_params,
1177                                host, port, path, query, fragment,
1178                                error);
1179 }
1180
1181
1182 /**
1183  * g_uri_split_network:
1184  * @uri_string: a string containing an absolute URI
1185  * @flags: flags for parsing @uri_string
1186  * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1187  *    the scheme (converted to lowercase), or %NULL
1188  * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1189  *    host, or %NULL
1190  * @port: (out) (optional) (transfer full): on return, contains the
1191  *    port, or `-1`
1192  * @error: #GError for error reporting, or %NULL to ignore.
1193  *
1194  * Parses @uri_string (which must be an [absolute URI][relative-absolute-uris])
1195  * according to @flags, and returns the pieces relevant to connecting to a host.
1196  * See the documentation for g_uri_split() for more details; this is
1197  * mostly a wrapper around that function with simpler arguments.
1198  * However, it will return an error if @uri_string is a relative URI,
1199  * or does not contain a hostname component.
1200  *
1201  * Returns: (skip): %TRUE if @uri_string parsed successfully,
1202  *   %FALSE on error.
1203  *
1204  * Since: 2.66
1205  */
1206 gboolean
1207 g_uri_split_network (const gchar  *uri_string,
1208                      GUriFlags     flags,
1209                      gchar       **scheme,
1210                      gchar       **host,
1211                      gint         *port,
1212                      GError      **error)
1213 {
1214   gchar *my_scheme = NULL, *my_host = NULL;
1215
1216   g_return_val_if_fail (uri_string != NULL, FALSE);
1217   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1218
1219   if (!g_uri_split_internal (uri_string, flags,
1220                              &my_scheme, NULL, NULL, NULL, NULL,
1221                              &my_host, port, NULL, NULL, NULL,
1222                              error))
1223     return FALSE;
1224
1225   if (!my_scheme || !my_host)
1226     {
1227       if (!my_scheme)
1228         {
1229           g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1230                        _("URI ‘%s’ is not an absolute URI"),
1231                        uri_string);
1232         }
1233       else
1234         {
1235           g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
1236                        _("URI ‘%s’ has no host component"),
1237                        uri_string);
1238         }
1239       g_free (my_scheme);
1240       g_free (my_host);
1241
1242       return FALSE;
1243     }
1244
1245   if (scheme)
1246     *scheme = g_steal_pointer (&my_scheme);
1247   if (host)
1248     *host = g_steal_pointer (&my_host);
1249
1250   g_free (my_scheme);
1251   g_free (my_host);
1252
1253   return TRUE;
1254 }
1255
1256 /**
1257  * g_uri_is_valid:
1258  * @uri_string: a string containing an absolute URI
1259  * @flags: flags for parsing @uri_string
1260  * @error: #GError for error reporting, or %NULL to ignore.
1261  *
1262  * Parses @uri_string according to @flags, to determine whether it is a valid
1263  * [absolute URI][relative-absolute-uris], i.e. it does not need to be resolved
1264  * relative to another URI using g_uri_parse_relative().
1265  *
1266  * If it’s not a valid URI, an error is returned explaining how it’s invalid.
1267  *
1268  * See g_uri_split(), and the definition of #GUriFlags, for more
1269  * information on the effect of @flags.
1270  *
1271  * Returns: %TRUE if @uri_string is a valid absolute URI, %FALSE on error.
1272  *
1273  * Since: 2.66
1274  */
1275 gboolean
1276 g_uri_is_valid (const gchar  *uri_string,
1277                 GUriFlags     flags,
1278                 GError      **error)
1279 {
1280   gchar *my_scheme = NULL;
1281
1282   g_return_val_if_fail (uri_string != NULL, FALSE);
1283   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1284
1285   if (!g_uri_split_internal (uri_string, flags,
1286                              &my_scheme, NULL, NULL, NULL, NULL,
1287                              NULL, NULL, NULL, NULL, NULL,
1288                              error))
1289     return FALSE;
1290
1291   if (!my_scheme)
1292     {
1293       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1294                    _("URI ‘%s’ is not an absolute URI"),
1295                    uri_string);
1296       return FALSE;
1297     }
1298
1299   g_free (my_scheme);
1300
1301   return TRUE;
1302 }
1303
1304
/* Moves @out back over the most recently written path segment, stopping on
 * that segment's leading "/" or at @start, whichever is reached first.
 * Returns the new output position; @out is returned unchanged when it is
 * already at @start.
 */
static gchar *
drop_last_output_segment (gchar *start,
                          gchar *out)
{
  while (out > start)
    {
      out--;
      if (*out == '/')
        break;
    }

  return out;
}

/* In-place implementation of the "Remove Dot Segments" algorithm from
 * section 5.2.4 of RFC 3986. The rule letters below refer to that section.
 *
 * See https://tools.ietf.org/html/rfc3986#section-5.2.4
 */
static void
remove_dot_segments (gchar *path)
{
  /* Reading and writing share one buffer. This is safe because the write
   * position only advances together with the read position, and the read
   * position never moves backwards, so the writer never overtakes the
   * reader. */
  gchar *src = path;
  gchar *dst = path;

  /* Nothing to do for an empty path; in particular, don't write to it. */
  if (path[0] == '\0')
    return;

  while (*src != '\0')
    {
      if (src[0] == '.')
        {
          /* A. leading "./" or "../": drop the prefix. */
          if (src[1] == '/')
            {
              src += 2;
              continue;
            }
          if (src[1] == '.' && src[2] == '/')
            {
              src += 3;
              continue;
            }

          /* D. the remaining input is exactly "." or "..": drop it. */
          if (src[1] == '\0' || (src[1] == '.' && src[2] == '\0'))
            {
              src[0] = '\0';
              continue;
            }
        }
      else if (src[0] == '/' && src[1] == '.')
        {
          /* B. "/./" or a trailing "/.": replace with "/". */
          if (src[2] == '/')
            {
              src += 2;
              continue;
            }
          if (src[2] == '\0')
            {
              src[1] = '\0';
              continue;
            }

          /* C. "/../" or a trailing "/..": replace with "/" and remove
           *    the last segment (with its preceding "/", if any) from
           *    the output. */
          if (src[2] == '.' && src[3] == '/')
            {
              src += 3;
              dst = drop_last_output_segment (path, dst);
              continue;
            }
          if (src[2] == '.' && src[3] == '\0')
            {
              src[1] = '\0';
              dst = drop_last_output_segment (path, dst);
              continue;
            }
        }

      /* E. otherwise copy one segment to the output: its first character
       *    (the leading "/", if there is one) and everything up to, but
       *    not including, the next "/" or the end of the input. */
      *dst++ = *src++;
      while (*src != '\0' && *src != '/')
        *dst++ = *src++;
    }

  *dst = '\0';
}
1393
1394 /**
1395  * g_uri_parse:
1396  * @uri_string: a string representing an absolute URI
1397  * @flags: flags describing how to parse @uri_string
1398  * @error: #GError for error reporting, or %NULL to ignore.
1399  *
1400  * Parses @uri_string according to @flags. If the result is not a
1401  * valid [absolute URI][relative-absolute-uris], it will be discarded, and an
1402  * error returned.
1403  *
1404  * Return value: (transfer full): a new #GUri, or NULL on error.
1405  *
1406  * Since: 2.66
1407  */
1408 GUri *
1409 g_uri_parse (const gchar  *uri_string,
1410              GUriFlags     flags,
1411              GError      **error)
1412 {
1413   g_return_val_if_fail (uri_string != NULL, NULL);
1414   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1415
1416   return g_uri_parse_relative (NULL, uri_string, flags, error);
1417 }
1418
1419 /**
1420  * g_uri_parse_relative:
1421  * @base_uri: (nullable) (transfer none): a base absolute URI
1422  * @uri_ref: a string representing a relative or absolute URI
1423  * @flags: flags describing how to parse @uri_ref
1424  * @error: #GError for error reporting, or %NULL to ignore.
1425  *
1426  * Parses @uri_ref according to @flags and, if it is a
1427  * [relative URI][relative-absolute-uris], resolves it relative to @base_uri.
1428  * If the result is not a valid absolute URI, it will be discarded, and an error
1429  * returned.
1430  *
1431  * Return value: (transfer full): a new #GUri, or NULL on error.
1432  *
1433  * Since: 2.66
1434  */
1435 GUri *
1436 g_uri_parse_relative (GUri         *base_uri,
1437                       const gchar  *uri_ref,
1438                       GUriFlags     flags,
1439                       GError      **error)
1440 {
1441   GUri *uri = NULL;
1442
1443   g_return_val_if_fail (uri_ref != NULL, NULL);
1444   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1445   g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL);
1446
1447   /* Use GUri struct to construct the return value: there is no guarantee it is
1448    * actually correct within the function body. */
1449   uri = g_atomic_rc_box_new0 (GUri);
1450   uri->flags = flags;
1451
1452   if (!g_uri_split_internal (uri_ref, flags,
1453                              &uri->scheme, &uri->userinfo,
1454                              &uri->user, &uri->password, &uri->auth_params,
1455                              &uri->host, &uri->port,
1456                              &uri->path, &uri->query, &uri->fragment,
1457                              error))
1458     {
1459       g_uri_unref (uri);
1460       return NULL;
1461     }
1462
1463   if (!uri->scheme && !base_uri)
1464     {
1465       g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
1466                            _("URI is not absolute, and no base URI was provided"));
1467       g_uri_unref (uri);
1468       return NULL;
1469     }
1470
1471   if (base_uri)
1472     {
1473       /* This is section 5.2.2 of RFC 3986, except that we're doing
1474        * it in place in @uri rather than copying from R to T.
1475        *
1476        * See https://tools.ietf.org/html/rfc3986#section-5.2.2
1477        */
1478       if (uri->scheme)
1479         remove_dot_segments (uri->path);
1480       else
1481         {
1482           uri->scheme = g_strdup (base_uri->scheme);
1483           if (uri->host)
1484             remove_dot_segments (uri->path);
1485           else
1486             {
1487               if (!*uri->path)
1488                 {
1489                   g_free (uri->path);
1490                   uri->path = g_strdup (base_uri->path);
1491                   if (!uri->query)
1492                     uri->query = g_strdup (base_uri->query);
1493                 }
1494               else
1495                 {
1496                   if (*uri->path == '/')
1497                     remove_dot_segments (uri->path);
1498                   else
1499                     {
1500                       gchar *newpath, *last;
1501
1502                       last = strrchr (base_uri->path, '/');
1503                       if (last)
1504                         {
1505                           newpath = g_strdup_printf ("%.*s/%s",
1506                                                      (gint)(last - base_uri->path),
1507                                                      base_uri->path,
1508                                                      uri->path);
1509                         }
1510                       else
1511                         newpath = g_strdup_printf ("/%s", uri->path);
1512
1513                       g_free (uri->path);
1514                       uri->path = g_steal_pointer (&newpath);
1515
1516                       remove_dot_segments (uri->path);
1517                     }
1518                 }
1519
1520               uri->userinfo = g_strdup (base_uri->userinfo);
1521               uri->user = g_strdup (base_uri->user);
1522               uri->password = g_strdup (base_uri->password);
1523               uri->auth_params = g_strdup (base_uri->auth_params);
1524               uri->host = g_strdup (base_uri->host);
1525               uri->port = base_uri->port;
1526             }
1527         }
1528
1529       /* Scheme normalization couldn't have been done earlier
1530        * as the relative URI may not have had a scheme */
1531       if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
1532         {
1533           if (should_normalize_empty_path (uri->scheme) && !*uri->path)
1534             {
1535               g_free (uri->path);
1536               uri->path = g_strdup ("/");
1537             }
1538
1539           uri->port = normalize_port (uri->scheme, uri->port);
1540         }
1541     }
1542   else
1543     {
1544       remove_dot_segments (uri->path);
1545     }
1546
1547   return g_steal_pointer (&uri);
1548 }
1549
1550 /**
1551  * g_uri_resolve_relative:
1552  * @base_uri_string: (nullable): a string representing a base URI
1553  * @uri_ref: a string representing a relative or absolute URI
1554  * @flags: flags describing how to parse @uri_ref
1555  * @error: #GError for error reporting, or %NULL to ignore.
1556  *
1557  * Parses @uri_ref according to @flags and, if it is a
1558  * [relative URI][relative-absolute-uris], resolves it relative to
1559  * @base_uri_string. If the result is not a valid absolute URI, it will be
1560  * discarded, and an error returned.
1561  *
1562  * (If @base_uri_string is %NULL, this just returns @uri_ref, or
1563  * %NULL if @uri_ref is invalid or not absolute.)
1564  *
1565  * Return value: (transfer full): the resolved URI string,
1566  * or NULL on error.
1567  *
1568  * Since: 2.66
1569  */
1570 gchar *
1571 g_uri_resolve_relative (const gchar  *base_uri_string,
1572                         const gchar  *uri_ref,
1573                         GUriFlags     flags,
1574                         GError      **error)
1575 {
1576   GUri *base_uri, *resolved_uri;
1577   gchar *resolved_uri_string;
1578
1579   g_return_val_if_fail (uri_ref != NULL, NULL);
1580   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1581
1582   flags |= G_URI_FLAGS_ENCODED;
1583
1584   if (base_uri_string)
1585     {
1586       base_uri = g_uri_parse (base_uri_string, flags, error);
1587       if (!base_uri)
1588         return NULL;
1589     }
1590   else
1591     base_uri = NULL;
1592
1593   resolved_uri = g_uri_parse_relative (base_uri, uri_ref, flags, error);
1594   if (base_uri)
1595     g_uri_unref (base_uri);
1596   if (!resolved_uri)
1597     return NULL;
1598
1599   resolved_uri_string = g_uri_to_string (resolved_uri);
1600   g_uri_unref (resolved_uri);
1601   return g_steal_pointer (&resolved_uri_string);
1602 }
1603
/* Sets of reserved characters, one per URI component.
 * g_uri_join_internal() passes them to g_string_append_uri_escaped() as
 * the characters to leave unescaped when it writes that component.
 *
 * userinfo as a whole can contain sub-delims + ":", but split-out
 * user can't contain ":" or ";", and split-out password can't contain
 * ";".
 */
#define USERINFO_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
#define USER_ALLOWED_CHARS "!$&'()*+,="
#define PASSWORD_ALLOWED_CHARS "!$&'()*+,=:"
#define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS
/* Used for hosts that are written inside square brackets. */
#define IP_ADDR_ALLOWED_CHARS ":"
#define HOST_ALLOWED_CHARS G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
#define PATH_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
/* Query and fragment share the same set: the path set plus "?". */
#define QUERY_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
#define FRAGMENT_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1617
1618 static gchar *
1619 g_uri_join_internal (GUriFlags    flags,
1620                      const gchar *scheme,
1621                      gboolean     userinfo,
1622                      const gchar *user,
1623                      const gchar *password,
1624                      const gchar *auth_params,
1625                      const gchar *host,
1626                      gint         port,
1627                      const gchar *path,
1628                      const gchar *query,
1629                      const gchar *fragment)
1630 {
1631   gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
1632   GString *str;
1633   char *normalized_scheme = NULL;
1634
1635   /* Restrictions on path prefixes. See:
1636    * https://tools.ietf.org/html/rfc3986#section-3
1637    */
1638   g_return_val_if_fail (path != NULL, NULL);
1639   g_return_val_if_fail (host == NULL || (path[0] == '\0' || path[0] == '/'), NULL);
1640   g_return_val_if_fail (host != NULL || (path[0] != '/' || path[1] != '/'), NULL);
1641
1642   /* Arbitrarily chosen default size which should handle most average length
1643    * URIs. This should avoid a few reallocations of the buffer in most cases.
1644    * It’s 1B shorter than a power of two, since GString will add a
1645    * nul-terminator byte. */
1646   str = g_string_sized_new (127);
1647
1648   if (scheme)
1649     {
1650       g_string_append (str, scheme);
1651       g_string_append_c (str, ':');
1652     }
1653
1654   if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && scheme && ((host && port != -1) || path[0] == '\0'))
1655     normalized_scheme = g_ascii_strdown (scheme, -1);
1656
1657   if (host)
1658     {
1659       g_string_append (str, "//");
1660
1661       if (user)
1662         {
1663           if (encoded)
1664             g_string_append (str, user);
1665           else
1666             {
1667               if (userinfo)
1668                 g_string_append_uri_escaped (str, user, USERINFO_ALLOWED_CHARS, TRUE);
1669               else
1670                 /* Encode ':' and ';' regardless of whether we have a
1671                  * password or auth params, since it may be parsed later
1672                  * under the assumption that it does.
1673                  */
1674                 g_string_append_uri_escaped (str, user, USER_ALLOWED_CHARS, TRUE);
1675             }
1676
1677           if (password)
1678             {
1679               g_string_append_c (str, ':');
1680               if (encoded)
1681                 g_string_append (str, password);
1682               else
1683                 g_string_append_uri_escaped (str, password,
1684                                              PASSWORD_ALLOWED_CHARS, TRUE);
1685             }
1686
1687           if (auth_params)
1688             {
1689               g_string_append_c (str, ';');
1690               if (encoded)
1691                 g_string_append (str, auth_params);
1692               else
1693                 g_string_append_uri_escaped (str, auth_params,
1694                                              AUTH_PARAMS_ALLOWED_CHARS, TRUE);
1695             }
1696
1697           g_string_append_c (str, '@');
1698         }
1699
1700       if (strchr (host, ':') && g_hostname_is_ip_address (host))
1701         {
1702           g_string_append_c (str, '[');
1703           if (encoded)
1704             g_string_append (str, host);
1705           else
1706             g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
1707           g_string_append_c (str, ']');
1708         }
1709       else
1710         {
1711           if (encoded)
1712             g_string_append (str, host);
1713           else
1714             g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
1715         }
1716
1717       if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
1718         g_string_append_printf (str, ":%d", port);
1719     }
1720
1721   if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
1722     g_string_append (str, "/");
1723   else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
1724     g_string_append (str, path);
1725   else
1726     g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
1727
1728   g_free (normalized_scheme);
1729
1730   if (query)
1731     {
1732       g_string_append_c (str, '?');
1733       if (encoded || flags & G_URI_FLAGS_ENCODED_QUERY)
1734         g_string_append (str, query);
1735       else
1736         g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
1737     }
1738   if (fragment)
1739     {
1740       g_string_append_c (str, '#');
1741       if (encoded || flags & G_URI_FLAGS_ENCODED_FRAGMENT)
1742         g_string_append (str, fragment);
1743       else
1744         g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
1745     }
1746
1747   return g_string_free (str, FALSE);
1748 }
1749
1750 /**
1751  * g_uri_join:
1752  * @flags: flags describing how to build the URI string
1753  * @scheme: (nullable): the URI scheme, or %NULL
1754  * @userinfo: (nullable): the userinfo component, or %NULL
1755  * @host: (nullable): the host component, or %NULL
1756  * @port: the port, or `-1`
1757  * @path: (not nullable): the path component
1758  * @query: (nullable): the query component, or %NULL
1759  * @fragment: (nullable): the fragment, or %NULL
1760  *
1761  * Joins the given components together according to @flags to create
1762  * an absolute URI string. @path may not be %NULL (though it may be the empty
1763  * string).
1764  *
1765  * When @host is present, @path must either be empty or begin with a slash (`/`)
1766  * character. When @host is not present, @path cannot begin with two slash
1767  * characters (`//`). See
1768  * [RFC 3986, section 3](https://tools.ietf.org/html/rfc3986#section-3).
1769  *
1770  * See also g_uri_join_with_user(), which allows specifying the
1771  * components of the ‘userinfo’ separately.
1772  *
1773  * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1774  * in @flags.
1775  *
1776  * Return value: (not nullable) (transfer full): an absolute URI string
1777  *
1778  * Since: 2.66
1779  */
1780 gchar *
1781 g_uri_join (GUriFlags    flags,
1782             const gchar *scheme,
1783             const gchar *userinfo,
1784             const gchar *host,
1785             gint         port,
1786             const gchar *path,
1787             const gchar *query,
1788             const gchar *fragment)
1789 {
1790   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1791   g_return_val_if_fail (path != NULL, NULL);
1792
1793   return g_uri_join_internal (flags,
1794                               scheme,
1795                               TRUE, userinfo, NULL, NULL,
1796                               host,
1797                               port,
1798                               path,
1799                               query,
1800                               fragment);
1801 }
1802
1803 /**
1804  * g_uri_join_with_user:
1805  * @flags: flags describing how to build the URI string
1806  * @scheme: (nullable): the URI scheme, or %NULL
1807  * @user: (nullable): the user component of the userinfo, or %NULL
1808  * @password: (nullable): the password component of the userinfo, or
1809  *   %NULL
1810  * @auth_params: (nullable): the auth params of the userinfo, or
1811  *   %NULL
1812  * @host: (nullable): the host component, or %NULL
1813  * @port: the port, or `-1`
1814  * @path: (not nullable): the path component
1815  * @query: (nullable): the query component, or %NULL
1816  * @fragment: (nullable): the fragment, or %NULL
1817  *
1818  * Joins the given components together according to @flags to create
1819  * an absolute URI string. @path may not be %NULL (though it may be the empty
1820  * string).
1821  *
1822  * In contrast to g_uri_join(), this allows specifying the components
1823  * of the ‘userinfo’ separately. It otherwise behaves the same.
1824  *
1825  * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1826  * in @flags.
1827  *
1828  * Return value: (not nullable) (transfer full): an absolute URI string
1829  *
1830  * Since: 2.66
1831  */
1832 gchar *
1833 g_uri_join_with_user (GUriFlags    flags,
1834                       const gchar *scheme,
1835                       const gchar *user,
1836                       const gchar *password,
1837                       const gchar *auth_params,
1838                       const gchar *host,
1839                       gint         port,
1840                       const gchar *path,
1841                       const gchar *query,
1842                       const gchar *fragment)
1843 {
1844   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1845   g_return_val_if_fail (path != NULL, NULL);
1846
1847   return g_uri_join_internal (flags,
1848                               scheme,
1849                               FALSE, user, password, auth_params,
1850                               host,
1851                               port,
1852                               path,
1853                               query,
1854                               fragment);
1855 }
1856
1857 /**
1858  * g_uri_build:
1859  * @flags: flags describing how to build the #GUri
1860  * @scheme: (not nullable): the URI scheme
1861  * @userinfo: (nullable): the userinfo component, or %NULL
1862  * @host: (nullable): the host component, or %NULL
1863  * @port: the port, or `-1`
1864  * @path: (not nullable): the path component
1865  * @query: (nullable): the query component, or %NULL
1866  * @fragment: (nullable): the fragment, or %NULL
1867  *
1868  * Creates a new #GUri from the given components according to @flags.
1869  *
1870  * See also g_uri_build_with_user(), which allows specifying the
1871  * components of the "userinfo" separately.
1872  *
1873  * Return value: (not nullable) (transfer full): a new #GUri
1874  *
1875  * Since: 2.66
1876  */
1877 GUri *
1878 g_uri_build (GUriFlags    flags,
1879              const gchar *scheme,
1880              const gchar *userinfo,
1881              const gchar *host,
1882              gint         port,
1883              const gchar *path,
1884              const gchar *query,
1885              const gchar *fragment)
1886 {
1887   GUri *uri;
1888
1889   g_return_val_if_fail (scheme != NULL, NULL);
1890   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1891   g_return_val_if_fail (path != NULL, NULL);
1892
1893   uri = g_atomic_rc_box_new0 (GUri);
1894   uri->flags = flags;
1895   uri->scheme = g_ascii_strdown (scheme, -1);
1896   uri->userinfo = g_strdup (userinfo);
1897   uri->host = g_strdup (host);
1898   uri->port = port;
1899   uri->path = g_strdup (path);
1900   uri->query = g_strdup (query);
1901   uri->fragment = g_strdup (fragment);
1902
1903   return g_steal_pointer (&uri);
1904 }
1905
1906 /**
1907  * g_uri_build_with_user:
1908  * @flags: flags describing how to build the #GUri
1909  * @scheme: (not nullable): the URI scheme
1910  * @user: (nullable): the user component of the userinfo, or %NULL
1911  * @password: (nullable): the password component of the userinfo, or %NULL
1912  * @auth_params: (nullable): the auth params of the userinfo, or %NULL
1913  * @host: (nullable): the host component, or %NULL
1914  * @port: the port, or `-1`
1915  * @path: (not nullable): the path component
1916  * @query: (nullable): the query component, or %NULL
1917  * @fragment: (nullable): the fragment, or %NULL
1918  *
1919  * Creates a new #GUri from the given components according to @flags
1920  * (%G_URI_FLAGS_HAS_PASSWORD is added unconditionally). The @flags must be
1921  * coherent with the passed values, in particular use `%`-encoded values with
1922  * %G_URI_FLAGS_ENCODED.
1923  *
1924  * In contrast to g_uri_build(), this allows specifying the components
1925  * of the ‘userinfo’ field separately. Note that @user must be non-%NULL
1926  * if either @password or @auth_params is non-%NULL.
1927  *
1928  * Return value: (not nullable) (transfer full): a new #GUri
1929  *
1930  * Since: 2.66
1931  */
1932 GUri *
1933 g_uri_build_with_user (GUriFlags    flags,
1934                        const gchar *scheme,
1935                        const gchar *user,
1936                        const gchar *password,
1937                        const gchar *auth_params,
1938                        const gchar *host,
1939                        gint         port,
1940                        const gchar *path,
1941                        const gchar *query,
1942                        const gchar *fragment)
1943 {
1944   GUri *uri;
1945   GString *userinfo;
1946
1947   g_return_val_if_fail (scheme != NULL, NULL);
1948   g_return_val_if_fail (password == NULL || user != NULL, NULL);
1949   g_return_val_if_fail (auth_params == NULL || user != NULL, NULL);
1950   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1951   g_return_val_if_fail (path != NULL, NULL);
1952
1953   uri = g_atomic_rc_box_new0 (GUri);
1954   uri->flags = flags | G_URI_FLAGS_HAS_PASSWORD;
1955   uri->scheme = g_ascii_strdown (scheme, -1);
1956   uri->user = g_strdup (user);
1957   uri->password = g_strdup (password);
1958   uri->auth_params = g_strdup (auth_params);
1959   uri->host = g_strdup (host);
1960   uri->port = port;
1961   uri->path = g_strdup (path);
1962   uri->query = g_strdup (query);
1963   uri->fragment = g_strdup (fragment);
1964
1965   if (user)
1966     {
1967       userinfo = g_string_new (user);
1968       if (password)
1969         {
1970           g_string_append_c (userinfo, ':');
1971           g_string_append (userinfo, uri->password);
1972         }
1973       if (auth_params)
1974         {
1975           g_string_append_c (userinfo, ';');
1976           g_string_append (userinfo, uri->auth_params);
1977         }
1978       uri->userinfo = g_string_free (userinfo, FALSE);
1979     }
1980
1981   return g_steal_pointer (&uri);
1982 }
1983
1984 /**
1985  * g_uri_to_string:
1986  * @uri: a #GUri
1987  *
1988  * Returns a string representing @uri.
1989  *
1990  * This is not guaranteed to return a string which is identical to the
1991  * string that @uri was parsed from. However, if the source URI was
1992  * syntactically correct (according to RFC 3986), and it was parsed
1993  * with %G_URI_FLAGS_ENCODED, then g_uri_to_string() is guaranteed to return
1994  * a string which is at least semantically equivalent to the source
1995  * URI (according to RFC 3986).
1996  *
1997  * If @uri might contain sensitive details, such as authentication parameters,
1998  * or private data in its query string, and the returned string is going to be
1999  * logged, then consider using g_uri_to_string_partial() to redact parts.
2000  *
2001  * Return value: (not nullable) (transfer full): a string representing @uri,
2002  *     which the caller must free.
2003  *
2004  * Since: 2.66
2005  */
2006 gchar *
2007 g_uri_to_string (GUri *uri)
2008 {
2009   g_return_val_if_fail (uri != NULL, NULL);
2010
2011   return g_uri_to_string_partial (uri, G_URI_HIDE_NONE);
2012 }
2013
2014 /**
2015  * g_uri_to_string_partial:
2016  * @uri: a #GUri
2017  * @flags: flags describing what parts of @uri to hide
2018  *
2019  * Returns a string representing @uri, subject to the options in
2020  * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
2021  *
2022  * Return value: (not nullable) (transfer full): a string representing
2023  *     @uri, which the caller must free.
2024  *
2025  * Since: 2.66
2026  */
2027 gchar *
2028 g_uri_to_string_partial (GUri          *uri,
2029                          GUriHideFlags  flags)
2030 {
2031   gboolean hide_user = (flags & G_URI_HIDE_USERINFO);
2032   gboolean hide_password = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD));
2033   gboolean hide_auth_params = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS));
2034   gboolean hide_query = (flags & G_URI_HIDE_QUERY);
2035   gboolean hide_fragment = (flags & G_URI_HIDE_FRAGMENT);
2036
2037   g_return_val_if_fail (uri != NULL, NULL);
2038
2039   if (uri->flags & (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_HAS_AUTH_PARAMS))
2040     {
2041       return g_uri_join_with_user (uri->flags,
2042                                    uri->scheme,
2043                                    hide_user ? NULL : uri->user,
2044                                    hide_password ? NULL : uri->password,
2045                                    hide_auth_params ? NULL : uri->auth_params,
2046                                    uri->host,
2047                                    uri->port,
2048                                    uri->path,
2049                                    hide_query ? NULL : uri->query,
2050                                    hide_fragment ? NULL : uri->fragment);
2051     }
2052
2053   return g_uri_join (uri->flags,
2054                      uri->scheme,
2055                      hide_user ? NULL : uri->userinfo,
2056                      uri->host,
2057                      uri->port,
2058                      uri->path,
2059                      hide_query ? NULL : uri->query,
2060                      hide_fragment ? NULL : uri->fragment);
2061 }
2062
2063 /* This is just a copy of g_str_hash() with g_ascii_toupper() added */
2064 static guint
2065 str_ascii_case_hash (gconstpointer v)
2066 {
2067   const signed char *p;
2068   guint32 h = 5381;
2069
2070   for (p = v; *p != '\0'; p++)
2071     h = (h << 5) + h + g_ascii_toupper (*p);
2072
2073   return h;
2074 }
2075
2076 static gboolean
2077 str_ascii_case_equal (gconstpointer v1,
2078                       gconstpointer v2)
2079 {
2080   const gchar *string1 = v1;
2081   const gchar *string2 = v2;
2082
2083   return g_ascii_strcasecmp (string1, string2) == 0;
2084 }
2085
2086 /**
2087  * GUriParamsIter:
2088  *
2089  * Many URI schemes include one or more attribute/value pairs as part of the URI
2090  * value. For example `scheme://server/path?query=string&is=there` has two
2091  * attributes – `query=string` and `is=there` – in its query part.
2092  *
2093  * A #GUriParamsIter structure represents an iterator that can be used to
2094  * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
2095  * structures are typically allocated on the stack and then initialized with
2096  * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
2097  * for a usage example.
2098  *
2099  * Since: 2.66
2100  */
/* Private view of a #GUriParamsIter: the iterator functions in this file
 * cast the caller's #GUriParamsIter pointer to this type. */
typedef struct
{
  GUriParamsFlags flags;          /* flags passed to g_uri_params_iter_init() */
  const gchar    *attr;           /* start of the next attribute still to be parsed */
  const gchar    *end;            /* one past the last byte of the params string */
  guint8          sep_table[256]; /* 1 = index is a separator; 0 otherwise */
} RealIter;

/* RealIter is overlaid on storage declared as GUriParamsIter, so it must
 * have exactly the same size and must not need stricter alignment. */
G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
2111
2112 /**
2113  * g_uri_params_iter_init:
2114  * @iter: an uninitialized #GUriParamsIter
2115  * @params: a `%`-encoded string containing `attribute=value`
2116  *   parameters
2117  * @length: the length of @params, or `-1` if it is nul-terminated
2118  * @separators: the separator byte character set between parameters. (usually
2119  *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2120  *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2121  *   anything but ASCII characters. You may pass an empty set, in which case
2122  *   no splitting will occur.
2123  * @flags: flags to modify the way the parameters are handled.
2124  *
2125  * Initializes an attribute/value pair iterator.
2126  *
2127  * The iterator keeps pointers to the @params and @separators arguments, those
2128  * variables must thus outlive the iterator and not be modified during the
2129  * iteration.
2130  *
2131  * If %G_URI_PARAMS_WWW_FORM is passed in @flags, `+` characters in the param
2132  * string will be replaced with spaces in the output. For example, `foo=bar+baz`
2133  * will give attribute `foo` with value `bar baz`. This is commonly used on the
2134  * web (the `https` and `http` schemes only), but is deprecated in favour of
2135  * the equivalent of encoding spaces as `%20`.
2136  *
2137  * Unlike with g_uri_parse_params(), %G_URI_PARAMS_CASE_INSENSITIVE has no
2138  * effect if passed to @flags for g_uri_params_iter_init(). The caller is
2139  * responsible for doing their own case-insensitive comparisons.
2140  *
2141  * |[<!-- language="C" -->
2142  * GUriParamsIter iter;
2143  * GError *error = NULL;
2144  * gchar *unowned_attr, *unowned_value;
2145  *
2146  * g_uri_params_iter_init (&iter, "foo=bar&baz=bar&Foo=frob&baz=bar2", -1, "&", G_URI_PARAMS_NONE);
2147  * while (g_uri_params_iter_next (&iter, &unowned_attr, &unowned_value, &error))
2148  *   {
2149  *     g_autofree gchar *attr = g_steal_pointer (&unowned_attr);
2150  *     g_autofree gchar *value = g_steal_pointer (&unowned_value);
2151  *     // do something with attr and value; this code will be called 4 times
2152  *     // for the params string in this example: once with attr=foo and value=bar,
2153  *     // then with baz/bar, then Foo/frob, then baz/bar2.
2154  *   }
2155  * if (error)
2156  *   // handle parsing error
2157  * ]|
2158  *
2159  * Since: 2.66
2160  */
2161 void
2162 g_uri_params_iter_init (GUriParamsIter *iter,
2163                         const gchar    *params,
2164                         gssize          length,
2165                         const gchar    *separators,
2166                         GUriParamsFlags flags)
2167 {
2168   RealIter *ri = (RealIter *)iter;
2169   const gchar *s;
2170
2171   g_return_if_fail (iter != NULL);
2172   g_return_if_fail (length == 0 || params != NULL);
2173   g_return_if_fail (length >= -1);
2174   g_return_if_fail (separators != NULL);
2175
2176   ri->flags = flags;
2177
2178   if (length == -1)
2179     ri->end = params + strlen (params);
2180   else
2181     ri->end = params + length;
2182
2183   memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
2184   for (s = separators; *s != '\0'; ++s)
2185     ri->sep_table[*(guchar *)s] = TRUE;
2186
2187   ri->attr = params;
2188 }
2189
2190 /**
2191  * g_uri_params_iter_next:
2192  * @iter: an initialized #GUriParamsIter
2193  * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
2194  *     the attribute, or %NULL.
2195  * @value: (out) (nullable) (optional) (transfer full): on return, contains
2196  *     the value, or %NULL.
2197  * @error: #GError for error reporting, or %NULL to ignore.
2198  *
2199  * Advances @iter and retrieves the next attribute/value. %FALSE is returned if
2200  * an error has occurred (in which case @error is set), or if the end of the
2201  * iteration is reached (in which case @attribute and @value are set to %NULL
2202  * and the iterator becomes invalid). If %TRUE is returned,
2203  * g_uri_params_iter_next() may be called again to receive another
2204  * attribute/value pair.
2205  *
2206  * Note that the same @attribute may be returned multiple times, since URIs
2207  * allow repeated attributes.
2208  *
2209  * Returns: %FALSE if the end of the parameters has been reached or an error was
2210  *     encountered. %TRUE otherwise.
2211  *
2212  * Since: 2.66
2213  */
2214 gboolean
2215 g_uri_params_iter_next (GUriParamsIter *iter,
2216                         gchar         **attribute,
2217                         gchar         **value,
2218                         GError        **error)
2219 {
2220   RealIter *ri = (RealIter *)iter;
2221   const gchar *attr_end, *val, *val_end;
2222   gchar *decoded_attr, *decoded_value;
2223   gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM;
2224   GUriFlags decode_flags = G_URI_FLAGS_NONE;
2225
2226   g_return_val_if_fail (iter != NULL, FALSE);
2227   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2228
2229   /* Pre-clear these in case of failure or finishing. */
2230   if (attribute)
2231     *attribute = NULL;
2232   if (value)
2233     *value = NULL;
2234
2235   if (ri->attr >= ri->end)
2236     return FALSE;
2237
2238   if (ri->flags & G_URI_PARAMS_PARSE_RELAXED)
2239     decode_flags |= G_URI_FLAGS_PARSE_RELAXED;
2240
2241   /* Check if each character in @attr is a separator, by indexing by the
2242    * character value into the @sep_table, which has value 1 stored at an
2243    * index if that index is a separator. */
2244   for (val_end = ri->attr; val_end < ri->end; val_end++)
2245     if (ri->sep_table[*(guchar *)val_end])
2246       break;
2247
2248   attr_end = memchr (ri->attr, '=', val_end - ri->attr);
2249   if (!attr_end)
2250     {
2251       g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
2252                            _("Missing ‘=’ and parameter value"));
2253       return FALSE;
2254     }
2255   if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
2256                    www_form, decode_flags, G_URI_ERROR_FAILED, error))
2257     {
2258       return FALSE;
2259     }
2260
2261   val = attr_end + 1;
2262   if (!uri_decode (&decoded_value, NULL, val, val_end - val,
2263                    www_form, decode_flags, G_URI_ERROR_FAILED, error))
2264     {
2265       g_free (decoded_attr);
2266       return FALSE;
2267     }
2268
2269   if (attribute)
2270     *attribute = g_steal_pointer (&decoded_attr);
2271   if (value)
2272     *value = g_steal_pointer (&decoded_value);
2273
2274   g_free (decoded_attr);
2275   g_free (decoded_value);
2276
2277   ri->attr = val_end + 1;
2278   return TRUE;
2279 }
2280
2281 /**
2282  * g_uri_parse_params:
2283  * @params: a `%`-encoded string containing `attribute=value`
2284  *   parameters
2285  * @length: the length of @params, or `-1` if it is nul-terminated
2286  * @separators: the separator byte character set between parameters. (usually
2287  *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2288  *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2289  *   anything but ASCII characters. You may pass an empty set, in which case
2290  *   no splitting will occur.
2291  * @flags: flags to modify the way the parameters are handled.
2292  * @error: #GError for error reporting, or %NULL to ignore.
2293  *
2294  * Many URI schemes include one or more attribute/value pairs as part of the URI
2295  * value. This method can be used to parse them into a hash table. When an
2296  * attribute has multiple occurrences, the last value is the final returned
2297  * value. If you need to handle repeated attributes differently, use
2298  * #GUriParamsIter.
2299  *
2300  * The @params string is assumed to still be `%`-encoded, but the returned
2301  * values will be fully decoded. (Thus it is possible that the returned values
2302  * may contain `=` or @separators, if the value was encoded in the input.)
2303  * Invalid `%`-encoding is treated as with the %G_URI_FLAGS_PARSE_RELAXED
2304  * rules for g_uri_parse(). (However, if @params is the path or query string
2305  * from a #GUri that was parsed without %G_URI_FLAGS_PARSE_RELAXED and
2306  * %G_URI_FLAGS_ENCODED, then you already know that it does not contain any
2307  * invalid encoding.)
2308  *
2309  * %G_URI_PARAMS_WWW_FORM is handled as documented for g_uri_params_iter_init().
2310  *
2311  * If %G_URI_PARAMS_CASE_INSENSITIVE is passed to @flags, attributes will be
2312  * compared case-insensitively, so a params string `attr=123&Attr=456` will only
2313  * return a single attribute–value pair, `Attr=456`. Case will be preserved in
2314  * the returned attributes.
2315  *
2316  * If @params cannot be parsed (for example, it contains two @separators
2317  * characters in a row), then @error is set and %NULL is returned.
2318  *
2319  * Return value: (transfer full) (element-type utf8 utf8):
2320  *     A hash table of attribute/value pairs, with both names and values
2321  *     fully-decoded; or %NULL on error.
2322  *
2323  * Since: 2.66
2324  */
2325 GHashTable *
2326 g_uri_parse_params (const gchar     *params,
2327                     gssize           length,
2328                     const gchar     *separators,
2329                     GUriParamsFlags  flags,
2330                     GError         **error)
2331 {
2332   GHashTable *hash;
2333   GUriParamsIter iter;
2334   gchar *attribute, *value;
2335   GError *err = NULL;
2336
2337   g_return_val_if_fail (length == 0 || params != NULL, NULL);
2338   g_return_val_if_fail (length >= -1, NULL);
2339   g_return_val_if_fail (separators != NULL, NULL);
2340   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2341
2342   if (flags & G_URI_PARAMS_CASE_INSENSITIVE)
2343     {
2344       hash = g_hash_table_new_full (str_ascii_case_hash,
2345                                     str_ascii_case_equal,
2346                                     g_free, g_free);
2347     }
2348   else
2349     {
2350       hash = g_hash_table_new_full (g_str_hash, g_str_equal,
2351                                     g_free, g_free);
2352     }
2353
2354   g_uri_params_iter_init (&iter, params, length, separators, flags);
2355
2356   while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
2357     g_hash_table_insert (hash, attribute, value);
2358
2359   if (err)
2360     {
2361       g_propagate_error (error, g_steal_pointer (&err));
2362       g_hash_table_destroy (hash);
2363       return NULL;
2364     }
2365
2366   return g_steal_pointer (&hash);
2367 }
2368
2369 /**
2370  * g_uri_get_scheme:
2371  * @uri: a #GUri
2372  *
2373  * Gets @uri's scheme. Note that this will always be all-lowercase,
2374  * regardless of the string or strings that @uri was created from.
2375  *
2376  * Return value: (not nullable): @uri's scheme.
2377  *
2378  * Since: 2.66
2379  */
2380 const gchar *
2381 g_uri_get_scheme (GUri *uri)
2382 {
2383   g_return_val_if_fail (uri != NULL, NULL);
2384
2385   return uri->scheme;
2386 }
2387
2388 /**
2389  * g_uri_get_userinfo:
2390  * @uri: a #GUri
2391  *
2392  * Gets @uri's userinfo, which may contain `%`-encoding, depending on
2393  * the flags with which @uri was created.
2394  *
2395  * Return value: (nullable): @uri's userinfo.
2396  *
2397  * Since: 2.66
2398  */
2399 const gchar *
2400 g_uri_get_userinfo (GUri *uri)
2401 {
2402   g_return_val_if_fail (uri != NULL, NULL);
2403
2404   return uri->userinfo;
2405 }
2406
2407 /**
2408  * g_uri_get_user:
2409  * @uri: a #GUri
2410  *
2411  * Gets the ‘username’ component of @uri's userinfo, which may contain
2412  * `%`-encoding, depending on the flags with which @uri was created.
2413  * If @uri was not created with %G_URI_FLAGS_HAS_PASSWORD or
2414  * %G_URI_FLAGS_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo().
2415  *
2416  * Return value: (nullable): @uri's user.
2417  *
2418  * Since: 2.66
2419  */
2420 const gchar *
2421 g_uri_get_user (GUri *uri)
2422 {
2423   g_return_val_if_fail (uri != NULL, NULL);
2424
2425   return uri->user;
2426 }
2427
2428 /**
2429  * g_uri_get_password:
2430  * @uri: a #GUri
2431  *
2432  * Gets @uri's password, which may contain `%`-encoding, depending on
2433  * the flags with which @uri was created. (If @uri was not created
2434  * with %G_URI_FLAGS_HAS_PASSWORD then this will be %NULL.)
2435  *
2436  * Return value: (nullable): @uri's password.
2437  *
2438  * Since: 2.66
2439  */
2440 const gchar *
2441 g_uri_get_password (GUri *uri)
2442 {
2443   g_return_val_if_fail (uri != NULL, NULL);
2444
2445   return uri->password;
2446 }
2447
2448 /**
2449  * g_uri_get_auth_params:
2450  * @uri: a #GUri
2451  *
2452  * Gets @uri's authentication parameters, which may contain
2453  * `%`-encoding, depending on the flags with which @uri was created.
2454  * (If @uri was not created with %G_URI_FLAGS_HAS_AUTH_PARAMS then this will
2455  * be %NULL.)
2456  *
2457  * Depending on the URI scheme, g_uri_parse_params() may be useful for
2458  * further parsing this information.
2459  *
2460  * Return value: (nullable): @uri's authentication parameters.
2461  *
2462  * Since: 2.66
2463  */
2464 const gchar *
2465 g_uri_get_auth_params (GUri *uri)
2466 {
2467   g_return_val_if_fail (uri != NULL, NULL);
2468
2469   return uri->auth_params;
2470 }
2471
2472 /**
2473  * g_uri_get_host:
2474  * @uri: a #GUri
2475  *
2476  * Gets @uri's host. This will never have `%`-encoded characters,
2477  * unless it is non-UTF-8 (which can only be the case if @uri was
2478  * created with %G_URI_FLAGS_NON_DNS).
2479  *
2480  * If @uri contained an IPv6 address literal, this value will be just
2481  * that address, without the brackets around it that are necessary in
2482  * the string form of the URI. Note that in this case there may also
2483  * be a scope ID attached to the address. Eg, `fe80::1234%``em1` (or
2484  * `fe80::1234%``25em1` if the string is still encoded).
2485  *
2486  * Return value: (nullable): @uri's host.
2487  *
2488  * Since: 2.66
2489  */
2490 const gchar *
2491 g_uri_get_host (GUri *uri)
2492 {
2493   g_return_val_if_fail (uri != NULL, NULL);
2494
2495   return uri->host;
2496 }
2497
2498 /**
2499  * g_uri_get_port:
2500  * @uri: a #GUri
2501  *
2502  * Gets @uri's port.
2503  *
2504  * Return value: @uri's port, or `-1` if no port was specified.
2505  *
2506  * Since: 2.66
2507  */
2508 gint
2509 g_uri_get_port (GUri *uri)
2510 {
2511   g_return_val_if_fail (uri != NULL, -1);
2512
2513   if (uri->port == -1 && uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE)
2514     return g_uri_get_default_scheme_port (uri->scheme);
2515
2516   return uri->port;
2517 }
2518
2519 /**
2520  * g_uri_get_path:
2521  * @uri: a #GUri
2522  *
2523  * Gets @uri's path, which may contain `%`-encoding, depending on the
2524  * flags with which @uri was created.
2525  *
2526  * Return value: (not nullable): @uri's path.
2527  *
2528  * Since: 2.66
2529  */
2530 const gchar *
2531 g_uri_get_path (GUri *uri)
2532 {
2533   g_return_val_if_fail (uri != NULL, NULL);
2534
2535   return uri->path;
2536 }
2537
2538 /**
2539  * g_uri_get_query:
2540  * @uri: a #GUri
2541  *
2542  * Gets @uri's query, which may contain `%`-encoding, depending on the
2543  * flags with which @uri was created.
2544  *
2545  * For queries consisting of a series of `name=value` parameters,
2546  * #GUriParamsIter or g_uri_parse_params() may be useful.
2547  *
2548  * Return value: (nullable): @uri's query.
2549  *
2550  * Since: 2.66
2551  */
2552 const gchar *
2553 g_uri_get_query (GUri *uri)
2554 {
2555   g_return_val_if_fail (uri != NULL, NULL);
2556
2557   return uri->query;
2558 }
2559
2560 /**
2561  * g_uri_get_fragment:
2562  * @uri: a #GUri
2563  *
2564  * Gets @uri's fragment, which may contain `%`-encoding, depending on
2565  * the flags with which @uri was created.
2566  *
2567  * Return value: (nullable): @uri's fragment.
2568  *
2569  * Since: 2.66
2570  */
2571 const gchar *
2572 g_uri_get_fragment (GUri *uri)
2573 {
2574   g_return_val_if_fail (uri != NULL, NULL);
2575
2576   return uri->fragment;
2577 }
2578
2579
2580 /**
2581  * g_uri_get_flags:
2582  * @uri: a #GUri
2583  *
2584  * Gets @uri's flags set upon construction.
2585  *
2586  * Return value: @uri's flags.
2587  *
2588  * Since: 2.66
2589  **/
2590 GUriFlags
2591 g_uri_get_flags (GUri *uri)
2592 {
2593   g_return_val_if_fail (uri != NULL, G_URI_FLAGS_NONE);
2594
2595   return uri->flags;
2596 }
2597
2598 /**
2599  * g_uri_unescape_segment:
2600  * @escaped_string: (nullable): A string, may be %NULL
2601  * @escaped_string_end: (nullable): Pointer to end of @escaped_string,
2602  *   may be %NULL
2603  * @illegal_characters: (nullable): An optional string of illegal
2604  *   characters not to be allowed, may be %NULL
2605  *
2606  * Unescapes a segment of an escaped string.
2607  *
2608  * If any of the characters in @illegal_characters or the NUL
2609  * character appears as an escaped character in @escaped_string, then
2610  * that is an error and %NULL will be returned. This is useful if you
2611  * want to avoid for instance having a slash being expanded in an
2612  * escaped path element, which might confuse pathname handling.
2613  *
2614  * Note: `NUL` byte is not accepted in the output, in contrast to
2615  * g_uri_unescape_bytes().
2616  *
2617  * Returns: (nullable): an unescaped version of @escaped_string,
2618  * or %NULL on error. The returned string should be freed when no longer
2619  * needed.  As a special case if %NULL is given for @escaped_string, this
2620  * function will return %NULL.
2621  *
2622  * Since: 2.16
2623  **/
2624 gchar *
2625 g_uri_unescape_segment (const gchar *escaped_string,
2626                         const gchar *escaped_string_end,
2627                         const gchar *illegal_characters)
2628 {
2629   gchar *unescaped;
2630   gsize length;
2631   gssize decoded_len;
2632
2633   if (!escaped_string)
2634     return NULL;
2635
2636   if (escaped_string_end)
2637     length = escaped_string_end - escaped_string;
2638   else
2639     length = strlen (escaped_string);
2640
2641   decoded_len = uri_decoder (&unescaped,
2642                              illegal_characters,
2643                              escaped_string, length,
2644                              FALSE, FALSE,
2645                              G_URI_FLAGS_ENCODED,
2646                              0, NULL);
2647   if (decoded_len < 0)
2648     return NULL;
2649
2650   if (memchr (unescaped, '\0', decoded_len))
2651     {
2652       g_free (unescaped);
2653       return NULL;
2654     }
2655
2656   return unescaped;
2657 }
2658
2659 /**
2660  * g_uri_unescape_string:
2661  * @escaped_string: an escaped string to be unescaped.
2662  * @illegal_characters: (nullable): a string of illegal characters
2663  *   not to be allowed, or %NULL.
2664  *
2665  * Unescapes a whole escaped string.
2666  *
2667  * If any of the characters in @illegal_characters or the NUL
2668  * character appears as an escaped character in @escaped_string, then
2669  * that is an error and %NULL will be returned. This is useful if you
2670  * want to avoid for instance having a slash being expanded in an
2671  * escaped path element, which might confuse pathname handling.
2672  *
2673  * Returns: (nullable): an unescaped version of @escaped_string.
2674  * The returned string should be freed when no longer needed.
2675  *
2676  * Since: 2.16
2677  **/
2678 gchar *
2679 g_uri_unescape_string (const gchar *escaped_string,
2680                        const gchar *illegal_characters)
2681 {
2682   return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
2683 }
2684
2685 /**
2686  * g_uri_escape_string:
2687  * @unescaped: the unescaped input string.
2688  * @reserved_chars_allowed: (nullable): a string of reserved
2689  *   characters that are allowed to be used, or %NULL.
2690  * @allow_utf8: %TRUE if the result can include UTF-8 characters.
2691  *
2692  * Escapes a string for use in a URI.
2693  *
2694  * Normally all characters that are not "unreserved" (i.e. ASCII
2695  * alphanumerical characters plus dash, dot, underscore and tilde) are
2696  * escaped. But if you specify characters in @reserved_chars_allowed
2697  * they are not escaped. This is useful for the "reserved" characters
2698  * in the URI specification, since those are allowed unescaped in some
2699  * portions of a URI.
2700  *
2701  * Returns: (not nullable): an escaped version of @unescaped. The
2702  * returned string should be freed when no longer needed.
2703  *
2704  * Since: 2.16
2705  **/
2706 gchar *
2707 g_uri_escape_string (const gchar *unescaped,
2708                      const gchar *reserved_chars_allowed,
2709                      gboolean     allow_utf8)
2710 {
2711   GString *s;
2712
2713   g_return_val_if_fail (unescaped != NULL, NULL);
2714
2715   s = g_string_sized_new (strlen (unescaped) * 1.25);
2716
2717   g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
2718
2719   return g_string_free (s, FALSE);
2720 }
2721
2722 /**
2723  * g_uri_unescape_bytes:
2724  * @escaped_string: A URI-escaped string
2725  * @length: the length (in bytes) of @escaped_string to escape, or `-1` if it
2726  *   is nul-terminated.
2727  * @illegal_characters: (nullable): a string of illegal characters
2728  *   not to be allowed, or %NULL.
2729  * @error: #GError for error reporting, or %NULL to ignore.
2730  *
2731  * Unescapes a segment of an escaped string as binary data.
2732  *
2733  * Note that in contrast to g_uri_unescape_string(), this does allow
2734  * nul bytes to appear in the output.
2735  *
2736  * If any of the characters in @illegal_characters appears as an escaped
2737  * character in @escaped_string, then that is an error and %NULL will be
2738  * returned. This is useful if you want to avoid for instance having a slash
2739  * being expanded in an escaped path element, which might confuse pathname
2740  * handling.
2741  *
2742  * Returns: (transfer full): an unescaped version of @escaped_string
2743  *     or %NULL on error (if decoding failed, using %G_URI_ERROR_FAILED error
2744  *     code). The returned #GBytes should be unreffed when no longer needed.
2745  *
2746  * Since: 2.66
2747  **/
2748 GBytes *
2749 g_uri_unescape_bytes (const gchar *escaped_string,
2750                       gssize       length,
2751                       const char *illegal_characters,
2752                       GError     **error)
2753 {
2754   gchar *buf;
2755   gssize unescaped_length;
2756
2757   g_return_val_if_fail (escaped_string != NULL, NULL);
2758   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
2759
2760   if (length == -1)
2761     length = strlen (escaped_string);
2762
2763   unescaped_length = uri_decoder (&buf,
2764                                   illegal_characters,
2765                                   escaped_string, length,
2766                                   FALSE,
2767                                   FALSE,
2768                                   G_URI_FLAGS_ENCODED,
2769                                   G_URI_ERROR_FAILED, error);
2770   if (unescaped_length == -1)
2771     return NULL;
2772
2773   return g_bytes_new_take (buf, unescaped_length);
2774 }
2775
2776 /**
2777  * g_uri_escape_bytes:
2778  * @unescaped: (array length=length): the unescaped input data.
2779  * @length: the length of @unescaped
2780  * @reserved_chars_allowed: (nullable): a string of reserved
2781  *   characters that are allowed to be used, or %NULL.
2782  *
2783  * Escapes arbitrary data for use in a URI.
2784  *
2785  * Normally all characters that are not ‘unreserved’ (i.e. ASCII
2786  * alphanumerical characters plus dash, dot, underscore and tilde) are
2787  * escaped. But if you specify characters in @reserved_chars_allowed
2788  * they are not escaped. This is useful for the ‘reserved’ characters
2789  * in the URI specification, since those are allowed unescaped in some
2790  * portions of a URI.
2791  *
2792  * Though technically incorrect, this will also allow escaping nul
2793  * bytes as `%``00`.
2794  *
2795  * Returns: (not nullable) (transfer full): an escaped version of @unescaped.
2796  *     The returned string should be freed when no longer needed.
2797  *
2798  * Since: 2.66
2799  */
2800 gchar *
2801 g_uri_escape_bytes (const guint8 *unescaped,
2802                     gsize         length,
2803                     const gchar  *reserved_chars_allowed)
2804 {
2805   GString *string;
2806
2807   g_return_val_if_fail (unescaped != NULL, NULL);
2808
2809   string = g_string_sized_new (length * 1.25);
2810
2811   _uri_encoder (string, unescaped, length,
2812                reserved_chars_allowed, FALSE);
2813
2814   return g_string_free (string, FALSE);
2815 }
2816
2817 static gssize
2818 g_uri_scheme_length (const gchar *uri)
2819 {
2820   const gchar *p;
2821
2822   p = uri;
2823   if (!g_ascii_isalpha (*p))
2824     return -1;
2825   p++;
2826   while (g_ascii_isalnum (*p) || *p == '.' || *p == '+' || *p == '-')
2827     p++;
2828
2829   if (p > uri && *p == ':')
2830     return p - uri;
2831
2832   return -1;
2833 }
2834
2835 /**
2836  * g_uri_parse_scheme:
2837  * @uri: a valid URI.
2838  *
2839  * Gets the scheme portion of a URI string.
2840  * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2841  * as:
2842  * |[
2843  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2844  * ]|
2845  * Common schemes include `file`, `https`, `svn+ssh`, etc.
2846  *
2847  * Returns: (transfer full) (nullable): The ‘scheme’ component of the URI, or
2848  *     %NULL on error. The returned string should be freed when no longer needed.
2849  *
2850  * Since: 2.16
2851  **/
2852 gchar *
2853 g_uri_parse_scheme (const gchar *uri)
2854 {
2855   gssize len;
2856
2857   g_return_val_if_fail (uri != NULL, NULL);
2858
2859   len = g_uri_scheme_length (uri);
2860   return len == -1 ? NULL : g_strndup (uri, len);
2861 }
2862
2863 /**
2864  * g_uri_peek_scheme:
2865  * @uri: a valid URI.
2866  *
2867  * Gets the scheme portion of a URI string.
2868  * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2869  * as:
2870  * |[
2871  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2872  * ]|
2873  * Common schemes include `file`, `https`, `svn+ssh`, etc.
2874  *
2875  * Unlike g_uri_parse_scheme(), the returned scheme is normalized to
2876  * all-lowercase and does not need to be freed.
2877  *
2878  * Returns: (transfer none) (nullable): The ‘scheme’ component of the URI, or
2879  *     %NULL on error. The returned string is normalized to all-lowercase, and
2880  *     interned via g_intern_string(), so it does not need to be freed.
2881  *
2882  * Since: 2.66
2883  **/
2884 const gchar *
2885 g_uri_peek_scheme (const gchar *uri)
2886 {
2887   gssize len;
2888   gchar *lower_scheme;
2889   const gchar *scheme;
2890
2891   g_return_val_if_fail (uri != NULL, NULL);
2892
2893   len = g_uri_scheme_length (uri);
2894   if (len == -1)
2895     return NULL;
2896
2897   lower_scheme = g_ascii_strdown (uri, len);
2898   scheme = g_intern_string (lower_scheme);
2899   g_free (lower_scheme);
2900
2901   return scheme;
2902 }
2903
/* Error-domain quark for GUri errors. Per GLib's G_DEFINE_QUARK
 * documentation, this expands to the definition of g_uri_error_quark(),
 * which returns the GQuark for the string "g-uri-quark". */
2904 G_DEFINE_QUARK (g-uri-quark, g_uri_error)