1 /* GLIB - Library of useful routines for C programming
2 * Copyright © 2020 Red Hat, Inc.
4 * SPDX-License-Identifier: LGPL-2.1-or-later
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General
17 * Public License along with this library; if not, see
18 * <http://www.gnu.org/licenses/>.
28 #include "glib-private.h"
29 #include "guriprivate.h"
33 * @short_description: URI-handling utilities
36 * The #GUri type and related functions can be used to parse URIs into
37 * their components, and build valid URIs from individual components.
39 * Note that #GUri scope is to help manipulate URIs in various applications,
40 * following [RFC 3986](https://tools.ietf.org/html/rfc3986). In particular,
41 * it doesn't intend to cover web browser needs, and doesn't implement the
42 * [WHATWG URL](https://url.spec.whatwg.org/) standard. No APIs are provided to
44 * [homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack), so
45 * #GUri is not suitable for formatting URIs for display to the user for making
46 * security-sensitive decisions.
48 * ## Relative and absolute URIs # {#relative-absolute-uris}
50 * As defined in [RFC 3986](https://tools.ietf.org/html/rfc3986#section-4), the
51 * hierarchical nature of URIs means that they can either be ‘relative
52 * references’ (sometimes referred to as ‘relative URIs’) or ‘URIs’ (for
53 * clarity, ‘URIs’ are referred to in this documentation as
54 * ‘absolute URIs’ — although
55 * [in contrast to RFC 3986](https://tools.ietf.org/html/rfc3986#section-4.3),
56 * fragment identifiers are always allowed).
58 * Relative references have one or more components of the URI missing. In
59 * particular, they have no scheme. Any other component, such as hostname,
60 * query, etc. may be missing, apart from a path, which has to be specified (but
61 * may be empty). The path may be relative, starting with `./` rather than `/`.
63 * For example, `./path?query`, `/?query#fragment` and `//example.com`
64 * are all valid relative references.
66 * Absolute URIs have a scheme specified. Any other components of the URI which
67 * are missing are specified as explicitly unset in the URI, rather than being
68 * resolved relative to a base URI using g_uri_parse_relative().
70 * For example, a valid absolute URI is `file:///home/bob` or
71 * `https://search.com?query=string`.
73 * A #GUri instance is always an absolute URI. A string may be an absolute URI
74 * or a relative reference; see the documentation for individual functions as to
75 * what forms they accept.
79 * The most minimalist APIs for parsing URIs are g_uri_split() and
80 * g_uri_split_with_user(). These split a URI into its component
81 * parts, and return the parts; the difference between the two is that
82 * g_uri_split() treats the ‘userinfo’ component of the URI as a
83 * single element, while g_uri_split_with_user() can (depending on the
84 * #GUriFlags you pass) treat it as containing a username, password,
85 * and authentication parameters. Alternatively, g_uri_split_network()
86 * can be used when you are only interested in the components that are
87 * needed to initiate a network connection to the service (scheme,
90 * g_uri_parse() is similar to g_uri_split(), but instead of returning
91 * individual strings, it returns a #GUri structure (and it requires
92 * that the URI be an absolute URI).
94 * g_uri_resolve_relative() and g_uri_parse_relative() allow you to
95 * resolve a relative URI relative to a base URI.
96 * g_uri_resolve_relative() takes two strings and returns a string,
97 * and g_uri_parse_relative() takes a #GUri and a string and returns a
100 * All of the parsing functions take a #GUriFlags argument describing
101 * exactly how to parse the URI; see the documentation for that type
102 * for more details on the specific flags that you can pass. If you
103 * need to choose different flags based on the type of URI, you can
104 * use g_uri_peek_scheme() on the URI string to check the scheme
105 * first, and use that to decide what flags to parse it with.
107 * For example, you might want to use %G_URI_PARAMS_WWW_FORM when parsing the
108 * params for a web URI, so compare the result of g_uri_peek_scheme() against
109 * `http` and `https`.
113 * g_uri_join() and g_uri_join_with_user() can be used to construct
114 * valid URI strings from a set of component strings. They are the
115 * inverse of g_uri_split() and g_uri_split_with_user().
117 * Similarly, g_uri_build() and g_uri_build_with_user() can be used to
118 * construct a #GUri from a set of component strings.
120 * As with the parsing functions, the building functions take a
121 * #GUriFlags argument. In particular, it is important to keep in mind
122 * whether the URI components you are using are already `%`-encoded. If so,
123 * you must pass the %G_URI_FLAGS_ENCODED flag.
127 * Note that Windows and Unix both define special rules for parsing
128 * `file://` URIs (involving non-UTF-8 character sets on Unix, and the
129 * interpretation of path separators on Windows). #GUri does not
130 * implement these rules. Use g_filename_from_uri() and
131 * g_filename_to_uri() if you want to properly convert between
132 * `file://` URIs and local filenames.
136 * Note that there is no `g_uri_equal ()` function, because comparing
137 * URIs usefully requires scheme-specific knowledge that #GUri does
138 * not have. #GUri can help with normalization if you use the various
139 * encoded #GUriFlags as well as %G_URI_FLAGS_SCHEME_NORMALIZE; however,
140 * this normalization is not comprehensive.
141 * For example, `data:,foo` and `data:;base64,Zm9v` resolve to the same
142 * thing according to the `data:` URI specification which GLib does not
151 * A parsed absolute URI.
153 * Since #GUri only represents absolute URIs, all #GUris will have a
154 * URI scheme, so g_uri_get_scheme() will always return a non-%NULL
155 * answer. Likewise, by definition, all URIs have a path component, so
156 * g_uri_get_path() will always return a non-%NULL string (which may be empty).
158 * If the URI string has an
159 * [‘authority’ component](https://tools.ietf.org/html/rfc3986#section-3) (that
160 * is, if the scheme is followed by `://` rather than just `:`), then the
161 * #GUri will contain a hostname, and possibly a port and ‘userinfo’.
162 * Additionally, depending on how the #GUri was constructed/parsed (for example,
163 * using the %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS flags),
164 * the userinfo may be split out into a username, password, and
165 * additional authorization-related parameters.
167 * Normally, the components of a #GUri will have all `%`-encoded
168 * characters decoded. However, if you construct/parse a #GUri with
169 * %G_URI_FLAGS_ENCODED, then the `%`-encoding will be preserved instead in
170 * the userinfo, path, and query fields (and in the host field if also
171 * created with %G_URI_FLAGS_NON_DNS). In particular, this is necessary if
172 * the URI may contain binary data or non-UTF-8 text, or if decoding
173 * the components might change the interpretation of the URI.
175 * For example, with the encoded flag:
177 * |[<!-- language="C" -->
178 * g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_ENCODED, &err);
179 * g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue");
182 * While the default `%`-decoding behaviour would give:
184 * |[<!-- language="C" -->
185 * g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_NONE, &err);
186 * g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http://host/path?param=value");
189 * During decoding, if an invalid UTF-8 string is encountered, parsing will fail
190 * with an error indicating the bad string location:
192 * |[<!-- language="C" -->
193 * g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fbad%3D%00alue", G_URI_FLAGS_NONE, &err);
194 * g_assert_error (err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY);
197 * You should pass %G_URI_FLAGS_ENCODED or %G_URI_FLAGS_ENCODED_QUERY if you
198 * need to handle that case manually. In particular, if the query string
199 * contains `=` characters that are `%`-encoded, you should let
200 * g_uri_parse_params() do the decoding of the query, once.
202 * #GUri is immutable once constructed, and can safely be accessed from
203 * multiple threads. Its reference counting is atomic.
227 * Increments the reference count of @uri by one.
234 g_uri_ref (GUri *uri)
236 g_return_val_if_fail (uri != NULL, NULL);
238 return g_atomic_rc_box_acquire (uri);
242 g_uri_clear (GUri *uri)
244 g_free (uri->scheme);
245 g_free (uri->userinfo);
249 g_free (uri->fragment);
251 g_free (uri->password);
252 g_free (uri->auth_params);
256 * g_uri_unref: (skip)
259 * Atomically decrements the reference count of @uri by one.
261 * When the reference count reaches zero, the resources allocated by
267 g_uri_unref (GUri *uri)
269 g_return_if_fail (uri != NULL);
271 g_atomic_rc_box_release_full (uri, (GDestroyNotify)g_uri_clear);
275 g_uri_char_is_unreserved (gchar ch)
277 if (g_ascii_isalnum (ch))
279 return ch == '-' || ch == '.' || ch == '_' || ch == '~';
/* XDIGIT: numeric value of the ASCII hex digit @c. The caller must already
 * have checked that @c is a hex digit; `& 0x4F` folds 'a'..'f' onto 'A'..'F'.
 *
 * HEXCHAR: byte value of the `%XX` escape that starts at @s (so @s points at
 * the '%'). @s is parenthesised so that expressions such as `p + 1` can be
 * passed safely.
 */
#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
#define HEXCHAR(s) ((XDIGIT ((s)[1]) << 4) + XDIGIT ((s)[2]))
286 uri_decoder (gchar **out,
287 const gchar *illegal_chars,
290 gboolean just_normalize,
293 GUriError parse_error,
298 const gchar *invalid, *s, *end;
301 if (!(flags & G_URI_FLAGS_ENCODED))
302 just_normalize = FALSE;
304 decoded = g_string_sized_new (length + 1);
305 for (s = start, end = s + length; s < end; s++)
310 !g_ascii_isxdigit (s[1]) ||
311 !g_ascii_isxdigit (s[2]))
313 /* % followed by non-hex or the end of the string; this is an error */
314 if (!(flags & G_URI_FLAGS_PARSE_RELAXED))
316 g_set_error_literal (error, G_URI_ERROR, parse_error,
317 /* xgettext: no-c-format */
318 _("Invalid %-encoding in URI"));
319 g_string_free (decoded, TRUE);
323 /* In non-strict mode, just let it through; we *don't*
324 * fix it to "%25", since that might change the way that
325 * the URI's owner would interpret it.
327 g_string_append_c (decoded, *s);
332 if (illegal_chars && strchr (illegal_chars, c))
334 g_set_error_literal (error, G_URI_ERROR, parse_error,
335 _("Illegal character in URI"));
336 g_string_free (decoded, TRUE);
339 if (just_normalize && !g_uri_char_is_unreserved (c))
341 /* Leave the % sequence there but normalize it. */
342 g_string_append_c (decoded, *s);
343 g_string_append_c (decoded, g_ascii_toupper (s[1]));
344 g_string_append_c (decoded, g_ascii_toupper (s[2]));
349 g_string_append_c (decoded, c);
353 else if (www_form && *s == '+')
354 g_string_append_c (decoded, ' ');
355 /* Normalize any illegal characters. */
356 else if (just_normalize && (!g_ascii_isgraph (*s)))
357 g_string_append_printf (decoded, "%%%02X", (guchar)*s);
359 g_string_append_c (decoded, *s);
365 if (!(flags & G_URI_FLAGS_ENCODED) &&
366 !g_utf8_validate (decoded->str, len, &invalid))
368 g_set_error_literal (error, G_URI_ERROR, parse_error,
369 _("Non-UTF-8 characters in URI"));
370 g_string_free (decoded, TRUE);
375 *out = g_string_free (decoded, FALSE);
377 g_string_free (decoded, TRUE);
383 uri_decode (gchar **out,
384 const gchar *illegal_chars,
389 GUriError parse_error,
392 return uri_decoder (out, illegal_chars, start, length, FALSE, www_form, flags,
393 parse_error, error) != -1;
397 uri_normalize (gchar **out,
401 GUriError parse_error,
404 return uri_decoder (out, NULL, start, length, TRUE, FALSE, flags,
405 parse_error, error) != -1;
410 const gchar *reserved_chars_allowed)
412 if (g_uri_char_is_unreserved (c))
415 if (reserved_chars_allowed && strchr (reserved_chars_allowed, c))
422 _uri_encoder (GString *out,
425 const gchar *reserved_chars_allowed,
428 static const gchar hex[] = "0123456789ABCDEF";
429 const guchar *p = start;
430 const guchar *end = p + length;
434 gunichar multibyte_utf8_char = 0;
436 if (allow_utf8 && *p >= 0x80)
437 multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p);
439 if (multibyte_utf8_char > 0 &&
440 multibyte_utf8_char != (gunichar) -1 && multibyte_utf8_char != (gunichar) -2)
442 gint len = g_utf8_skip [*p];
443 g_string_append_len (out, (gchar *)p, len);
446 else if (is_valid (*p, reserved_chars_allowed))
448 g_string_append_c (out, *p);
453 g_string_append_c (out, '%');
454 g_string_append_c (out, hex[*p >> 4]);
455 g_string_append_c (out, hex[*p & 0xf]);
461 /* Parse the IP-literal construction from RFC 6874 (which extends RFC 3986 to
462 * support IPv6 zone identifiers.
464 * Currently, IP versions beyond 6 (i.e. the IPvFuture rule) are unsupported.
465 * There’s no point supporting them until (a) they exist and (b) the rest of the
466 * stack (notably, sockets) supports them.
470 * IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]"
472 * ZoneID = 1*( unreserved / pct-encoded )
474 * IPv6addrz = IPv6address "%25" ZoneID
476 * If %G_URI_FLAGS_PARSE_RELAXED is specified, this function also accepts:
478 * IPv6addrz = IPv6address "%" ZoneID
481 parse_ip_literal (const gchar *start,
487 gchar *pct, *zone_id = NULL;
489 gsize addr_length = 0;
490 gsize zone_id_length = 0;
491 gchar *decoded_zone_id = NULL;
493 if (start[length - 1] != ']')
494 goto bad_ipv6_literal;
496 /* Drop the square brackets */
497 addr = g_strndup (start + 1, length - 2);
498 addr_length = length - 2;
500 /* If there's an IPv6 scope ID, split out the zone. */
501 pct = strchr (addr, '%');
506 if (addr_length - (pct - addr) >= 4 &&
507 *(pct + 1) == '2' && *(pct + 2) == '5')
510 zone_id_length = addr_length - (zone_id - addr);
512 else if (flags & G_URI_FLAGS_PARSE_RELAXED &&
513 addr_length - (pct - addr) >= 2)
516 zone_id_length = addr_length - (zone_id - addr);
519 goto bad_ipv6_literal;
521 g_assert (zone_id_length >= 1);
524 /* addr must be an IPv6 address */
525 if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
526 goto bad_ipv6_literal;
528 /* Zone ID must be valid. It can contain %-encoded characters. */
529 if (zone_id != NULL &&
530 !uri_decode (&decoded_zone_id, NULL, zone_id, zone_id_length, FALSE,
531 flags, G_URI_ERROR_BAD_HOST, NULL))
532 goto bad_ipv6_literal;
535 if (out != NULL && decoded_zone_id != NULL)
536 *out = g_strconcat (addr, "%", decoded_zone_id, NULL);
537 else if (out != NULL)
538 *out = g_steal_pointer (&addr);
541 g_free (decoded_zone_id);
547 g_free (decoded_zone_id);
548 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
549 _("Invalid IPv6 address ‘%.*s’ in URI"),
550 (gint)length, start);
556 parse_host (const gchar *start,
562 gchar *decoded = NULL, *host;
567 if (!parse_ip_literal (start, length, flags, &host, error))
572 if (g_ascii_isdigit (*start))
574 addr = g_strndup (start, length);
575 if (g_hostname_is_ip_address (addr))
583 if (flags & G_URI_FLAGS_NON_DNS)
585 if (!uri_normalize (&decoded, start, length, flags,
586 G_URI_ERROR_BAD_HOST, error))
588 host = g_steal_pointer (&decoded);
592 flags &= ~G_URI_FLAGS_ENCODED;
593 if (!uri_decode (&decoded, NULL, start, length, FALSE, flags,
594 G_URI_ERROR_BAD_HOST, error))
597 /* You're not allowed to %-encode an IP address, so if it wasn't
598 * one before, it better not be one now.
600 if (g_hostname_is_ip_address (decoded))
603 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
604 _("Illegal encoded IP address ‘%.*s’ in URI"),
605 (gint)length, start);
609 if (g_hostname_is_non_ascii (decoded))
611 host = g_hostname_to_ascii (decoded);
615 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
616 _("Illegal internationalized hostname ‘%.*s’ in URI"),
617 (gint) length, start);
623 host = g_steal_pointer (&decoded);
628 *out = g_steal_pointer (&host);
636 parse_port (const gchar *start,
644 /* strtoul() allows leading + or -, so we have to check this first. */
645 if (!g_ascii_isdigit (*start))
647 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
648 _("Could not parse port ‘%.*s’ in URI"),
649 (gint)length, start);
653 /* We know that *(start + length) is either '\0' or a non-numeric
654 * character, so strtoul() won't scan beyond it.
656 parsed_port = strtoul (start, &end, 10);
657 if (end != start + length)
659 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
660 _("Could not parse port ‘%.*s’ in URI"),
661 (gint)length, start);
664 else if (parsed_port > 65535)
666 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
667 _("Port ‘%.*s’ in URI is out of range"),
668 (gint)length, start);
678 parse_userinfo (const gchar *start,
686 const gchar *user_end = NULL, *password_end = NULL, *auth_params_end;
688 auth_params_end = start + length;
689 if (flags & G_URI_FLAGS_HAS_AUTH_PARAMS)
690 password_end = memchr (start, ';', auth_params_end - start);
692 password_end = auth_params_end;
693 if (flags & G_URI_FLAGS_HAS_PASSWORD)
694 user_end = memchr (start, ':', password_end - start);
696 user_end = password_end;
698 if (!uri_normalize (user, start, user_end - start, flags,
699 G_URI_ERROR_BAD_USER, error))
702 if (*user_end == ':')
704 start = user_end + 1;
705 if (!uri_normalize (password, start, password_end - start, flags,
706 G_URI_ERROR_BAD_PASSWORD, error))
709 g_clear_pointer (user, g_free);
716 if (*password_end == ';')
718 start = password_end + 1;
719 if (!uri_normalize (auth_params, start, auth_params_end - start, flags,
720 G_URI_ERROR_BAD_AUTH_PARAMS, error))
723 g_clear_pointer (user, g_free);
725 g_clear_pointer (password, g_free);
729 else if (auth_params)
736 uri_cleanup (const gchar *uri_string)
741 /* Skip leading whitespace */
742 while (g_ascii_isspace (*uri_string))
745 /* Ignore trailing whitespace */
746 end = uri_string + strlen (uri_string);
747 while (end > uri_string && g_ascii_isspace (*(end - 1)))
750 /* Copy the rest, encoding unencoded spaces and stripping other whitespace */
751 copy = g_string_sized_new (end - uri_string);
752 while (uri_string < end)
754 if (*uri_string == ' ')
755 g_string_append (copy, "%20");
756 else if (g_ascii_isspace (*uri_string))
759 g_string_append_c (copy, *uri_string);
763 return g_string_free (copy, FALSE);
767 should_normalize_empty_path (const char *scheme)
769 const char * const schemes[] = { "https", "http", "wss", "ws" };
771 for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
773 if (!strcmp (schemes[i], scheme))
/* Returns -1 if @port is the well-known default port for @scheme (21 for
 * "ftp", 80 for "http"/"ws", 443 for "https"/"wss"), and @port unchanged
 * otherwise. @scheme must be non-%NULL and lower-case.
 */
static int
normalize_port (const char *scheme,
                int         port)
{
  /* At most two schemes share a default port; the third slot stays NULL and
   * terminates the search loop below. */
  const char *default_schemes[3] = { NULL };
  int i;

  switch (port)
    {
    case 21:
      default_schemes[0] = "ftp";
      break;
    case 80:
      default_schemes[0] = "http";
      default_schemes[1] = "ws";
      break;
    case 443:
      default_schemes[0] = "https";
      default_schemes[1] = "wss";
      break;
    default:
      break;
    }

  for (i = 0; default_schemes[i]; ++i)
    {
      if (!strcmp (scheme, default_schemes[i]))
        return -1;
    }

  return port;
}
/* Returns the default port for @scheme, or -1 if none is known here:
 * 80 for "http"/"ws", 443 for "https"/"wss", 21 for "ftp", and 1080 for any
 * scheme beginning with "socks". @scheme must be non-%NULL and lower-case.
 */
int
g_uri_get_default_scheme_port (const char *scheme)
{
  if (strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0)
    return 80;

  if (strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0)
    return 443;

  if (strcmp (scheme, "ftp") == 0)
    return 21;

  /* Prefix match. strncmp() stops after five bytes instead of scanning the
   * whole string as the strstr()-based test did. */
  if (strncmp (scheme, "socks", 5) == 0)
    return 1080;

  return -1;
}
831 g_uri_split_internal (const gchar *uri_string,
845 const gchar *end, *colon, *at, *path_start, *semi, *question;
846 const gchar *p, *bracket, *hostend;
847 gchar *cleaned_uri_string = NULL;
848 gchar *normalized_scheme = NULL;
871 if ((flags & G_URI_FLAGS_PARSE_RELAXED) && strpbrk (uri_string, " \t\n\r"))
873 cleaned_uri_string = uri_cleanup (uri_string);
874 uri_string = cleaned_uri_string;
879 while (*p && (g_ascii_isalpha (*p) ||
880 (p > uri_string && (g_ascii_isdigit (*p) ||
881 *p == '.' || *p == '+' || *p == '-'))))
884 if (p > uri_string && *p == ':')
886 normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
888 *scheme = g_steal_pointer (&normalized_scheme);
898 /* Check for authority */
899 if (strncmp (p, "//", 2) == 0)
903 path_start = p + strcspn (p, "/?#");
904 at = memchr (p, '@', path_start - p);
907 if (flags & G_URI_FLAGS_PARSE_RELAXED)
911 /* Any "@"s in the userinfo must be %-encoded, but
912 * people get this wrong sometimes. Since "@"s in the
913 * hostname are unlikely (and also wrong anyway), assume
914 * that if there are extra "@"s, they belong in the
919 next_at = memchr (at + 1, '@', path_start - (at + 1));
926 if (user || password || auth_params ||
927 (flags & (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS)))
929 if (!parse_userinfo (p, at - p, flags,
930 user, password, auth_params,
935 if (!uri_normalize (userinfo, p, at - p, flags,
936 G_URI_ERROR_BAD_USER, error))
942 if (flags & G_URI_FLAGS_PARSE_RELAXED)
944 semi = strchr (p, ';');
945 if (semi && semi < path_start)
947 /* Technically, semicolons are allowed in the "host"
948 * production, but no one ever does this, and some
949 * schemes mistakenly use semicolon as a delimiter
950 * marking the start of the path. We have to check this
951 * after checking for userinfo though, because a
952 * semicolon before the "@" must be part of the
959 /* Find host and port. The host may be a bracket-delimited IPv6
960 * address, in which case the colon delimiting the port must come
961 * (immediately) after the close bracket.
965 bracket = memchr (p, ']', path_start - p);
966 if (bracket && *(bracket + 1) == ':')
972 colon = memchr (p, ':', path_start - p);
974 hostend = colon ? colon : path_start;
975 if (!parse_host (p, hostend - p, flags, host, error))
978 if (colon && colon != path_start - 1)
981 if (!parse_port (p, path_start - p, port, error))
989 end = p + strcspn (p, "#");
992 if (!uri_normalize (fragment, end + 1, strlen (end + 1),
993 flags | (flags & G_URI_FLAGS_ENCODED_FRAGMENT ? G_URI_FLAGS_ENCODED : 0),
994 G_URI_ERROR_BAD_FRAGMENT, error))
999 question = memchr (p, '?', end - p);
1002 if (!uri_normalize (query, question + 1, end - (question + 1),
1003 flags | (flags & G_URI_FLAGS_ENCODED_QUERY ? G_URI_FLAGS_ENCODED : 0),
1004 G_URI_ERROR_BAD_QUERY, error))
1009 if (!uri_normalize (path, p, end - p,
1010 flags | (flags & G_URI_FLAGS_ENCODED_PATH ? G_URI_FLAGS_ENCODED : 0),
1011 G_URI_ERROR_BAD_PATH, error))
1014 /* Scheme-based normalization */
1015 if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
1017 const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
1019 if (should_normalize_empty_path (scheme_str) && path && !**path)
1022 *path = g_strdup ("/");
1025 if (port && *port == -1)
1026 *port = g_uri_get_default_scheme_port (scheme_str);
1029 g_free (normalized_scheme);
1030 g_free (cleaned_uri_string);
1035 g_clear_pointer (scheme, g_free);
1037 g_clear_pointer (userinfo, g_free);
1039 g_clear_pointer (host, g_free);
1043 g_clear_pointer (path, g_free);
1045 g_clear_pointer (query, g_free);
1047 g_clear_pointer (fragment, g_free);
1049 g_free (normalized_scheme);
1050 g_free (cleaned_uri_string);
1056 * @uri_ref: a string containing a relative or absolute URI
1057 * @flags: flags for parsing @uri_ref
1058 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1059 * the scheme (converted to lowercase), or %NULL
1060 * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains
1061 * the userinfo, or %NULL
1062 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1064 * @port: (out) (optional) (transfer full): on return, contains the
1066 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1068 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1070 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1071 * the fragment, or %NULL
1072 * @error: #GError for error reporting, or %NULL to ignore.
1074 * Parses @uri_ref (which can be an
1075 * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1076 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1077 * returned as %NULL (but note that all URIs always have a path component,
1078 * though it may be the empty string).
1080 * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in
1081 * @uri_ref will remain encoded in the output strings. (If not,
1082 * then all such characters will be decoded.) Note that decoding will
1083 * only work if the URI components are ASCII or UTF-8, so you will
1084 * need to use %G_URI_FLAGS_ENCODED if they are not.
1086 * Note that the %G_URI_FLAGS_HAS_PASSWORD and
1087 * %G_URI_FLAGS_HAS_AUTH_PARAMS @flags are ignored by g_uri_split(),
1088 * since it always returns only the full userinfo; use
1089 * g_uri_split_with_user() if you want it split up.
1091 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1097 g_uri_split (const gchar *uri_ref,
1108 g_return_val_if_fail (uri_ref != NULL, FALSE);
1109 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1111 return g_uri_split_internal (uri_ref, flags,
1112 scheme, userinfo, NULL, NULL, NULL,
1113 host, port, path, query, fragment,
1118 * g_uri_split_with_user:
1119 * @uri_ref: a string containing a relative or absolute URI
1120 * @flags: flags for parsing @uri_ref
1121 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1122 * the scheme (converted to lowercase), or %NULL
1123 * @user: (out) (nullable) (optional) (transfer full): on return, contains
1124 * the user, or %NULL
1125 * @password: (out) (nullable) (optional) (transfer full): on return, contains
1126 * the password, or %NULL
1127 * @auth_params: (out) (nullable) (optional) (transfer full): on return, contains
1128 * the auth_params, or %NULL
1129 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1131 * @port: (out) (optional) (transfer full): on return, contains the
1133 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1135 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1137 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1138 * the fragment, or %NULL
1139 * @error: #GError for error reporting, or %NULL to ignore.
1141 * Parses @uri_ref (which can be an
1142 * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1143 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1144 * returned as %NULL (but note that all URIs always have a path component,
1145 * though it may be the empty string).
1147 * See g_uri_split(), and the definition of #GUriFlags, for more
1148 * information on the effect of @flags. Note that @password will only
1149 * be parsed out if @flags contains %G_URI_FLAGS_HAS_PASSWORD, and
1150 * @auth_params will only be parsed out if @flags contains
1151 * %G_URI_FLAGS_HAS_AUTH_PARAMS.
1153 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1159 g_uri_split_with_user (const gchar *uri_ref,
1164 gchar **auth_params,
1172 g_return_val_if_fail (uri_ref != NULL, FALSE);
1173 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1175 return g_uri_split_internal (uri_ref, flags,
1176 scheme, NULL, user, password, auth_params,
1177 host, port, path, query, fragment,
1183 * g_uri_split_network:
1184 * @uri_string: a string containing an absolute URI
1185 * @flags: flags for parsing @uri_string
1186 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1187 * the scheme (converted to lowercase), or %NULL
1188 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1190 * @port: (out) (optional) (transfer full): on return, contains the
1192 * @error: #GError for error reporting, or %NULL to ignore.
1194 * Parses @uri_string (which must be an [absolute URI][relative-absolute-uris])
1195 * according to @flags, and returns the pieces relevant to connecting to a host.
1196 * See the documentation for g_uri_split() for more details; this is
1197 * mostly a wrapper around that function with simpler arguments.
1198 * However, it will return an error if @uri_string is a relative URI,
1199 * or does not contain a hostname component.
1201 * Returns: (skip): %TRUE if @uri_string parsed successfully,
1207 g_uri_split_network (const gchar *uri_string,
1214 gchar *my_scheme = NULL, *my_host = NULL;
1216 g_return_val_if_fail (uri_string != NULL, FALSE);
1217 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1219 if (!g_uri_split_internal (uri_string, flags,
1220 &my_scheme, NULL, NULL, NULL, NULL,
1221 &my_host, port, NULL, NULL, NULL,
1225 if (!my_scheme || !my_host)
1229 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1230 _("URI ‘%s’ is not an absolute URI"),
1235 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
1236 _("URI ‘%s’ has no host component"),
1246 *scheme = g_steal_pointer (&my_scheme);
1248 *host = g_steal_pointer (&my_host);
1258 * @uri_string: a string containing an absolute URI
1259 * @flags: flags for parsing @uri_string
1260 * @error: #GError for error reporting, or %NULL to ignore.
1262 * Parses @uri_string according to @flags, to determine whether it is a valid
1263 * [absolute URI][relative-absolute-uris], i.e. it does not need to be resolved
1264 * relative to another URI using g_uri_parse_relative().
1266 * If it’s not a valid URI, an error is returned explaining how it’s invalid.
1268 * See g_uri_split(), and the definition of #GUriFlags, for more
1269 * information on the effect of @flags.
1271 * Returns: %TRUE if @uri_string is a valid absolute URI, %FALSE on error.
1276 g_uri_is_valid (const gchar *uri_string,
1280 gchar *my_scheme = NULL;
1282 g_return_val_if_fail (uri_string != NULL, FALSE);
1283 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1285 if (!g_uri_split_internal (uri_string, flags,
1286 &my_scheme, NULL, NULL, NULL, NULL,
1287 NULL, NULL, NULL, NULL, NULL,
1293 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1294 _("URI ‘%s’ is not an absolute URI"),
1305 /* Implements the "Remove Dot Segments" algorithm from section 5.2.4 of
1308 * See https://tools.ietf.org/html/rfc3986#section-5.2.4
1311 remove_dot_segments (gchar *path)
1313 /* The output can be written to the same buffer that the input
1314 * is read from, as the output pointer is only ever increased
1315 * when the input pointer is increased as well, and the input
1316 * pointer is never decreased. */
1317 gchar *input = path;
1318 gchar *output = path;
1325 /* A. If the input buffer begins with a prefix of "../" or "./",
1326 * then remove that prefix from the input buffer; otherwise,
1328 if (strncmp (input, "../", 3) == 0)
1330 else if (strncmp (input, "./", 2) == 0)
1333 /* B. if the input buffer begins with a prefix of "/./" or "/.",
1334 * where "." is a complete path segment, then replace that
1335 * prefix with "/" in the input buffer; otherwise,
1337 else if (strncmp (input, "/./", 3) == 0)
1339 else if (strcmp (input, "/.") == 0)
1342 /* C. if the input buffer begins with a prefix of "/../" or "/..",
1343 * where ".." is a complete path segment, then replace that
1344 * prefix with "/" in the input buffer and remove the last
1345 * segment and its preceding "/" (if any) from the output
1346 * buffer; otherwise,
1348 else if (strncmp (input, "/../", 4) == 0)
1357 while (*output != '/' && output > path);
1360 else if (strcmp (input, "/..") == 0)
1369 while (*output != '/' && output > path);
1373 /* D. if the input buffer consists only of "." or "..", then remove
1374 * that from the input buffer; otherwise,
1376 else if (strcmp (input, "..") == 0 || strcmp (input, ".") == 0)
1379 /* E. move the first path segment in the input buffer to the end of
1380 * the output buffer, including the initial "/" character (if
1381 * any) and any subsequent characters up to, but not including,
1382 * the next "/" character or the end of the input buffer.
1386 *output++ = *input++;
1387 while (*input && *input != '/')
1388 *output++ = *input++;
1396 * @uri_string: a string representing an absolute URI
1397 * @flags: flags describing how to parse @uri_string
1398 * @error: #GError for error reporting, or %NULL to ignore.
1400 * Parses @uri_string according to @flags. If the result is not a
1401 * valid [absolute URI][relative-absolute-uris], it will be discarded, and an
1404 * Return value: (transfer full): a new #GUri, or NULL on error.
GUri *
g_uri_parse (const gchar  *uri_string,
             GUriFlags     flags,
             GError      **error)
{
  g_return_val_if_fail (uri_string != NULL, NULL);
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);

  /* Parsing an absolute URI is reference resolution with no base URI. */
  return g_uri_parse_relative (NULL, uri_string, flags, error);
}
1420 * g_uri_parse_relative:
1421 * @base_uri: (nullable) (transfer none): a base absolute URI
1422 * @uri_ref: a string representing a relative or absolute URI
1423 * @flags: flags describing how to parse @uri_ref
1424 * @error: #GError for error reporting, or %NULL to ignore.
1426 * Parses @uri_ref according to @flags and, if it is a
1427 * [relative URI][relative-absolute-uris], resolves it relative to @base_uri.
1428 * If the result is not a valid absolute URI, it will be discarded, and an error
1431 * Return value: (transfer full): a new #GUri, or NULL on error.
1436 g_uri_parse_relative (GUri *base_uri,
1437 const gchar *uri_ref,
1443 g_return_val_if_fail (uri_ref != NULL, NULL);
1444 g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1445 g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL);
1447 /* Use GUri struct to construct the return value: there is no guarantee it is
1448 * actually correct within the function body. */
1449 uri = g_atomic_rc_box_new0 (GUri);
1452 if (!g_uri_split_internal (uri_ref, flags,
1453 &uri->scheme, &uri->userinfo,
1454 &uri->user, &uri->password, &uri->auth_params,
1455 &uri->host, &uri->port,
1456 &uri->path, &uri->query, &uri->fragment,
1463 if (!uri->scheme && !base_uri)
1465 g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
1466 _("URI is not absolute, and no base URI was provided"));
1473 /* This is section 5.2.2 of RFC 3986, except that we're doing
1474 * it in place in @uri rather than copying from R to T.
1476 * See https://tools.ietf.org/html/rfc3986#section-5.2.2
1479 remove_dot_segments (uri->path);
1482 uri->scheme = g_strdup (base_uri->scheme);
1484 remove_dot_segments (uri->path);
1490 uri->path = g_strdup (base_uri->path);
1492 uri->query = g_strdup (base_uri->query);
1496 if (*uri->path == '/')
1497 remove_dot_segments (uri->path);
1500 gchar *newpath, *last;
1502 last = strrchr (base_uri->path, '/');
1505 newpath = g_strdup_printf ("%.*s/%s",
1506 (gint)(last - base_uri->path),
1511 newpath = g_strdup_printf ("/%s", uri->path);
1514 uri->path = g_steal_pointer (&newpath);
1516 remove_dot_segments (uri->path);
1520 uri->userinfo = g_strdup (base_uri->userinfo);
1521 uri->user = g_strdup (base_uri->user);
1522 uri->password = g_strdup (base_uri->password);
1523 uri->auth_params = g_strdup (base_uri->auth_params);
1524 uri->host = g_strdup (base_uri->host);
1525 uri->port = base_uri->port;
1529 /* Scheme normalization couldn't have been done earlier
1530 * as the relative URI may not have had a scheme */
1531 if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
1533 if (should_normalize_empty_path (uri->scheme) && !*uri->path)
1536 uri->path = g_strdup ("/");
1539 uri->port = normalize_port (uri->scheme, uri->port);
1544 remove_dot_segments (uri->path);
1547 return g_steal_pointer (&uri);
1551 * g_uri_resolve_relative:
1552 * @base_uri_string: (nullable): a string representing a base URI
1553 * @uri_ref: a string representing a relative or absolute URI
1554 * @flags: flags describing how to parse @uri_ref
1555 * @error: #GError for error reporting, or %NULL to ignore.
1557 * Parses @uri_ref according to @flags and, if it is a
1558 * [relative URI][relative-absolute-uris], resolves it relative to
1559 * @base_uri_string. If the result is not a valid absolute URI, it will be
1560 * discarded, and an error returned.
1562 * (If @base_uri_string is %NULL, this just returns @uri_ref, or
1563 * %NULL if @uri_ref is invalid or not absolute.)
1565 * Return value: (transfer full): the resolved URI string,
1571 g_uri_resolve_relative (const gchar *base_uri_string,
1572 const gchar *uri_ref,
1576 GUri *base_uri, *resolved_uri;
1577 gchar *resolved_uri_string;
1579 g_return_val_if_fail (uri_ref != NULL, NULL);
1580 g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1582 flags |= G_URI_FLAGS_ENCODED;
1584 if (base_uri_string)
1586 base_uri = g_uri_parse (base_uri_string, flags, error);
1593 resolved_uri = g_uri_parse_relative (base_uri, uri_ref, flags, error);
1595 g_uri_unref (base_uri);
1599 resolved_uri_string = g_uri_to_string (resolved_uri);
1600 g_uri_unref (resolved_uri);
1601 return g_steal_pointer (&resolved_uri_string);
/* userinfo as a whole can contain sub-delims + ":", but split-out
 * user can't contain ":" or ";", and split-out password can't contain
 * ";".
 *
 * Each *_ALLOWED_CHARS macro is the set of reserved characters passed to
 * g_string_append_uri_escaped() by g_uri_join_internal() for the
 * corresponding component, i.e. the characters left unescaped there.
 */
#define USERINFO_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
/* Split-out user: no ":" and no ";" */
#define USER_ALLOWED_CHARS "!$&'()*+,="
/* Split-out password: ":" is allowed, ";" is not */
#define PASSWORD_ALLOWED_CHARS "!$&'()*+,=:"
#define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS
/* Used for hosts that contain ":" and are IP addresses (emitted in "[...]") */
#define IP_ADDR_ALLOWED_CHARS ":"
#define HOST_ALLOWED_CHARS G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
#define PATH_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
/* Query and fragment accept everything a path does, plus "?" */
#define QUERY_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
#define FRAGMENT_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1619 g_uri_join_internal (GUriFlags flags,
1620 const gchar *scheme,
1623 const gchar *password,
1624 const gchar *auth_params,
1629 const gchar *fragment)
1631 gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
1633 char *normalized_scheme = NULL;
1635 /* Restrictions on path prefixes. See:
1636 * https://tools.ietf.org/html/rfc3986#section-3
1638 g_return_val_if_fail (path != NULL, NULL);
1639 g_return_val_if_fail (host == NULL || (path[0] == '\0' || path[0] == '/'), NULL);
1640 g_return_val_if_fail (host != NULL || (path[0] != '/' || path[1] != '/'), NULL);
1642 /* Arbitrarily chosen default size which should handle most average length
1643 * URIs. This should avoid a few reallocations of the buffer in most cases.
1644 * It’s 1B shorter than a power of two, since GString will add a
1645 * nul-terminator byte. */
1646 str = g_string_sized_new (127);
1650 g_string_append (str, scheme);
1651 g_string_append_c (str, ':');
1654 if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && scheme && ((host && port != -1) || path[0] == '\0'))
1655 normalized_scheme = g_ascii_strdown (scheme, -1);
1659 g_string_append (str, "//");
1664 g_string_append (str, user);
1668 g_string_append_uri_escaped (str, user, USERINFO_ALLOWED_CHARS, TRUE);
1670 /* Encode ':' and ';' regardless of whether we have a
1671 * password or auth params, since it may be parsed later
1672 * under the assumption that it does.
1674 g_string_append_uri_escaped (str, user, USER_ALLOWED_CHARS, TRUE);
1679 g_string_append_c (str, ':');
1681 g_string_append (str, password);
1683 g_string_append_uri_escaped (str, password,
1684 PASSWORD_ALLOWED_CHARS, TRUE);
1689 g_string_append_c (str, ';');
1691 g_string_append (str, auth_params);
1693 g_string_append_uri_escaped (str, auth_params,
1694 AUTH_PARAMS_ALLOWED_CHARS, TRUE);
1697 g_string_append_c (str, '@');
1700 if (strchr (host, ':') && g_hostname_is_ip_address (host))
1702 g_string_append_c (str, '[');
1704 g_string_append (str, host);
1706 g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
1707 g_string_append_c (str, ']');
1712 g_string_append (str, host);
1714 g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
1717 if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
1718 g_string_append_printf (str, ":%d", port);
1721 if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
1722 g_string_append (str, "/");
1723 else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
1724 g_string_append (str, path);
1726 g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
1728 g_free (normalized_scheme);
1732 g_string_append_c (str, '?');
1733 if (encoded || flags & G_URI_FLAGS_ENCODED_QUERY)
1734 g_string_append (str, query);
1736 g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
1740 g_string_append_c (str, '#');
1741 if (encoded || flags & G_URI_FLAGS_ENCODED_FRAGMENT)
1742 g_string_append (str, fragment);
1744 g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
1747 return g_string_free (str, FALSE);
1752 * @flags: flags describing how to build the URI string
1753 * @scheme: (nullable): the URI scheme, or %NULL
1754 * @userinfo: (nullable): the userinfo component, or %NULL
1755 * @host: (nullable): the host component, or %NULL
1756 * @port: the port, or `-1`
1757 * @path: (not nullable): the path component
1758 * @query: (nullable): the query component, or %NULL
1759 * @fragment: (nullable): the fragment, or %NULL
1761 * Joins the given components together according to @flags to create
1762 * an absolute URI string. @path may not be %NULL (though it may be the empty
1765 * When @host is present, @path must either be empty or begin with a slash (`/`)
1766 * character. When @host is not present, @path cannot begin with two slash
1767 * characters (`//`). See
1768 * [RFC 3986, section 3](https://tools.ietf.org/html/rfc3986#section-3).
1770 * See also g_uri_join_with_user(), which allows specifying the
1771 * components of the ‘userinfo’ separately.
1773 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1776 * Return value: (not nullable) (transfer full): an absolute URI string
gchar *
g_uri_join (GUriFlags    flags,
            const gchar *scheme,
            const gchar *userinfo,
            const gchar *host,
            gint         port,
            const gchar *path,
            const gchar *query,
            const gchar *fragment)
{
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
  g_return_val_if_fail (path != NULL, NULL);

  /* TRUE: @userinfo is passed as the complete userinfo component, so no
   * separate password or auth params are supplied. */
  return g_uri_join_internal (flags,
                              scheme,
                              TRUE, userinfo, NULL, NULL,
                              host,
                              port,
                              path,
                              query,
                              fragment);
}
1804 * g_uri_join_with_user:
1805 * @flags: flags describing how to build the URI string
1806 * @scheme: (nullable): the URI scheme, or %NULL
1807 * @user: (nullable): the user component of the userinfo, or %NULL
1808 * @password: (nullable): the password component of the userinfo, or
1810 * @auth_params: (nullable): the auth params of the userinfo, or
1812 * @host: (nullable): the host component, or %NULL
1813 * @port: the port, or `-1`
1814 * @path: (not nullable): the path component
1815 * @query: (nullable): the query component, or %NULL
1816 * @fragment: (nullable): the fragment, or %NULL
1818 * Joins the given components together according to @flags to create
1819 * an absolute URI string. @path may not be %NULL (though it may be the empty
1822 * In contrast to g_uri_join(), this allows specifying the components
1823 * of the ‘userinfo’ separately. It otherwise behaves the same.
1825 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1828 * Return value: (not nullable) (transfer full): an absolute URI string
gchar *
g_uri_join_with_user (GUriFlags    flags,
                      const gchar *scheme,
                      const gchar *user,
                      const gchar *password,
                      const gchar *auth_params,
                      const gchar *host,
                      gint         port,
                      const gchar *path,
                      const gchar *query,
                      const gchar *fragment)
{
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
  g_return_val_if_fail (path != NULL, NULL);

  /* FALSE: @user is only the user name; @password and @auth_params are
   * passed as separate pieces of the userinfo. */
  return g_uri_join_internal (flags,
                              scheme,
                              FALSE, user, password, auth_params,
                              host,
                              port,
                              path,
                              query,
                              fragment);
}
1859 * @flags: flags describing how to build the #GUri
1860 * @scheme: (not nullable): the URI scheme
1861 * @userinfo: (nullable): the userinfo component, or %NULL
1862 * @host: (nullable): the host component, or %NULL
1863 * @port: the port, or `-1`
1864 * @path: (not nullable): the path component
1865 * @query: (nullable): the query component, or %NULL
1866 * @fragment: (nullable): the fragment, or %NULL
1868 * Creates a new #GUri from the given components according to @flags.
1870 * See also g_uri_build_with_user(), which allows specifying the
1871 * components of the "userinfo" separately.
1873 * Return value: (not nullable) (transfer full): a new #GUri
1878 g_uri_build (GUriFlags flags,
1879 const gchar *scheme,
1880 const gchar *userinfo,
1885 const gchar *fragment)
1889 g_return_val_if_fail (scheme != NULL, NULL);
1890 g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1891 g_return_val_if_fail (path != NULL, NULL);
1893 uri = g_atomic_rc_box_new0 (GUri);
1895 uri->scheme = g_ascii_strdown (scheme, -1);
1896 uri->userinfo = g_strdup (userinfo);
1897 uri->host = g_strdup (host);
1899 uri->path = g_strdup (path);
1900 uri->query = g_strdup (query);
1901 uri->fragment = g_strdup (fragment);
1903 return g_steal_pointer (&uri);
1907 * g_uri_build_with_user:
1908 * @flags: flags describing how to build the #GUri
1909 * @scheme: (not nullable): the URI scheme
1910 * @user: (nullable): the user component of the userinfo, or %NULL
1911 * @password: (nullable): the password component of the userinfo, or %NULL
1912 * @auth_params: (nullable): the auth params of the userinfo, or %NULL
1913 * @host: (nullable): the host component, or %NULL
1914 * @port: the port, or `-1`
1915 * @path: (not nullable): the path component
1916 * @query: (nullable): the query component, or %NULL
1917 * @fragment: (nullable): the fragment, or %NULL
1919 * Creates a new #GUri from the given components according to @flags
1920 * (%G_URI_FLAGS_HAS_PASSWORD is added unconditionally). The @flags must be
1921 * coherent with the passed values, in particular use `%`-encoded values with
1922 * %G_URI_FLAGS_ENCODED.
1924 * In contrast to g_uri_build(), this allows specifying the components
1925 * of the ‘userinfo’ field separately. Note that @user must be non-%NULL
1926 * if either @password or @auth_params is non-%NULL.
1928 * Return value: (not nullable) (transfer full): a new #GUri
1933 g_uri_build_with_user (GUriFlags flags,
1934 const gchar *scheme,
1936 const gchar *password,
1937 const gchar *auth_params,
1942 const gchar *fragment)
1947 g_return_val_if_fail (scheme != NULL, NULL);
1948 g_return_val_if_fail (password == NULL || user != NULL, NULL);
1949 g_return_val_if_fail (auth_params == NULL || user != NULL, NULL);
1950 g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1951 g_return_val_if_fail (path != NULL, NULL);
1953 uri = g_atomic_rc_box_new0 (GUri);
1954 uri->flags = flags | G_URI_FLAGS_HAS_PASSWORD;
1955 uri->scheme = g_ascii_strdown (scheme, -1);
1956 uri->user = g_strdup (user);
1957 uri->password = g_strdup (password);
1958 uri->auth_params = g_strdup (auth_params);
1959 uri->host = g_strdup (host);
1961 uri->path = g_strdup (path);
1962 uri->query = g_strdup (query);
1963 uri->fragment = g_strdup (fragment);
1967 userinfo = g_string_new (user);
1970 g_string_append_c (userinfo, ':');
1971 g_string_append (userinfo, uri->password);
1975 g_string_append_c (userinfo, ';');
1976 g_string_append (userinfo, uri->auth_params);
1978 uri->userinfo = g_string_free (userinfo, FALSE);
1981 return g_steal_pointer (&uri);
1988 * Returns a string representing @uri.
1990 * This is not guaranteed to return a string which is identical to the
1991 * string that @uri was parsed from. However, if the source URI was
1992 * syntactically correct (according to RFC 3986), and it was parsed
1993 * with %G_URI_FLAGS_ENCODED, then g_uri_to_string() is guaranteed to return
1994 * a string which is at least semantically equivalent to the source
1995 * URI (according to RFC 3986).
1997 * If @uri might contain sensitive details, such as authentication parameters,
1998 * or private data in its query string, and the returned string is going to be
1999 * logged, then consider using g_uri_to_string_partial() to redact parts.
2001 * Return value: (not nullable) (transfer full): a string representing @uri,
2002 * which the caller must free.
gchar *
g_uri_to_string (GUri *uri)
{
  g_return_val_if_fail (uri != NULL, NULL);

  /* The full string is the partial string with nothing hidden. */
  return g_uri_to_string_partial (uri, G_URI_HIDE_NONE);
}
2015 * g_uri_to_string_partial:
2017 * @flags: flags describing what parts of @uri to hide
2019 * Returns a string representing @uri, subject to the options in
2020 * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
2022 * Return value: (not nullable) (transfer full): a string representing
2023 * @uri, which the caller must free.
2028 g_uri_to_string_partial (GUri *uri,
2029 GUriHideFlags flags)
2031 gboolean hide_user = (flags & G_URI_HIDE_USERINFO);
2032 gboolean hide_password = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD));
2033 gboolean hide_auth_params = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS));
2034 gboolean hide_query = (flags & G_URI_HIDE_QUERY);
2035 gboolean hide_fragment = (flags & G_URI_HIDE_FRAGMENT);
2037 g_return_val_if_fail (uri != NULL, NULL);
2039 if (uri->flags & (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_HAS_AUTH_PARAMS))
2041 return g_uri_join_with_user (uri->flags,
2043 hide_user ? NULL : uri->user,
2044 hide_password ? NULL : uri->password,
2045 hide_auth_params ? NULL : uri->auth_params,
2049 hide_query ? NULL : uri->query,
2050 hide_fragment ? NULL : uri->fragment);
2053 return g_uri_join (uri->flags,
2055 hide_user ? NULL : uri->userinfo,
2059 hide_query ? NULL : uri->query,
2060 hide_fragment ? NULL : uri->fragment);
2063 /* This is just a copy of g_str_hash() with g_ascii_toupper() added */
2065 str_ascii_case_hash (gconstpointer v)
2067 const signed char *p;
2070 for (p = v; *p != '\0'; p++)
2071 h = (h << 5) + h + g_ascii_toupper (*p);
2077 str_ascii_case_equal (gconstpointer v1,
2080 const gchar *string1 = v1;
2081 const gchar *string2 = v2;
2083 return g_ascii_strcasecmp (string1, string2) == 0;
2089 * Many URI schemes include one or more attribute/value pairs as part of the URI
2090 * value. For example `scheme://server/path?query=string&is=there` has two
2091 * attributes – `query=string` and `is=there` – in its query part.
2093 * A #GUriParamsIter structure represents an iterator that can be used to
2094 * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
2095 * structures are typically allocated on the stack and then initialized with
2096 * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
2097 * for a usage example.
/* Private layout behind the public GUriParamsIter. */
typedef struct
{
  GUriParamsFlags flags;  /* flags passed to g_uri_params_iter_init() */
  const gchar    *attr;   /* start of the next attribute to parse */
  const gchar    *end;    /* one past the last byte of the params string */
  guint8          sep_table[256]; /* 1 = index is a separator; 0 otherwise */
} RealIter;

/* GUriParamsIter pointers are cast to RealIter, so the public struct must
 * have exactly the same size and at least as strict an alignment. */
G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
2113 * g_uri_params_iter_init:
2114 * @iter: an uninitialized #GUriParamsIter
2115 * @params: a `%`-encoded string containing `attribute=value`
2117 * @length: the length of @params, or `-1` if it is nul-terminated
2118 * @separators: the separator byte character set between parameters. (usually
2119 * `&`, but sometimes `;` or both `&;`). Note that this function works on
2120 * bytes not characters, so it can't be used to delimit UTF-8 strings for
2121 * anything but ASCII characters. You may pass an empty set, in which case
2122 * no splitting will occur.
2123 * @flags: flags to modify the way the parameters are handled.
2125 * Initializes an attribute/value pair iterator.
 * The iterator keeps pointers to the @params and @separators arguments; those
 * variables must thus outlive the iterator and not be modified during the
2131 * If %G_URI_PARAMS_WWW_FORM is passed in @flags, `+` characters in the param
2132 * string will be replaced with spaces in the output. For example, `foo=bar+baz`
2133 * will give attribute `foo` with value `bar baz`. This is commonly used on the
2134 * web (the `https` and `http` schemes only), but is deprecated in favour of
2135 * the equivalent of encoding spaces as `%20`.
2137 * Unlike with g_uri_parse_params(), %G_URI_PARAMS_CASE_INSENSITIVE has no
2138 * effect if passed to @flags for g_uri_params_iter_init(). The caller is
2139 * responsible for doing their own case-insensitive comparisons.
2141 * |[<!-- language="C" -->
2142 * GUriParamsIter iter;
2143 * GError *error = NULL;
2144 * gchar *unowned_attr, *unowned_value;
2146 * g_uri_params_iter_init (&iter, "foo=bar&baz=bar&Foo=frob&baz=bar2", -1, "&", G_URI_PARAMS_NONE);
2147 * while (g_uri_params_iter_next (&iter, &unowned_attr, &unowned_value, &error))
2149 * g_autofree gchar *attr = g_steal_pointer (&unowned_attr);
2150 * g_autofree gchar *value = g_steal_pointer (&unowned_value);
2151 * // do something with attr and value; this code will be called 4 times
2152 * // for the params string in this example: once with attr=foo and value=bar,
2153 * // then with baz/bar, then Foo/frob, then baz/bar2.
2156 * // handle parsing error
2162 g_uri_params_iter_init (GUriParamsIter *iter,
2163 const gchar *params,
2165 const gchar *separators,
2166 GUriParamsFlags flags)
2168 RealIter *ri = (RealIter *)iter;
2171 g_return_if_fail (iter != NULL);
2172 g_return_if_fail (length == 0 || params != NULL);
2173 g_return_if_fail (length >= -1);
2174 g_return_if_fail (separators != NULL);
2179 ri->end = params + strlen (params);
2181 ri->end = params + length;
2183 memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
2184 for (s = separators; *s != '\0'; ++s)
2185 ri->sep_table[*(guchar *)s] = TRUE;
2191 * g_uri_params_iter_next:
2192 * @iter: an initialized #GUriParamsIter
2193 * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
2194 * the attribute, or %NULL.
2195 * @value: (out) (nullable) (optional) (transfer full): on return, contains
2196 * the value, or %NULL.
2197 * @error: #GError for error reporting, or %NULL to ignore.
2199 * Advances @iter and retrieves the next attribute/value. %FALSE is returned if
2200 * an error has occurred (in which case @error is set), or if the end of the
2201 * iteration is reached (in which case @attribute and @value are set to %NULL
2202 * and the iterator becomes invalid). If %TRUE is returned,
2203 * g_uri_params_iter_next() may be called again to receive another
2204 * attribute/value pair.
2206 * Note that the same @attribute may be returned multiple times, since URIs
2207 * allow repeated attributes.
2209 * Returns: %FALSE if the end of the parameters has been reached or an error was
2210 * encountered. %TRUE otherwise.
2215 g_uri_params_iter_next (GUriParamsIter *iter,
2220 RealIter *ri = (RealIter *)iter;
2221 const gchar *attr_end, *val, *val_end;
2222 gchar *decoded_attr, *decoded_value;
2223 gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM;
2224 GUriFlags decode_flags = G_URI_FLAGS_NONE;
2226 g_return_val_if_fail (iter != NULL, FALSE);
2227 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2229 /* Pre-clear these in case of failure or finishing. */
2235 if (ri->attr >= ri->end)
2238 if (ri->flags & G_URI_PARAMS_PARSE_RELAXED)
2239 decode_flags |= G_URI_FLAGS_PARSE_RELAXED;
2241 /* Check if each character in @attr is a separator, by indexing by the
2242 * character value into the @sep_table, which has value 1 stored at an
2243 * index if that index is a separator. */
2244 for (val_end = ri->attr; val_end < ri->end; val_end++)
2245 if (ri->sep_table[*(guchar *)val_end])
2248 attr_end = memchr (ri->attr, '=', val_end - ri->attr);
2251 g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
2252 _("Missing ‘=’ and parameter value"));
2255 if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
2256 www_form, decode_flags, G_URI_ERROR_FAILED, error))
2262 if (!uri_decode (&decoded_value, NULL, val, val_end - val,
2263 www_form, decode_flags, G_URI_ERROR_FAILED, error))
2265 g_free (decoded_attr);
2270 *attribute = g_steal_pointer (&decoded_attr);
2272 *value = g_steal_pointer (&decoded_value);
2274 g_free (decoded_attr);
2275 g_free (decoded_value);
2277 ri->attr = val_end + 1;
2282 * g_uri_parse_params:
2283 * @params: a `%`-encoded string containing `attribute=value`
2285 * @length: the length of @params, or `-1` if it is nul-terminated
2286 * @separators: the separator byte character set between parameters. (usually
2287 * `&`, but sometimes `;` or both `&;`). Note that this function works on
2288 * bytes not characters, so it can't be used to delimit UTF-8 strings for
2289 * anything but ASCII characters. You may pass an empty set, in which case
2290 * no splitting will occur.
2291 * @flags: flags to modify the way the parameters are handled.
2292 * @error: #GError for error reporting, or %NULL to ignore.
2294 * Many URI schemes include one or more attribute/value pairs as part of the URI
2295 * value. This method can be used to parse them into a hash table. When an
2296 * attribute has multiple occurrences, the last value is the final returned
2297 * value. If you need to handle repeated attributes differently, use
2300 * The @params string is assumed to still be `%`-encoded, but the returned
2301 * values will be fully decoded. (Thus it is possible that the returned values
2302 * may contain `=` or @separators, if the value was encoded in the input.)
 * If %G_URI_PARAMS_PARSE_RELAXED is set in @flags, invalid `%`-encoding is
 * treated as with the %G_URI_FLAGS_PARSE_RELAXED rules for g_uri_parse().
 * (However, if @params is the path or query string
2305 * from a #GUri that was parsed without %G_URI_FLAGS_PARSE_RELAXED and
2306 * %G_URI_FLAGS_ENCODED, then you already know that it does not contain any
2307 * invalid encoding.)
2309 * %G_URI_PARAMS_WWW_FORM is handled as documented for g_uri_params_iter_init().
2311 * If %G_URI_PARAMS_CASE_INSENSITIVE is passed to @flags, attributes will be
2312 * compared case-insensitively, so a params string `attr=123&Attr=456` will only
2313 * return a single attribute–value pair, `Attr=456`. Case will be preserved in
2314 * the returned attributes.
2316 * If @params cannot be parsed (for example, it contains two @separators
2317 * characters in a row), then @error is set and %NULL is returned.
2319 * Return value: (transfer full) (element-type utf8 utf8):
2320 * A hash table of attribute/value pairs, with both names and values
2321 * fully-decoded; or %NULL on error.
2326 g_uri_parse_params (const gchar *params,
2328 const gchar *separators,
2329 GUriParamsFlags flags,
2333 GUriParamsIter iter;
2334 gchar *attribute, *value;
2337 g_return_val_if_fail (length == 0 || params != NULL, NULL);
2338 g_return_val_if_fail (length >= -1, NULL);
2339 g_return_val_if_fail (separators != NULL, NULL);
2340 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2342 if (flags & G_URI_PARAMS_CASE_INSENSITIVE)
2344 hash = g_hash_table_new_full (str_ascii_case_hash,
2345 str_ascii_case_equal,
2350 hash = g_hash_table_new_full (g_str_hash, g_str_equal,
2354 g_uri_params_iter_init (&iter, params, length, separators, flags);
2356 while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
2357 g_hash_table_insert (hash, attribute, value);
2361 g_propagate_error (error, g_steal_pointer (&err));
2362 g_hash_table_destroy (hash);
2366 return g_steal_pointer (&hash);
2373 * Gets @uri's scheme. Note that this will always be all-lowercase,
2374 * regardless of the string or strings that @uri was created from.
2376 * Return value: (not nullable): @uri's scheme.
const gchar *
g_uri_get_scheme (GUri *uri)
{
  /* A NULL @uri is a programming error: return NULL. */
  g_return_val_if_fail (uri != NULL, NULL);

  /* Borrowed pointer into @uri; the caller must not free it. */
  return uri->scheme;
}
2389 * g_uri_get_userinfo:
2392 * Gets @uri's userinfo, which may contain `%`-encoding, depending on
2393 * the flags with which @uri was created.
2395 * Return value: (nullable): @uri's userinfo.
const gchar *
g_uri_get_userinfo (GUri *uri)
{
  /* A NULL @uri is a programming error: return NULL. */
  g_return_val_if_fail (uri != NULL, NULL);

  /* Borrowed pointer into @uri; may be NULL. */
  return uri->userinfo;
}
2411 * Gets the ‘username’ component of @uri's userinfo, which may contain
2412 * `%`-encoding, depending on the flags with which @uri was created.
2413 * If @uri was not created with %G_URI_FLAGS_HAS_PASSWORD or
2414 * %G_URI_FLAGS_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo().
2416 * Return value: (nullable): @uri's user.
const gchar *
g_uri_get_user (GUri *uri)
{
  /* A NULL @uri is a programming error: return NULL. */
  g_return_val_if_fail (uri != NULL, NULL);

  /* Borrowed pointer into @uri; may be NULL. */
  return uri->user;
}
2429 * g_uri_get_password:
2432 * Gets @uri's password, which may contain `%`-encoding, depending on
2433 * the flags with which @uri was created. (If @uri was not created
2434 * with %G_URI_FLAGS_HAS_PASSWORD then this will be %NULL.)
2436 * Return value: (nullable): @uri's password.
const gchar *
g_uri_get_password (GUri *uri)
{
  /* A NULL @uri is a programming error: return NULL. */
  g_return_val_if_fail (uri != NULL, NULL);

  /* Borrowed pointer into @uri; may be NULL. */
  return uri->password;
}
2449 * g_uri_get_auth_params:
2452 * Gets @uri's authentication parameters, which may contain
2453 * `%`-encoding, depending on the flags with which @uri was created.
2454 * (If @uri was not created with %G_URI_FLAGS_HAS_AUTH_PARAMS then this will
2457 * Depending on the URI scheme, g_uri_parse_params() may be useful for
2458 * further parsing this information.
2460 * Return value: (nullable): @uri's authentication parameters.
const gchar *
g_uri_get_auth_params (GUri *uri)
{
  /* A NULL @uri is a programming error: return NULL. */
  g_return_val_if_fail (uri != NULL, NULL);

  /* Borrowed pointer into @uri; may be NULL. */
  return uri->auth_params;
}
2476 * Gets @uri's host. This will never have `%`-encoded characters,
2477 * unless it is non-UTF-8 (which can only be the case if @uri was
2478 * created with %G_URI_FLAGS_NON_DNS).
2480 * If @uri contained an IPv6 address literal, this value will be just
2481 * that address, without the brackets around it that are necessary in
2482 * the string form of the URI. Note that in this case there may also
 * be a scope ID attached to the address. E.g., `fe80::1234%``em1` (or
2484 * `fe80::1234%``25em1` if the string is still encoded).
2486 * Return value: (nullable): @uri's host.
const gchar *
g_uri_get_host (GUri *uri)
{
  /* A NULL @uri is a programming error: return NULL. */
  g_return_val_if_fail (uri != NULL, NULL);

  /* Borrowed pointer into @uri; may be NULL. */
  return uri->host;
}
2504 * Return value: @uri's port, or `-1` if no port was specified.
2509 g_uri_get_port (GUri *uri)
2511 g_return_val_if_fail (uri != NULL, -1);
2513 if (uri->port == -1 && uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE)
2514 return g_uri_get_default_scheme_port (uri->scheme);
2523 * Gets @uri's path, which may contain `%`-encoding, depending on the
2524 * flags with which @uri was created.
2526 * Return value: (not nullable): @uri's path.
const gchar *
g_uri_get_path (GUri *uri)
{
  /* A NULL @uri is a programming error: return NULL. */
  g_return_val_if_fail (uri != NULL, NULL);

  /* Borrowed pointer into @uri; the caller must not free it. */
  return uri->path;
}
2542 * Gets @uri's query, which may contain `%`-encoding, depending on the
2543 * flags with which @uri was created.
2545 * For queries consisting of a series of `name=value` parameters,
2546 * #GUriParamsIter or g_uri_parse_params() may be useful.
2548 * Return value: (nullable): @uri's query.
const gchar *
g_uri_get_query (GUri *uri)
{
  /* A NULL @uri is a programming error: return NULL. */
  g_return_val_if_fail (uri != NULL, NULL);

  /* Borrowed pointer into @uri; may be NULL. */
  return uri->query;
}
2561 * g_uri_get_fragment:
2564 * Gets @uri's fragment, which may contain `%`-encoding, depending on
2565 * the flags with which @uri was created.
2567 * Return value: (nullable): @uri's fragment.
const gchar *
g_uri_get_fragment (GUri *uri)
{
  /* A NULL @uri is a programming error: return NULL. */
  g_return_val_if_fail (uri != NULL, NULL);

  /* Borrowed pointer into @uri; may be NULL. */
  return uri->fragment;
}
2584 * Gets @uri's flags set upon construction.
2586 * Return value: @uri's flags.
GUriFlags
g_uri_get_flags (GUri *uri)
{
  /* A NULL @uri is a programming error: report "no flags". */
  g_return_val_if_fail (uri != NULL, G_URI_FLAGS_NONE);

  return uri->flags;
}
2599 * g_uri_unescape_segment:
2600 * @escaped_string: (nullable): A string, may be %NULL
2601 * @escaped_string_end: (nullable): Pointer to end of @escaped_string,
2603 * @illegal_characters: (nullable): An optional string of illegal
2604 * characters not to be allowed, may be %NULL
2606 * Unescapes a segment of an escaped string.
2608 * If any of the characters in @illegal_characters or the NUL
2609 * character appears as an escaped character in @escaped_string, then
2610 * that is an error and %NULL will be returned. This is useful if you
2611 * want to avoid for instance having a slash being expanded in an
2612 * escaped path element, which might confuse pathname handling.
2614 * Note: `NUL` byte is not accepted in the output, in contrast to
2615 * g_uri_unescape_bytes().
2617 * Returns: (nullable): an unescaped version of @escaped_string,
2618 * or %NULL on error. The returned string should be freed when no longer
2619 * needed. As a special case if %NULL is given for @escaped_string, this
2620 * function will return %NULL.
2625 g_uri_unescape_segment (const gchar *escaped_string,
2626 const gchar *escaped_string_end,
2627 const gchar *illegal_characters)
2633 if (!escaped_string)
2636 if (escaped_string_end)
2637 length = escaped_string_end - escaped_string;
2639 length = strlen (escaped_string);
2641 decoded_len = uri_decoder (&unescaped,
2643 escaped_string, length,
2645 G_URI_FLAGS_ENCODED,
2647 if (decoded_len < 0)
2650 if (memchr (unescaped, '\0', decoded_len))
2660 * g_uri_unescape_string:
2661 * @escaped_string: an escaped string to be unescaped.
2662 * @illegal_characters: (nullable): a string of illegal characters
2663 * not to be allowed, or %NULL.
2665 * Unescapes a whole escaped string.
2667 * If any of the characters in @illegal_characters or the NUL
2668 * character appears as an escaped character in @escaped_string, then
2669 * that is an error and %NULL will be returned. This is useful if you
2670 * want to avoid for instance having a slash being expanded in an
2671 * escaped path element, which might confuse pathname handling.
2673 * Returns: (nullable): an unescaped version of @escaped_string.
2674 * The returned string should be freed when no longer needed.
2679 g_uri_unescape_string (const gchar *escaped_string,
2680 const gchar *illegal_characters)
2682 return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
2686 * g_uri_escape_string:
2687 * @unescaped: the unescaped input string.
2688 * @reserved_chars_allowed: (nullable): a string of reserved
2689 * characters that are allowed to be used, or %NULL.
2690 * @allow_utf8: %TRUE if the result can include UTF-8 characters.
2692 * Escapes a string for use in a URI.
2694 * Normally all characters that are not "unreserved" (i.e. ASCII
2695 * alphanumerical characters plus dash, dot, underscore and tilde) are
2696 * escaped. But if you specify characters in @reserved_chars_allowed
2697 * they are not escaped. This is useful for the "reserved" characters
2698 * in the URI specification, since those are allowed unescaped in some
2699 * portions of a URI.
2701 * Returns: (not nullable): an escaped version of @unescaped. The
2702 * returned string should be freed when no longer needed.
2707 g_uri_escape_string (const gchar *unescaped,
2708 const gchar *reserved_chars_allowed,
2709 gboolean allow_utf8)
2713 g_return_val_if_fail (unescaped != NULL, NULL);
2715 s = g_string_sized_new (strlen (unescaped) * 1.25);
2717 g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
2719 return g_string_free (s, FALSE);
2723 * g_uri_unescape_bytes:
2724 * @escaped_string: A URI-escaped string
2725 * @length: the length (in bytes) of @escaped_string to escape, or `-1` if it
2726 * is nul-terminated.
2727 * @illegal_characters: (nullable): a string of illegal characters
2728 * not to be allowed, or %NULL.
2729 * @error: #GError for error reporting, or %NULL to ignore.
2731 * Unescapes a segment of an escaped string as binary data.
2733 * Note that in contrast to g_uri_unescape_string(), this does allow
2734 * nul bytes to appear in the output.
2736 * If any of the characters in @illegal_characters appears as an escaped
2737 * character in @escaped_string, then that is an error and %NULL will be
2738 * returned. This is useful if you want to avoid for instance having a slash
2739 * being expanded in an escaped path element, which might confuse pathname
2742 * Returns: (transfer full): an unescaped version of @escaped_string
2743 * or %NULL on error (if decoding failed, using %G_URI_ERROR_FAILED error
2744 * code). The returned #GBytes should be unreffed when no longer needed.
2749 g_uri_unescape_bytes (const gchar *escaped_string,
2751 const char *illegal_characters,
2755 gssize unescaped_length;
2757 g_return_val_if_fail (escaped_string != NULL, NULL);
2758 g_return_val_if_fail (error == NULL || *error == NULL, NULL);
2761 length = strlen (escaped_string);
2763 unescaped_length = uri_decoder (&buf,
2765 escaped_string, length,
2768 G_URI_FLAGS_ENCODED,
2769 G_URI_ERROR_FAILED, error);
2770 if (unescaped_length == -1)
2773 return g_bytes_new_take (buf, unescaped_length);
2777 * g_uri_escape_bytes:
2778 * @unescaped: (array length=length): the unescaped input data.
2779 * @length: the length of @unescaped
2780 * @reserved_chars_allowed: (nullable): a string of reserved
2781 * characters that are allowed to be used, or %NULL.
2783 * Escapes arbitrary data for use in a URI.
2785 * Normally all characters that are not ‘unreserved’ (i.e. ASCII
2786 * alphanumerical characters plus dash, dot, underscore and tilde) are
2787 * escaped. But if you specify characters in @reserved_chars_allowed
2788 * they are not escaped. This is useful for the ‘reserved’ characters
2789 * in the URI specification, since those are allowed unescaped in some
2790 * portions of a URI.
2792 * Though technically incorrect, this will also allow escaping nul
2795 * Returns: (not nullable) (transfer full): an escaped version of @unescaped.
2796 * The returned string should be freed when no longer needed.
2801 g_uri_escape_bytes (const guint8 *unescaped,
2803 const gchar *reserved_chars_allowed)
2807 g_return_val_if_fail (unescaped != NULL, NULL);
2809 string = g_string_sized_new (length * 1.25);
2811 _uri_encoder (string, unescaped, length,
2812 reserved_chars_allowed, FALSE);
2814 return g_string_free (string, FALSE);
2818 g_uri_scheme_length (const gchar *uri)
2823 if (!g_ascii_isalpha (*p))
2826 while (g_ascii_isalnum (*p) || *p == '.' || *p == '+' || *p == '-')
2829 if (p > uri && *p == ':')
2836 * g_uri_parse_scheme:
2837 * @uri: a valid URI.
2839 * Gets the scheme portion of a URI string.
2840 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2843 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2845 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2847 * Returns: (transfer full) (nullable): The ‘scheme’ component of the URI, or
2848 * %NULL on error. The returned string should be freed when no longer needed.
2853 g_uri_parse_scheme (const gchar *uri)
2857 g_return_val_if_fail (uri != NULL, NULL);
2859 len = g_uri_scheme_length (uri);
2860 return len == -1 ? NULL : g_strndup (uri, len);
2864 * g_uri_peek_scheme:
2865 * @uri: a valid URI.
2867 * Gets the scheme portion of a URI string.
2868 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2871 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2873 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2875 * Unlike g_uri_parse_scheme(), the returned scheme is normalized to
2876 * all-lowercase and does not need to be freed.
2878 * Returns: (transfer none) (nullable): The ‘scheme’ component of the URI, or
2879 * %NULL on error. The returned string is normalized to all-lowercase, and
2880 * interned via g_intern_string(), so it does not need to be freed.
2885 g_uri_peek_scheme (const gchar *uri)
2888 gchar *lower_scheme;
2889 const gchar *scheme;
2891 g_return_val_if_fail (uri != NULL, NULL);
2893 len = g_uri_scheme_length (uri);
2897 lower_scheme = g_ascii_strdown (uri, len);
2898 scheme = g_intern_string (lower_scheme);
2899 g_free (lower_scheme);
2904 G_DEFINE_QUARK (g-uri-quark, g_uri_error)