libsoup/soup-uri.c

   1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /* soup-uri.c : utility functions to parse URLs */
   3
   4 /*
   5  * Copyright 1999-2003 Ximian, Inc.
   6  */
   7
   8 #include <ctype.h>
   9 #include <string.h>
  10 #include <stdlib.h>
  11
  12 #include "soup-uri.h"
  13 #include "soup-misc-private.h"
  14 #include "soup-form.h"
  15 #include "soup-misc.h"
  16
  17 /**
  18  * SECTION:soup-uri
  19  * @short_description: URIs
  20  *
  21  * A #SoupURI represents a (parsed) URI.
  22  *
  23  * Many applications will not need to use #SoupURI directly at all; on
  24  * the client side, soup_message_new() takes a stringified URI, and on
  25  * the server side, the path and query components are provided for you
  26  * in the server callback.
  27  **/
  28
  29 /**
  30  * SoupURI:
  31  * @scheme: the URI scheme (eg, "http")
  32  * @user: a username, or %NULL
  33  * @password: a password, or %NULL
  34  * @host: the hostname or IP address
  35  * @port: the port number on @host
  36  * @path: the path on @host
  37  * @query: a query for @path, or %NULL
  38  * @fragment: a fragment identifier within @path, or %NULL
  39  *
  40  * A #SoupURI represents a (parsed) URI. #SoupURI supports RFC 3986
  41  * (URI Generic Syntax), and can parse any valid URI. However, libsoup
  42  * only uses "http" and "https" URIs internally; you can use
  43  * SOUP_URI_VALID_FOR_HTTP() to test if a #SoupURI is a valid HTTP
  44  * URI.
  45  *
  46  * @scheme will always be set in any URI. It is an interned string and
  47  * is always all lowercase. (If you parse a URI with a non-lowercase
  48  * scheme, it will be converted to lowercase.) The macros
  49  * %SOUP_URI_SCHEME_HTTP and %SOUP_URI_SCHEME_HTTPS provide the
  50  * interned values for "http" and "https" and can be compared against
  51  * URI @scheme values.
  52  *
  53  * @user and @password are parsed as defined in the older URI specs
  54  * (ie, separated by a colon; RFC 3986 only talks about a single
  55  * "userinfo" field). Note that @password is not included in the
  56  * output of soup_uri_to_string(). libsoup does not normally use these
  57  * fields; authentication is handled via #SoupSession signals.
  58  *
  59  * @host contains the hostname, and @port the port specified in the
  60  * URI. If the URI doesn't contain a hostname, @host will be %NULL,
  61  * and if it doesn't specify a port, @port may be 0. However, for
  62  * "http" and "https" URIs, @host is guaranteed to be non-%NULL
  63  * (trying to parse an http URI with no @host will return %NULL), and
  64  * @port will always be non-0 (because libsoup knows the default value
  65  * to use when it is not specified in the URI).
  66  *
  67  * @path is always non-%NULL. For http/https URIs, @path will never be
  68  * an empty string either; if the input URI has no path, the parsed
  69  * #SoupURI will have a @path of "/".
  70  *
  71  * @query and @fragment are optional for all URI types.
  72  * soup_form_decode() may be useful for parsing @query.
  73  *
  74  * Note that @path, @query, and @fragment may contain
  75  * %<!-- -->-encoded characters. soup_uri_new() calls
  76  * soup_uri_normalize() on them, but not soup_uri_decode(). This is
  77  * necessary to ensure that soup_uri_to_string() will generate a URI
  78  * that has exactly the same meaning as the original. (In theory,
  79  * #SoupURI should leave @user, @password, and @host partially-encoded
  80  * as well, but this would be more annoying than useful.)
  81  **/
  82
  83 /**
  84  * SOUP_URI_VALID_FOR_HTTP:
  85  * @uri: a #SoupURI
  86  *
  87  * Tests if @uri is a valid #SoupURI for HTTP communication; that is, if
  88  * it can be used to construct a #SoupMessage.
  89  *
  90  * Return value: %TRUE if @uri is a valid "http" or "https" URI.
  91  *
  92  * Since: 2.24
  93  **/
  94
  95 static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
  96 static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra);
  97
  98 gpointer _SOUP_URI_SCHEME_HTTP, _SOUP_URI_SCHEME_HTTPS;
  99 gpointer _SOUP_URI_SCHEME_FTP;
 100 gpointer _SOUP_URI_SCHEME_FILE, _SOUP_URI_SCHEME_DATA;
 101
 102 static inline const char *
 103 soup_uri_parse_scheme (const char *scheme, int len)
 104 {
 105         if (len == 4 && !g_ascii_strncasecmp (scheme, "http", len)) {
 106                 return SOUP_URI_SCHEME_HTTP;
 107         } else if (len == 5 && !g_ascii_strncasecmp (scheme, "https", len)) {
 108                 return SOUP_URI_SCHEME_HTTPS;
 109         } else {
 110                 char *lower_scheme;
 111
 112                 lower_scheme = g_ascii_strdown (scheme, len);
 113                 scheme = g_intern_static_string (lower_scheme);
 114                 if (scheme != (const char *)lower_scheme)
 115                         g_free (lower_scheme);
 116                 return scheme;
 117         }
 118 }
 119
 120 static inline guint
 121 soup_scheme_default_port (const char *scheme)
 122 {
 123         if (scheme == SOUP_URI_SCHEME_HTTP)
 124                 return 80;
 125         else if (scheme == SOUP_URI_SCHEME_HTTPS)
 126                 return 443;
 127         else if (scheme == SOUP_URI_SCHEME_FTP)
 128                 return 21;
 129         else
 130                 return 0;
 131 }
 132
 133 /**
 134  * soup_uri_new_with_base:
 135  * @base: a base URI
 136  * @uri_string: the URI
 137  *
 138  * Parses @uri_string relative to @base.
 139  *
 140  * Return value: a parsed #SoupURI.
 141  **/
 142 SoupURI *
 143 soup_uri_new_with_base (SoupURI *base, const char *uri_string)
 144 {
 145         SoupURI *uri;
 146         const char *end, *hash, *colon, *at, *path, *question;
 147         const char *p, *hostend;
 148         gboolean remove_dot_segments = TRUE;
 149         int len;
 150
 151         /* First some cleanup steps (which are supposed to all be no-ops,
 152          * but...). Skip initial whitespace, strip out internal tabs and
 153          * line breaks, and ignore trailing whitespace.
 154          */
 155         while (g_ascii_isspace (*uri_string))
 156                 uri_string++;
 157
 158         len = strcspn (uri_string, "\t\n\r");
 159         if (uri_string[len]) {
 160                 char *clean = g_malloc (strlen (uri_string) + 1), *d;
 161                 const char *s;
 162
 163                 for (s = uri_string, d = clean; *s; s++) {
 164                         if (*s != '\t' && *s != '\n' && *s != '\r')
 165                                 *d++ = *s;
 166                 }
 167                 *d = '\0';
 168
 169                 uri = soup_uri_new_with_base (base, clean);
 170                 g_free (clean);
 171                 return uri;
 172         }
 173         end = uri_string + len;
 174         while (end > uri_string && g_ascii_isspace (end[-1]))
 175                 end--;
 176
 177         uri = g_slice_new0 (SoupURI);
 178
 179         /* Find fragment. */
 180         hash = strchr (uri_string, '#');
 181         if (hash) {
 182                 uri->fragment = uri_normalized_copy (hash + 1, end - hash + 1,
 183                                                      NULL);
 184                 end = hash;
 185         }
 186
 187         /* Find scheme: initial [a-z+.-]* substring until ":" */
 188         p = uri_string;
 189         while (p < end && (g_ascii_isalnum (*p) ||
 190                            *p == '.' || *p == '+' || *p == '-'))
 191                 p++;
 192
 193         if (p > uri_string && *p == ':') {
 194                 uri->scheme = soup_uri_parse_scheme (uri_string, p - uri_string);
 195                 uri_string = p + 1;
 196         }
 197
 198         if (uri_string == end && !base && !uri->fragment)
 199                 return uri;
 200
 201         /* Check for authority */
 202         if (strncmp (uri_string, "//", 2) == 0) {
 203                 uri_string += 2;
 204
 205                 path = uri_string + strcspn (uri_string, "/?#");
 206                 if (path > end)
 207                         path = end;
 208                 at = strchr (uri_string, '@');
 209                 if (at && at < path) {
 210                         colon = strchr (uri_string, ':');
 211                         if (colon && colon < at) {
 212                                 uri->password = uri_decoded_copy (colon + 1,
 213                                                                   at - colon - 1);
 214                         } else {
 215                                 uri->password = NULL;
 216                                 colon = at;
 217                         }
 218
 219                         uri->user = uri_decoded_copy (uri_string,
 220                                                       colon - uri_string);
 221                         uri_string = at + 1;
 222                 } else
 223                         uri->user = uri->password = NULL;
 224
 225                 /* Find host and port. */
 226                 if (*uri_string == '[') {
 227                         uri_string++;
 228                         hostend = strchr (uri_string, ']');
 229                         if (!hostend || hostend > path) {
 230                                 soup_uri_free (uri);
 231                                 return NULL;
 232                         }
 233                         if (*(hostend + 1) == ':')
 234                                 colon = hostend + 1;
 235                         else
 236                                 colon = NULL;
 237                 } else {
 238                         colon = memchr (uri_string, ':', path - uri_string);
 239                         hostend = colon ? colon : path;
 240                 }
 241
 242                 uri->host = uri_decoded_copy (uri_string, hostend - uri_string);
 243
 244                 if (colon && colon != path - 1) {
 245                         char *portend;
 246                         uri->port = strtoul (colon + 1, &portend, 10);
 247                         if (portend != (char *)path) {
 248                                 soup_uri_free (uri);
 249                                 return NULL;
 250                         }
 251                 }
 252
 253                 uri_string = path;
 254         }
 255
 256         /* Find query */
 257         question = memchr (uri_string, '?', end - uri_string);
 258         if (question) {
 259                 uri->query = uri_normalized_copy (question + 1,
 260                                                   end - (question + 1),
 261                                                   NULL);
 262                 end = question;
 263         }
 264
 265         if (end != uri_string) {
 266                 uri->path = uri_normalized_copy (uri_string, end - uri_string,
 267                                                  NULL);
 268         }
 269
 270         /* Apply base URI. This is spelled out in RFC 3986. */
 271         if (base && !uri->scheme && uri->host)
 272                 uri->scheme = base->scheme;
 273         else if (base && !uri->scheme) {
 274                 uri->scheme = base->scheme;
 275                 uri->user = g_strdup (base->user);
 276                 uri->password = g_strdup (base->password);
 277                 uri->host = g_strdup (base->host);
 278                 uri->port = base->port;
 279
 280                 if (!uri->path) {
 281                         uri->path = g_strdup (base->path);
 282                         if (!uri->query)
 283                                 uri->query = g_strdup (base->query);
 284                         remove_dot_segments = FALSE;
 285                 } else if (*uri->path != '/') {
 286                         char *newpath, *last;
 287
 288                         last = strrchr (base->path, '/');
 289                         if (last) {
 290                                 newpath = g_strdup_printf ("%.*s%s",
 291                                                            (int)(last + 1 - base->path),
 292                                                            base->path,
 293                                                            uri->path);
 294                         } else
 295                                 newpath = g_strdup_printf ("/%s", uri->path);
 296
 297                         g_free (uri->path);
 298                         uri->path = newpath;
 299                 }
 300         }
 301
 302         if (remove_dot_segments && uri->path && *uri->path) {
 303                 char *p, *q;
 304
 305                 /* Remove "./" where "." is a complete segment. */
 306                 for (p = uri->path + 1; *p; ) {
 307                         if (*(p - 1) == '/' &&
 308                             *p == '.' && *(p + 1) == '/')
 309                                 memmove (p, p + 2, strlen (p + 2) + 1);
 310                         else
 311                                 p++;
 312                 }
 313                 /* Remove "." at end. */
 314                 if (p > uri->path + 2 &&
 315                     *(p - 1) == '.' && *(p - 2) == '/')
 316                         *(p - 1) = '\0';
 317
 318                 /* Remove "<segment>/../" where <segment> != ".." */
 319                 for (p = uri->path + 1; *p; ) {
 320                         if (!strncmp (p, "../", 3)) {
 321                                 p += 3;
 322                                 continue;
 323                         }
 324                         q = strchr (p + 1, '/');
 325                         if (!q)
 326                                 break;
 327                         if (strncmp (q, "/../", 4) != 0) {
 328                                 p = q + 1;
 329                                 continue;
 330                         }
 331                         memmove (p, q + 4, strlen (q + 4) + 1);
 332                         p = uri->path + 1;
 333                 }
 334                 /* Remove "<segment>/.." at end where <segment> != ".." */
 335                 q = strrchr (uri->path, '/');
 336                 if (q && !strcmp (q, "/..")) {
 337                         p = q - 1;
 338                         while (p > uri->path && *p != '/')
 339                                 p--;
 340                         if (strncmp (p, "/../", 4) != 0)
 341                                 *(p + 1) = 0;
 342                 }
 343
 344                 /* Remove extraneous initial "/.."s */
 345                 while (!strncmp (uri->path, "/../", 4))
 346                         memmove (uri->path, uri->path + 3, strlen (uri->path) - 2);
 347                 if (!strcmp (uri->path, "/.."))
 348                         uri->path[1] = '\0';
 349         }
 350
 351         /* HTTP-specific stuff */
 352         if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
 353             uri->scheme == SOUP_URI_SCHEME_HTTPS) {
 354                 if (!uri->path)
 355                         uri->path = g_strdup ("/");
 356                 if (!SOUP_URI_VALID_FOR_HTTP (uri)) {
 357                         soup_uri_free (uri);
 358                         return NULL;
 359                 }
 360         }
 361
 362         if (uri->scheme == SOUP_URI_SCHEME_FTP) {
 363                 if (!uri->host) {
 364                         soup_uri_free (uri);
 365                         return NULL;
 366                 }
 367         }
 368
 369         if (!uri->port)
 370                 uri->port = soup_scheme_default_port (uri->scheme);
 371         if (!uri->path)
 372                 uri->path = g_strdup ("");
 373
 374         return uri;
 375 }
 376
 377 /**
 378  * soup_uri_new:
 379  * @uri_string: a URI
 380  *
 381  * Parses an absolute URI.
 382  *
 383  * You can also pass %NULL for @uri_string if you want to get back an
 384  * "empty" #SoupURI that you can fill in by hand. (You will need to
 385  * call at least soup_uri_set_scheme() and soup_uri_set_path(), since
 386  * those fields are required.)
 387  *
 388  * Return value: a #SoupURI, or %NULL.
 389  **/
 390 SoupURI *
 391 soup_uri_new (const char *uri_string)
 392 {
 393         SoupURI *uri;
 394
 395         if (!uri_string)
 396                 return g_slice_new0 (SoupURI);
 397
 398         uri = soup_uri_new_with_base (NULL, uri_string);
 399         if (!uri)
 400                 return NULL;
 401         if (!uri->scheme) {
 402                 soup_uri_free (uri);
 403                 return NULL;
 404         }
 405
 406         return uri;
 407 }
 408
 409
 410 /**
 411  * soup_uri_to_string:
 412  * @uri: a #SoupURI
 413  * @just_path_and_query: if %TRUE, output just the path and query portions
 414  *
 415  * Returns a string representing @uri.
 416  *
 417  * If @just_path_and_query is %TRUE, this concatenates the path and query
 418  * together. That is, it constructs the string that would be needed in
 419  * the Request-Line of an HTTP request for @uri.
 420  *
 421  * Return value: a string representing @uri, which the caller must free.
 422  **/
 423 char *
 424 soup_uri_to_string (SoupURI *uri, gboolean just_path_and_query)
 425 {
 426         GString *str;
 427         char *return_result;
 428
 429         g_return_val_if_fail (uri != NULL, NULL);
 430
 431         /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
 432          * tests/uri-parsing AFTERWARD.
 433          */
 434
 435         str = g_string_sized_new (20);
 436
 437         if (uri->scheme && !just_path_and_query)
 438                 g_string_append_printf (str, "%s:", uri->scheme);
 439         if (uri->host && !just_path_and_query) {
 440                 g_string_append (str, "//");
 441                 if (uri->user) {
 442                         append_uri_encoded (str, uri->user, ":;@?/");
 443                         g_string_append_c (str, '@');
 444                 }
 445                 if (strchr (uri->host, ':')) {
 446                         g_string_append_c (str, '[');
 447                         g_string_append (str, uri->host);
 448                         g_string_append_c (str, ']');
 449                 } else
 450                         append_uri_encoded (str, uri->host, ":/");
 451                 if (uri->port && uri->port != soup_scheme_default_port (uri->scheme))
 452                         g_string_append_printf (str, ":%u", uri->port);
 453                 if (!uri->path && (uri->query || uri->fragment))
 454                         g_string_append_c (str, '/');
 455         }
 456
 457         if (uri->path && *uri->path)
 458                 g_string_append (str, uri->path);
 459
 460         if (uri->query) {
 461                 g_string_append_c (str, '?');
 462                 g_string_append (str, uri->query);
 463         }
 464         if (uri->fragment && !just_path_and_query) {
 465                 g_string_append_c (str, '#');
 466                 g_string_append (str, uri->fragment);
 467         }
 468
 469         return_result = str->str;
 470         g_string_free (str, FALSE);
 471
 472         return return_result;
 473 }
 474
 475 /**
 476  * soup_uri_copy:
 477  * @uri: a #SoupURI
 478  *
 479  * Copies @uri
 480  *
 481  * Return value: a copy of @uri, which must be freed with soup_uri_free()
 482  **/
 483 SoupURI *
 484 soup_uri_copy (SoupURI *uri)
 485 {
 486         SoupURI *dup;
 487
 488         g_return_val_if_fail (uri != NULL, NULL);
 489
 490         dup = g_slice_new0 (SoupURI);
 491         dup->scheme   = uri->scheme;
 492         dup->user     = g_strdup (uri->user);
 493         dup->password = g_strdup (uri->password);
 494         dup->host     = g_strdup (uri->host);
 495         dup->port     = uri->port;
 496         dup->path     = g_strdup (uri->path);
 497         dup->query    = g_strdup (uri->query);
 498         dup->fragment = g_strdup (uri->fragment);
 499
 500         return dup;
 501 }
 502
 503 static inline gboolean
 504 parts_equal (const char *one, const char *two, gboolean insensitive)
 505 {
 506         if (!one && !two)
 507                 return TRUE;
 508         if (!one || !two)
 509                 return FALSE;
 510         return insensitive ? !g_ascii_strcasecmp (one, two) : !strcmp (one, two);
 511 }
 512
 513 /**
 514  * soup_uri_equal:
 515  * @uri1: a #SoupURI
 516  * @uri2: another #SoupURI
 517  *
 518  * Tests whether or not @uri1 and @uri2 are equal in all parts
 519  *
 520  * Return value: %TRUE or %FALSE
 521  **/
 522 gboolean
 523 soup_uri_equal (SoupURI *uri1, SoupURI *uri2)
 524 {
 525         if (uri1->scheme != uri2->scheme                         ||
 526             uri1->port   != uri2->port                           ||
 527             !parts_equal (uri1->user, uri2->user, FALSE)         ||
 528             !parts_equal (uri1->password, uri2->password, FALSE) ||
 529             !parts_equal (uri1->host, uri2->host, TRUE)          ||
 530             !parts_equal (uri1->path, uri2->path, FALSE)         ||
 531             !parts_equal (uri1->query, uri2->query, FALSE)       ||
 532             !parts_equal (uri1->fragment, uri2->fragment, FALSE))
 533                 return FALSE;
 534
 535         return TRUE;
 536 }
 537
 538 /**
 539  * soup_uri_free:
 540  * @uri: a #SoupURI
 541  *
 542  * Frees @uri.
 543  **/
 544 void
 545 soup_uri_free (SoupURI *uri)
 546 {
 547         g_return_if_fail (uri != NULL);
 548
 549         g_free (uri->user);
 550         g_free (uri->password);
 551         g_free (uri->host);
 552         g_free (uri->path);
 553         g_free (uri->query);
 554         g_free (uri->fragment);
 555
 556         g_slice_free (SoupURI, uri);
 557 }
 558
 559 static void
 560 append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars)
 561 {
 562         const unsigned char *s = (const unsigned char *)in;
 563
 564         while (*s) {
 565                 if (soup_char_is_uri_percent_encoded (*s) ||
 566                     soup_char_is_uri_gen_delims (*s) ||
 567                     (extra_enc_chars && strchr (extra_enc_chars, *s)))
 568                         g_string_append_printf (str, "%%%02X", (int)*s++);
 569                 else
 570                         g_string_append_c (str, *s++);
 571         }
 572 }
 573
 574 /**
 575  * soup_uri_encode:
 576  * @part: a URI part
 577  * @escape_extra: (allow-none): additional reserved characters to
 578  * escape (or %NULL)
 579  *
 580  * This %<!-- -->-encodes the given URI part and returns the escaped
 581  * version in allocated memory, which the caller must free when it is
 582  * done.
 583  *
 584  * Return value: the encoded URI part
 585  **/
 586 char *
 587 soup_uri_encode (const char *part, const char *escape_extra)
 588 {
 589         GString *str;
 590         char *encoded;
 591
 592         str = g_string_new (NULL);
 593         append_uri_encoded (str, part, escape_extra);
 594         encoded = str->str;
 595         g_string_free (str, FALSE);
 596
 597         return encoded;
 598 }
 599
 600 #define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
 601 #define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
 602
 603 char *
 604 uri_decoded_copy (const char *part, int length)
 605 {
 606         unsigned char *s, *d;
 607         char *decoded = g_strndup (part, length);
 608
 609         s = d = (unsigned char *)decoded;
 610         do {
 611                 if (*s == '%') {
 612                         if (!g_ascii_isxdigit (s[1]) ||
 613                             !g_ascii_isxdigit (s[2])) {
 614                                 *d++ = *s;
 615                                 continue;
 616                         }
 617                         *d++ = HEXCHAR (s);
 618                         s += 2;
 619                 } else
 620                         *d++ = *s;
 621         } while (*s++);
 622
 623         return decoded;
 624 }
 625
 626 /**
 627  * soup_uri_decode:
 628  * @part: a URI part
 629  *
 630  * Fully %<!-- -->-decodes @part.
 631  *
 632  * In the past, this would return %NULL if @part contained invalid
 633  * percent-encoding, but now it just ignores the problem (as
 634  * soup_uri_new() already did).
 635  *
 636  * Return value: the decoded URI part.
 637  */
 638 char *
 639 soup_uri_decode (const char *part)
 640 {
 641         return uri_decoded_copy (part, strlen (part));
 642 }
 643
 644 static char *
 645 uri_normalized_copy (const char *part, int length,
 646                      const char *unescape_extra)
 647 {
 648         unsigned char *s, *d, c;
 649         char *normalized = g_strndup (part, length);
 650         gboolean need_fixup = FALSE;
 651
 652         s = d = (unsigned char *)normalized;
 653         do {
 654                 if (*s == '%') {
 655                         if (!g_ascii_isxdigit (s[1]) ||
 656                             !g_ascii_isxdigit (s[2])) {
 657                                 *d++ = *s;
 658                                 continue;
 659                         }
 660
 661                         c = HEXCHAR (s);
 662                         if (soup_char_is_uri_unreserved (c) ||
 663                             (unescape_extra && strchr (unescape_extra, c))) {
 664                                 *d++ = c;
 665                                 s += 2;
 666                         } else {
 667                                 /* We leave it unchanged. We used to uppercase percent-encoded
 668                                  * triplets but we do not do it any more as RFC3986 Section 6.2.2.1
 669                                  * says that they only SHOULD be case normalized.
 670                                  */
 671                                 *d++ = *s++;
 672                                 *d++ = *s++;
 673                                 *d++ = *s;
 674                         }
 675                 } else {
 676                         if (*s == ' ')
 677                                 need_fixup = TRUE;
 678                         *d++ = *s;
 679                 }
 680         } while (*s++);
 681
 682         if (need_fixup) {
 683                 GString *fixed;
 684                 char *sp, *p;
 685
 686                 fixed = g_string_new (NULL);
 687                 p = normalized;
 688                 while ((sp = strchr (p, ' '))) {
 689                         g_string_append_len (fixed, p, sp - p);
 690                         g_string_append (fixed, "%20");
 691                         p = sp + 1;
 692                 }
 693                 g_string_append (fixed, p);
 694                 g_free (normalized);
 695                 normalized = g_string_free (fixed, FALSE);
 696         }
 697
 698         return normalized;
 699 }
 700
 701 /**
 702  * soup_uri_normalize:
 703  * @part: a URI part
 704  * @unescape_extra: reserved characters to unescape (or %NULL)
 705  *
 706  * %<!-- -->-decodes any "unreserved" characters (or characters in
 707  * @unescape_extra) in @part.
 708  *
 709  * "Unreserved" characters are those that are not allowed to be used
 710  * for punctuation according to the URI spec. For example, letters are
 711  * unreserved, so soup_uri_normalize() will turn
 712  * <literal>http://example.com/foo/b%<!-- -->61r</literal> into
 713  * <literal>http://example.com/foo/bar</literal>, which is guaranteed
 714  * to mean the same thing. However, "/" is "reserved", so
 715  * <literal>http://example.com/foo%<!-- -->2Fbar</literal> would not
 716  * be changed, because it might mean something different to the
 717  * server.
 718  *
 719  * In the past, this would return %NULL if @part contained invalid
 720  * percent-encoding, but now it just ignores the problem (as
 721  * soup_uri_new() already did).
 722  *
 723  * Return value: the normalized URI part
 724  */
 725 char *
 726 soup_uri_normalize (const char *part, const char *unescape_extra)
 727 {
 728         return uri_normalized_copy (part, strlen (part), unescape_extra);
 729 }
 730
 731
 732 /**
 733  * soup_uri_uses_default_port:
 734  * @uri: a #SoupURI
 735  *
 736  * Tests if @uri uses the default port for its scheme. (Eg, 80 for
 737  * http.) (This only works for http and https; libsoup does not know
 738  * the default ports of other protocols.)
 739  *
 740  * Return value: %TRUE or %FALSE
 741  **/
 742 gboolean
 743 soup_uri_uses_default_port (SoupURI *uri)
 744 {
 745         g_return_val_if_fail (uri->scheme == SOUP_URI_SCHEME_HTTP ||
 746                               uri->scheme == SOUP_URI_SCHEME_HTTPS ||
 747                               uri->scheme == SOUP_URI_SCHEME_FTP, FALSE);
 748
 749         return uri->port == soup_scheme_default_port (uri->scheme);
 750 }
 751
 752 /**
 753  * SOUP_URI_SCHEME_HTTP:
 754  *
 755  * "http" as an interned string. This can be compared directly against
 756  * the value of a #SoupURI's <structfield>scheme</structfield>
 757  **/
 758
 759 /**
 760  * SOUP_URI_SCHEME_HTTPS:
 761  *
 762  * "https" as an interned string. This can be compared directly
 763  * against the value of a #SoupURI's <structfield>scheme</structfield>
 764  **/
 765
 766 /**
 767  * soup_uri_get_scheme:
 768  * @uri: a #SoupURI
 769  *
 770  * Gets @uri's scheme.
 771  *
 772  * Return value: @uri's scheme.
 773  *
 774  * Since: 2.32
 775  **/
 776 const char *
 777 soup_uri_get_scheme (SoupURI *uri)
 778 {
 779         return uri->scheme;
 780 }
 781
 782 /**
 783  * soup_uri_set_scheme:
 784  * @uri: a #SoupURI
 785  * @scheme: the URI scheme
 786  *
 787  * Sets @uri's scheme to @scheme. This will also set @uri's port to
 788  * the default port for @scheme, if known.
 789  **/
 790 void
 791 soup_uri_set_scheme (SoupURI *uri, const char *scheme)
 792 {
 793         uri->scheme = soup_uri_parse_scheme (scheme, strlen (scheme));
 794         uri->port = soup_scheme_default_port (uri->scheme);
 795 }
 796
 797 /**
 798  * soup_uri_get_user:
 799  * @uri: a #SoupURI
 800  *
 801  * Gets @uri's user.
 802  *
 803  * Return value: @uri's user.
 804  *
 805  * Since: 2.32
 806  **/
 807 const char *
 808 soup_uri_get_user (SoupURI *uri)
 809 {
 810         return uri->user;
 811 }
 812
 813 /**
 814  * soup_uri_set_user:
 815  * @uri: a #SoupURI
 816  * @user: the username, or %NULL
 817  *
 818  * Sets @uri's user to @user.
 819  **/
 820 void
 821 soup_uri_set_user (SoupURI *uri, const char *user)
 822 {
 823         g_free (uri->user);
 824         uri->user = g_strdup (user);
 825 }
 826
 827 /**
 828  * soup_uri_get_password:
 829  * @uri: a #SoupURI
 830  *
 831  * Gets @uri's password.
 832  *
 833  * Return value: @uri's password.
 834  *
 835  * Since: 2.32
 836  **/
 837 const char *
 838 soup_uri_get_password (SoupURI *uri)
 839 {
 840         return uri->password;
 841 }
 842
 843 /**
 844  * soup_uri_set_password:
 845  * @uri: a #SoupURI
 846  * @password: the password, or %NULL
 847  *
 848  * Sets @uri's password to @password.
 849  **/
 850 void
 851 soup_uri_set_password (SoupURI *uri, const char *password)
 852 {
 853         g_free (uri->password);
 854         uri->password = g_strdup (password);
 855 }
 856
 857 /**
 858  * soup_uri_get_host:
 859  * @uri: a #SoupURI
 860  *
 861  * Gets @uri's host.
 862  *
 863  * Return value: @uri's host.
 864  *
 865  * Since: 2.32
 866  **/
 867 const char *
 868 soup_uri_get_host (SoupURI *uri)
 869 {
 870         return uri->host;
 871 }
 872
 873 /**
 874  * soup_uri_set_host:
 875  * @uri: a #SoupURI
 876  * @host: the hostname or IP address, or %NULL
 877  *
 878  * Sets @uri's host to @host.
 879  *
 880  * If @host is an IPv6 IP address, it should not include the brackets
 881  * required by the URI syntax; they will be added automatically when
 882  * converting @uri to a string.
 883  **/
 884 void
 885 soup_uri_set_host (SoupURI *uri, const char *host)
 886 {
 887         g_free (uri->host);
 888         uri->host = g_strdup (host);
 889 }
 890
 891 /**
 892  * soup_uri_get_port:
 893  * @uri: a #SoupURI
 894  *
 895  * Gets @uri's port.
 896  *
 897  * Return value: @uri's port.
 898  *
 899  * Since: 2.32
 900  **/
 901 guint
 902 soup_uri_get_port (SoupURI *uri)
 903 {
 904         return uri->port;
 905 }
 906
 907 /**
 908  * soup_uri_set_port:
 909  * @uri: a #SoupURI
 910  * @port: the port, or 0
 911  *
 912  * Sets @uri's port to @port. If @port is 0, @uri will not have an
 913  * explicitly-specified port.
 914  **/
 915 void
 916 soup_uri_set_port (SoupURI *uri, guint port)
 917 {
 918         uri->port = port;
 919 }
 920
 921 /**
 922  * soup_uri_get_path:
 923  * @uri: a #SoupURI
 924  *
 925  * Gets @uri's path.
 926  *
 927  * Return value: @uri's path.
 928  *
 929  * Since: 2.32
 930  **/
 931 const char *
 932 soup_uri_get_path (SoupURI *uri)
 933 {
 934         return uri->path;
 935 }
 936
 937 /**
 938  * soup_uri_set_path:
 939  * @uri: a #SoupURI
 940  * @path: the path
 941  *
 942  * Sets @uri's path to @path.
 943  **/
 944 void
 945 soup_uri_set_path (SoupURI *uri, const char *path)
 946 {
 947         g_free (uri->path);
 948         uri->path = g_strdup (path);
 949 }
 950
 951 /**
 952  * soup_uri_get_query:
 953  * @uri: a #SoupURI
 954  *
 955  * Gets @uri's query.
 956  *
 957  * Return value: @uri's query.
 958  *
 959  * Since: 2.32
 960  **/
 961 const char *
 962 soup_uri_get_query (SoupURI *uri)
 963 {
 964         return uri->query;
 965 }
 966
 967 /**
 968  * soup_uri_set_query:
 969  * @uri: a #SoupURI
 970  * @query: the query
 971  *
 972  * Sets @uri's query to @query.
 973  **/
 974 void
 975 soup_uri_set_query (SoupURI *uri, const char *query)
 976 {
 977         g_free (uri->query);
 978         uri->query = g_strdup (query);
 979 }
 980
 981 /**
 982  * soup_uri_set_query_from_form:
 983  * @uri: a #SoupURI
 984  * @form: (element-type utf8 utf8): a #GHashTable containing HTML form
 985  * information
 986  *
 987  * Sets @uri's query to the result of encoding @form according to the
 988  * HTML form rules. See soup_form_encode_hash() for more information.
 989  **/
 990 void
 991 soup_uri_set_query_from_form (SoupURI *uri, GHashTable *form)
 992 {
 993         g_free (uri->query);
 994         uri->query = soup_form_encode_hash (form);
 995 }
 996
 997 /**
 998  * soup_uri_set_query_from_fields:
 999  * @uri: a #SoupURI
1000  * @first_field: name of the first form field to encode into query
1001  * @...: value of @first_field, followed by additional field names
1002  * and values, terminated by %NULL.
1003  *
1004  * Sets @uri's query to the result of encoding the given form fields
1005  * and values according to the * HTML form rules. See
1006  * soup_form_encode() for more information.
1007  **/
1008 void
1009 soup_uri_set_query_from_fields (SoupURI    *uri,
1010                                 const char *first_field,
1011                                 ...)
1012 {
1013         va_list args;
1014
1015         g_free (uri->query);
1016         va_start (args, first_field);
1017         uri->query = soup_form_encode_valist (first_field, args);
1018         va_end (args);
1019 }
1020
1021 /**
1022  * soup_uri_get_fragment:
1023  * @uri: a #SoupURI
1024  *
1025  * Gets @uri's fragment.
1026  *
1027  * Return value: @uri's fragment.
1028  *
1029  * Since: 2.32
1030  **/
1031 const char *
1032 soup_uri_get_fragment (SoupURI *uri)
1033 {
1034         return uri->fragment;
1035 }
1036
1037 /**
1038  * soup_uri_set_fragment:
1039  * @uri: a #SoupURI
1040  * @fragment: the fragment
1041  *
1042  * Sets @uri's fragment to @fragment.
1043  **/
1044 void
1045 soup_uri_set_fragment (SoupURI *uri, const char *fragment)
1046 {
1047         g_free (uri->fragment);
1048         uri->fragment = g_strdup (fragment);
1049 }
1050
1051 /**
1052  * soup_uri_copy_host:
1053  * @uri: a #SoupUri
1054  *
1055  * Makes a copy of @uri, considering only the protocol, host, and port
1056  *
1057  * Return value: the new #SoupUri
1058  *
1059  * Since: 2.26.3
1060  **/
1061 SoupURI *
1062 soup_uri_copy_host (SoupURI *uri)
1063 {
1064         SoupURI *dup;
1065
1066         g_return_val_if_fail (uri != NULL, NULL);
1067
1068         dup = soup_uri_new (NULL);
1069         dup->scheme = uri->scheme;
1070         dup->host   = g_strdup (uri->host);
1071         dup->port   = uri->port;
1072         if (dup->scheme == SOUP_URI_SCHEME_HTTP ||
1073             dup->scheme == SOUP_URI_SCHEME_HTTPS)
1074                 dup->path = g_strdup ("");
1075
1076         return dup;
1077 }
1078
1079 /**
1080  * soup_uri_host_hash:
1081  * @key: (type Soup.URI): a #SoupURI
1082  *
1083  * Hashes @key, considering only the scheme, host, and port.
1084  *
1085  * Return value: a hash
1086  *
1087  * Since: 2.26.3
1088  **/
1089 guint
1090 soup_uri_host_hash (gconstpointer key)
1091 {
1092         const SoupURI *uri = key;
1093
1094         g_return_val_if_fail (uri != NULL && uri->host != NULL, 0);
1095
1096         return GPOINTER_TO_UINT (uri->scheme) + uri->port +
1097                 soup_str_case_hash (uri->host);
1098 }
1099
1100 /**
1101  * soup_uri_host_equal:
1102  * @v1: (type Soup.URI): a #SoupURI
1103  * @v2: (type Soup.URI): a #SoupURI
1104  *
1105  * Compares @v1 and @v2, considering only the scheme, host, and port.
1106  *
1107  * Return value: whether or not the URIs are equal in scheme, host,
1108  * and port.
1109  *
1110  * Since: 2.26.3
1111  **/
1112 gboolean
1113 soup_uri_host_equal (gconstpointer v1, gconstpointer v2)
1114 {
1115         const SoupURI *one = v1;
1116         const SoupURI *two = v2;
1117
1118         g_return_val_if_fail (one != NULL && two != NULL, one == two);
1119         g_return_val_if_fail (one->host != NULL && two->host != NULL, one->host == two->host);
1120
1121         if (one->scheme != two->scheme)
1122                 return FALSE;
1123         if (one->port != two->port)
1124                 return FALSE;
1125
1126         return g_ascii_strcasecmp (one->host, two->host) == 0;
1127 }
1128
1129
1130 GType
1131 soup_uri_get_type (void)
1132 {
1133         static volatile gsize type_volatile = 0;
1134
1135         if (g_once_init_enter (&type_volatile)) {
1136                 GType type = g_boxed_type_register_static (
1137                         g_intern_static_string ("SoupURI"),
1138                         (GBoxedCopyFunc) soup_uri_copy,
1139                         (GBoxedFreeFunc) soup_uri_free);
1140                 g_once_init_leave (&type_volatile, type);
1141         }
1142         return type_volatile;
1143 }