libsoup/soup-uri.c

   1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /* soup-uri.c : utility functions to parse URLs */
   3
   4 /*
   5  * Copyright 1999-2003 Ximian, Inc.
   6  */
   7
   8 #include <ctype.h>
   9 #include <string.h>
  10 #include <stdlib.h>
  11
  12 #include "soup-uri.h"
  13 #include "soup-form.h"
  14 #include "soup-misc.h"
  15
  16 /**
  17  * SECTION:soup-uri
  18  * @short_description: URIs
  19  *
  20  * A #SoupURI represents a (parsed) URI.
  21  *
  22  * Many applications will not need to use #SoupURI directly at all; on
  23  * the client side, soup_message_new() takes a stringified URI, and on
  24  * the server side, the path and query components are provided for you
  25  * in the server callback.
  26  **/
  27
  28 /**
  29  * SoupURI:
  30  * @scheme: the URI scheme (eg, "http")
  31  * @user: a username, or %NULL
  32  * @password: a password, or %NULL
  33  * @host: the hostname or IP address
  34  * @port: the port number on @host
  35  * @path: the path on @host
  36  * @query: a query for @path, or %NULL
  37  * @fragment: a fragment identifier within @path, or %NULL
  38  *
  39  * A #SoupURI represents a (parsed) URI. #SoupURI supports RFC 3986
  40  * (URI Generic Syntax), and can parse any valid URI. However, libsoup
 * only uses "http" and "https" URIs internally; you can use
  42  * SOUP_URI_VALID_FOR_HTTP() to test if a #SoupURI is a valid HTTP
  43  * URI.
  44  *
  45  * @scheme will always be set in any URI. It is an interned string and
  46  * is always all lowercase. (If you parse a URI with a non-lowercase
  47  * scheme, it will be converted to lowercase.) The macros
  48  * %SOUP_URI_SCHEME_HTTP and %SOUP_URI_SCHEME_HTTPS provide the
  49  * interned values for "http" and "https" and can be compared against
  50  * URI @scheme values.
  51  *
  52  * @user and @password are parsed as defined in the older URI specs
  53  * (ie, separated by a colon; RFC 3986 only talks about a single
  54  * "userinfo" field). Note that @password is not included in the
  55  * output of soup_uri_to_string(). libsoup does not normally use these
  56  * fields; authentication is handled via #SoupSession signals.
  57  *
  58  * @host contains the hostname, and @port the port specified in the
  59  * URI. If the URI doesn't contain a hostname, @host will be %NULL,
  60  * and if it doesn't specify a port, @port may be 0. However, for
  61  * "http" and "https" URIs, @host is guaranteed to be non-%NULL
  62  * (trying to parse an http URI with no @host will return %NULL), and
  63  * @port will always be non-0 (because libsoup knows the default value
  64  * to use when it is not specified in the URI).
  65  *
  66  * @path is always non-%NULL. For http/https URIs, @path will never be
  67  * an empty string either; if the input URI has no path, the parsed
  68  * #SoupURI will have a @path of "/".
  69  *
  70  * @query and @fragment are optional for all URI types.
  71  * soup_form_decode_urlencoded() may be useful for parsing @query.
  72  *
  73  * Note that @path, @query, and @fragment may contain
  74  * %<!-- -->-encoded characters. soup_uri_new() calls
  75  * soup_uri_normalize() on them, but not soup_uri_decode(). This is
  76  * necessary to ensure that soup_uri_to_string() will generate a URI
  77  * that has exactly the same meaning as the original. (In theory,
  78  * #SoupURI should leave @user, @password, and @host partially-encoded
  79  * as well, but this would be more annoying than useful.)
  80  **/
  81
  82 /**
  83  * SOUP_URI_VALID_FOR_HTTP:
  84  * @uri: a #SoupURI
  85  *
  86  * Tests if @uri is a valid #SoupURI for HTTP communication; that is, if
  87  * it can be used to construct a #SoupMessage.
  88  *
  89  * Return value: %TRUE if @uri is a valid "http" or "https" URI.
  90  *
  91  * Since: 2.24
  92  **/
  93
/* Forward declarations for the file-local escaping helpers that are
 * defined further down but used by the parsing/stringifying code above
 * their definitions.
 */
static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
static char *uri_decoded_copy (const char *str, int length, gboolean fixup);
static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra, gboolean fixup);

/* Global pointer slots for the well-known schemes.
 * NOTE(review): presumably these back the SOUP_URI_SCHEME_* macros from
 * soup-uri.h and end up holding interned scheme strings -- confirm
 * against the header; nothing in this file assigns them directly.
 */
gpointer _SOUP_URI_SCHEME_HTTP, _SOUP_URI_SCHEME_HTTPS;
gpointer _SOUP_URI_SCHEME_FTP;
gpointer _SOUP_URI_SCHEME_FILE, _SOUP_URI_SCHEME_DATA;
 101
 102 static inline const char *
 103 soup_uri_get_scheme (const char *scheme, int len)
 104 {
 105         if (len == 4 && !g_ascii_strncasecmp (scheme, "http", len)) {
 106                 return SOUP_URI_SCHEME_HTTP;
 107         } else if (len == 5 && !g_ascii_strncasecmp (scheme, "https", len)) {
 108                 return SOUP_URI_SCHEME_HTTPS;
 109         } else {
 110                 char *lower_scheme;
 111
 112                 lower_scheme = g_ascii_strdown (scheme, len);
 113                 scheme = g_intern_static_string (lower_scheme);
 114                 if (scheme != (const char *)lower_scheme)
 115                         g_free (lower_scheme);
 116                 return scheme;
 117         }
 118 }
 119
 120 static inline guint
 121 soup_scheme_default_port (const char *scheme)
 122 {
 123         if (scheme == SOUP_URI_SCHEME_HTTP)
 124                 return 80;
 125         else if (scheme == SOUP_URI_SCHEME_HTTPS)
 126                 return 443;
 127         else if (scheme == SOUP_URI_SCHEME_FTP)
 128                 return 21;
 129         else
 130                 return 0;
 131 }
 132
 133 /**
 134  * soup_uri_new_with_base:
 135  * @base: a base URI
 136  * @uri_string: the URI
 137  *
 138  * Parses @uri_string relative to @base.
 139  *
 140  * Return value: a parsed #SoupURI.
 141  **/
 142 SoupURI *
 143 soup_uri_new_with_base (SoupURI *base, const char *uri_string)
 144 {
 145         SoupURI *uri;
 146         const char *end, *hash, *colon, *at, *path, *question;
 147         const char *p, *hostend;
 148         gboolean remove_dot_segments = TRUE;
 149         int len;
 150
 151         /* First some cleanup steps (which are supposed to all be no-ops,
 152          * but...). Skip initial whitespace, strip out internal tabs and
 153          * line breaks, and ignore trailing whitespace.
 154          */
 155         while (g_ascii_isspace (*uri_string))
 156                 uri_string++;
 157
 158         len = strcspn (uri_string, "\t\n\r");
 159         if (uri_string[len]) {
 160                 char *clean = g_strdup (uri_string), *bad;
 161
 162                 while ((bad = strpbrk (clean, "\t\n\r")))
 163                         strcpy (bad, bad + 1);
 164                 uri = soup_uri_new_with_base (base, clean);
 165                 g_free (clean);
 166                 return uri;
 167         }
 168         end = uri_string + len;
 169         while (end > uri_string && g_ascii_isspace (end[-1]))
 170                 end--;
 171
 172         uri = g_slice_new0 (SoupURI);
 173
 174         /* Find fragment. */
 175         hash = strchr (uri_string, '#');
 176         if (hash) {
 177                 uri->fragment = uri_normalized_copy (hash + 1, end - hash + 1,
 178                                                      NULL, TRUE);
 179                 end = hash;
 180         }
 181
 182         /* Find scheme: initial [a-z+.-]* substring until ":" */
 183         p = uri_string;
 184         while (p < end && (g_ascii_isalnum (*p) ||
 185                            *p == '.' || *p == '+' || *p == '-'))
 186                 p++;
 187
 188         if (p > uri_string && *p == ':') {
 189                 uri->scheme = soup_uri_get_scheme (uri_string, p - uri_string);
 190                 uri_string = p + 1;
 191         }
 192
 193         if (uri_string == end && !base && !uri->fragment)
 194                 return uri;
 195
 196         /* Check for authority */
 197         if (strncmp (uri_string, "//", 2) == 0) {
 198                 uri_string += 2;
 199
 200                 path = uri_string + strcspn (uri_string, "/?#");
 201                 at = strchr (uri_string, '@');
 202                 if (at && at < path) {
 203                         colon = strchr (uri_string, ':');
 204                         if (colon && colon < at) {
 205                                 uri->password = uri_decoded_copy (colon + 1,
 206                                                                   at - colon - 1,
 207                                                                   TRUE);
 208                         } else {
 209                                 uri->password = NULL;
 210                                 colon = at;
 211                         }
 212
 213                         uri->user = uri_decoded_copy (uri_string,
 214                                                       colon - uri_string,
 215                                                       TRUE);
 216                         uri_string = at + 1;
 217                 } else
 218                         uri->user = uri->password = NULL;
 219
 220                 /* Find host and port. */
 221                 if (*uri_string == '[') {
 222                         uri_string++;
 223                         hostend = strchr (uri_string, ']');
 224                         if (!hostend || hostend > path) {
 225                                 soup_uri_free (uri);
 226                                 return NULL;
 227                         }
 228                         if (*(hostend + 1) == ':')
 229                                 colon = hostend + 1;
 230                         else
 231                                 colon = NULL;
 232                 } else {
 233                         colon = memchr (uri_string, ':', path - uri_string);
 234                         hostend = colon ? colon : path;
 235                 }
 236
 237                 uri->host = uri_decoded_copy (uri_string, hostend - uri_string,
 238                                               TRUE);
 239
 240                 if (colon && colon != path - 1) {
 241                         char *portend;
 242                         uri->port = strtoul (colon + 1, &portend, 10);
 243                         if (portend != (char *)path) {
 244                                 soup_uri_free (uri);
 245                                 return NULL;
 246                         }
 247                 }
 248
 249                 uri_string = path;
 250         }
 251
 252         /* Find query */
 253         question = memchr (uri_string, '?', end - uri_string);
 254         if (question) {
 255                 uri->query = uri_normalized_copy (question + 1,
 256                                                   end - (question + 1),
 257                                                   NULL, TRUE);
 258                 end = question;
 259         }
 260
 261         if (end != uri_string) {
 262                 uri->path = uri_normalized_copy (uri_string, end - uri_string,
 263                                                  NULL, TRUE);
 264         }
 265
 266         /* Apply base URI. This is spelled out in RFC 3986. */
 267         if (base && !uri->scheme && uri->host)
 268                 uri->scheme = base->scheme;
 269         else if (base && !uri->scheme) {
 270                 uri->scheme = base->scheme;
 271                 uri->user = g_strdup (base->user);
 272                 uri->password = g_strdup (base->password);
 273                 uri->host = g_strdup (base->host);
 274                 uri->port = base->port;
 275
 276                 if (!uri->path) {
 277                         uri->path = g_strdup (base->path);
 278                         if (!uri->query)
 279                                 uri->query = g_strdup (base->query);
 280                         remove_dot_segments = FALSE;
 281                 } else if (*uri->path != '/') {
 282                         char *newpath, *last;
 283
 284                         last = strrchr (base->path, '/');
 285                         if (last) {
 286                                 newpath = g_strdup_printf ("%.*s/%s",
 287                                                            (int)(last - base->path),
 288                                                            base->path,
 289                                                            uri->path);
 290                         } else
 291                                 newpath = g_strdup_printf ("/%s", uri->path);
 292
 293                         g_free (uri->path);
 294                         uri->path = newpath;
 295                 }
 296         }
 297
 298         if (remove_dot_segments && uri->path && *uri->path) {
 299                 char *p, *q;
 300
 301                 /* Remove "./" where "." is a complete segment. */
 302                 for (p = uri->path + 1; *p; ) {
 303                         if (*(p - 1) == '/' &&
 304                             *p == '.' && *(p + 1) == '/')
 305                                 memmove (p, p + 2, strlen (p + 2) + 1);
 306                         else
 307                                 p++;
 308                 }
 309                 /* Remove "." at end. */
 310                 if (p > uri->path + 2 &&
 311                     *(p - 1) == '.' && *(p - 2) == '/')
 312                         *(p - 1) = '\0';
 313
 314                 /* Remove "<segment>/../" where <segment> != ".." */
 315                 for (p = uri->path + 1; *p; ) {
 316                         if (!strncmp (p, "../", 3)) {
 317                                 p += 3;
 318                                 continue;
 319                         }
 320                         q = strchr (p + 1, '/');
 321                         if (!q)
 322                                 break;
 323                         if (strncmp (q, "/../", 4) != 0) {
 324                                 p = q + 1;
 325                                 continue;
 326                         }
 327                         memmove (p, q + 4, strlen (q + 4) + 1);
 328                         p = uri->path + 1;
 329                 }
 330                 /* Remove "<segment>/.." at end where <segment> != ".." */
 331                 q = strrchr (uri->path, '/');
 332                 if (q && !strcmp (q, "/..")) {
 333                         p = q - 1;
 334                         while (p > uri->path && *p != '/')
 335                                 p--;
 336                         if (strncmp (p, "/../", 4) != 0)
 337                                 *(p + 1) = 0;
 338                 }
 339
 340                 /* Remove extraneous initial "/.."s */
 341                 while (!strncmp (uri->path, "/../", 4))
 342                         memmove (uri->path, uri->path + 3, strlen (uri->path) - 2);
 343                 if (!strcmp (uri->path, "/.."))
 344                         uri->path[1] = '\0';
 345         }
 346
 347         /* HTTP-specific stuff */
 348         if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
 349             uri->scheme == SOUP_URI_SCHEME_HTTPS) {
 350                 if (!uri->path)
 351                         uri->path = g_strdup ("/");
 352                 if (!SOUP_URI_VALID_FOR_HTTP (uri)) {
 353                         soup_uri_free (uri);
 354                         return NULL;
 355                 }
 356         }
 357
 358         if (uri->scheme == SOUP_URI_SCHEME_FTP) {
 359                 if (!uri->host) {
 360                         soup_uri_free (uri);
 361                         return NULL;
 362                 }
 363         }
 364
 365         if (!uri->port)
 366                 uri->port = soup_scheme_default_port (uri->scheme);
 367         if (!uri->path)
 368                 uri->path = g_strdup ("");
 369
 370         return uri;
 371 }
 372
 373 /**
 374  * soup_uri_new:
 375  * @uri_string: a URI
 376  *
 377  * Parses an absolute URI.
 378  *
 379  * You can also pass %NULL for @uri_string if you want to get back an
 380  * "empty" #SoupURI that you can fill in by hand. (You will need to
 381  * call at least soup_uri_set_scheme() and soup_uri_set_path(), since
 382  * those fields are required.)
 383  *
 384  * Return value: a #SoupURI, or %NULL.
 385  **/
 386 SoupURI *
 387 soup_uri_new (const char *uri_string)
 388 {
 389         SoupURI *uri;
 390
 391         if (!uri_string)
 392                 return g_slice_new0 (SoupURI);
 393
 394         uri = soup_uri_new_with_base (NULL, uri_string);
 395         if (!uri)
 396                 return NULL;
 397         if (!uri->scheme) {
 398                 soup_uri_free (uri);
 399                 return NULL;
 400         }
 401
 402         return uri;
 403 }
 404
 405
 406 /**
 407  * soup_uri_to_string:
 408  * @uri: a #SoupURI
 409  * @just_path_and_query: if %TRUE, output just the path and query portions
 410  *
 411  * Returns a string representing @uri.
 412  *
 413  * If @just_path_and_query is %TRUE, this concatenates the path and query
 414  * together. That is, it constructs the string that would be needed in
 415  * the Request-Line of an HTTP request for @uri.
 416  *
 417  * Return value: a string representing @uri, which the caller must free.
 418  **/
 419 char *
 420 soup_uri_to_string (SoupURI *uri, gboolean just_path_and_query)
 421 {
 422         GString *str;
 423         char *return_result;
 424
 425         g_return_val_if_fail (uri != NULL, NULL);
 426
 427         /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
 428          * tests/uri-parsing AFTERWARD.
 429          */
 430
 431         str = g_string_sized_new (20);
 432
 433         if (uri->scheme && !just_path_and_query)
 434                 g_string_append_printf (str, "%s:", uri->scheme);
 435         if (uri->host && !just_path_and_query) {
 436                 g_string_append (str, "//");
 437                 if (uri->user) {
 438                         append_uri_encoded (str, uri->user, ":;@?/");
 439                         g_string_append_c (str, '@');
 440                 }
 441                 if (strchr (uri->host, ':')) {
 442                         g_string_append_c (str, '[');
 443                         g_string_append (str, uri->host);
 444                         g_string_append_c (str, ']');
 445                 } else
 446                         append_uri_encoded (str, uri->host, ":/");
 447                 if (uri->port && uri->port != soup_scheme_default_port (uri->scheme))
 448                         g_string_append_printf (str, ":%d", uri->port);
 449                 if (!uri->path && (uri->query || uri->fragment))
 450                         g_string_append_c (str, '/');
 451         }
 452
 453         if (uri->path && *uri->path)
 454                 g_string_append (str, uri->path);
 455
 456         if (uri->query) {
 457                 g_string_append_c (str, '?');
 458                 g_string_append (str, uri->query);
 459         }
 460         if (uri->fragment && !just_path_and_query) {
 461                 g_string_append_c (str, '#');
 462                 g_string_append (str, uri->fragment);
 463         }
 464
 465         return_result = str->str;
 466         g_string_free (str, FALSE);
 467
 468         return return_result;
 469 }
 470
 471 /**
 472  * soup_uri_copy:
 473  * @uri: a #SoupURI
 474  *
 475  * Copies @uri
 476  *
 477  * Return value: a copy of @uri, which must be freed with soup_uri_free()
 478  **/
 479 SoupURI *
 480 soup_uri_copy (SoupURI *uri)
 481 {
 482         SoupURI *dup;
 483
 484         g_return_val_if_fail (uri != NULL, NULL);
 485
 486         dup = g_slice_new0 (SoupURI);
 487         dup->scheme   = uri->scheme;
 488         dup->user     = g_strdup (uri->user);
 489         dup->password = g_strdup (uri->password);
 490         dup->host     = g_strdup (uri->host);
 491         dup->port     = uri->port;
 492         dup->path     = g_strdup (uri->path);
 493         dup->query    = g_strdup (uri->query);
 494         dup->fragment = g_strdup (uri->fragment);
 495
 496         return dup;
 497 }
 498
 499 static inline gboolean
 500 parts_equal (const char *one, const char *two, gboolean insensitive)
 501 {
 502         if (!one && !two)
 503                 return TRUE;
 504         if (!one || !two)
 505                 return FALSE;
 506         return insensitive ? !g_ascii_strcasecmp (one, two) : !strcmp (one, two);
 507 }
 508
 509 /**
 510  * soup_uri_equal:
 511  * @uri1: a #SoupURI
 512  * @uri2: another #SoupURI
 513  *
 514  * Tests whether or not @uri1 and @uri2 are equal in all parts
 515  *
 516  * Return value: %TRUE or %FALSE
 517  **/
 518 gboolean
 519 soup_uri_equal (SoupURI *uri1, SoupURI *uri2)
 520 {
 521         if (uri1->scheme != uri2->scheme                         ||
 522             uri1->port   != uri2->port                           ||
 523             !parts_equal (uri1->user, uri2->user, FALSE)         ||
 524             !parts_equal (uri1->password, uri2->password, FALSE) ||
 525             !parts_equal (uri1->host, uri2->host, TRUE)          ||
 526             !parts_equal (uri1->path, uri2->path, FALSE)         ||
 527             !parts_equal (uri1->query, uri2->query, FALSE)       ||
 528             !parts_equal (uri1->fragment, uri2->fragment, FALSE))
 529                 return FALSE;
 530
 531         return TRUE;
 532 }
 533
 534 /**
 535  * soup_uri_free:
 536  * @uri: a #SoupURI
 537  *
 538  * Frees @uri.
 539  **/
 540 void
 541 soup_uri_free (SoupURI *uri)
 542 {
 543         g_return_if_fail (uri != NULL);
 544
 545         g_free (uri->user);
 546         g_free (uri->password);
 547         g_free (uri->host);
 548         g_free (uri->path);
 549         g_free (uri->query);
 550         g_free (uri->fragment);
 551
 552         g_slice_free (SoupURI, uri);
 553 }
 554
/* Character classes, from RFC 3986. Each entry of uri_encoded_char[]
 * below holds one of these values for the byte used as its index.
 */
#define SOUP_URI_UNRESERVED  0  /* may appear literally */
#define SOUP_URI_PCT_ENCODED 1  /* must be written %-encoded */
#define SOUP_URI_GEN_DELIMS  2  /* RFC 3986 "gen-delims" */
#define SOUP_URI_SUB_DELIMS  4  /* RFC 3986 "sub-delims" */

/* Classification table indexed by byte value (0x00 - 0xff). Control
 * characters, space, DEL and every byte >= 0x80 are marked as
 * SOUP_URI_PCT_ENCODED.
 */
static const char uri_encoded_char[] = {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x00 - 0x0f */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x10 - 0x1f */
        1, 4, 1, 2, 4, 1, 4, 4, 4, 4, 4, 4, 4, 0, 0, 2,  /*  !"#$%&'()*+,-./ */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 1, 4, 1, 2,  /* 0123456789:;<=>? */
        2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* @ABCDEFGHIJKLMNO */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 0,  /* PQRSTUVWXYZ[\]^_ */
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* `abcdefghijklmno */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,  /* pqrstuvwxyz{|}~  */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x80 - 0x8f */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x90 - 0x9f */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xa0 - 0xaf */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xb0 - 0xbf */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xc0 - 0xcf */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xd0 - 0xdf */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xe0 - 0xef */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   /* 0xf0 - 0xff */
};
 578
 579 static void
 580 append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars)
 581 {
 582         const unsigned char *s = (const unsigned char *)in;
 583
 584         while (*s) {
 585                 if ((uri_encoded_char[*s] & (SOUP_URI_PCT_ENCODED | SOUP_URI_GEN_DELIMS)) ||
 586                     (extra_enc_chars && strchr (extra_enc_chars, *s)))
 587                         g_string_append_printf (str, "%%%02X", (int)*s++);
 588                 else
 589                         g_string_append_c (str, *s++);
 590         }
 591 }
 592
 593 /**
 594  * soup_uri_encode:
 595  * @part: a URI part
 596  * @escape_extra: additional reserved characters to escape (or %NULL)
 597  *
 598  * This %<!-- -->-encodes the given URI part and returns the escaped
 599  * version in allocated memory, which the caller must free when it is
 600  * done.
 601  *
 602  * Return value: the encoded URI part
 603  **/
 604 char *
 605 soup_uri_encode (const char *part, const char *escape_extra)
 606 {
 607         GString *str;
 608         char *encoded;
 609
 610         str = g_string_new (NULL);
 611         append_uri_encoded (str, part, escape_extra);
 612         encoded = str->str;
 613         g_string_free (str, FALSE);
 614
 615         return encoded;
 616 }
 617
 618 #define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
 619 #define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
 620
 621 static char *
 622 uri_decoded_copy (const char *part, int length, gboolean fixup)
 623 {
 624         unsigned char *s, *d;
 625         char *decoded = g_strndup (part, length);
 626
 627         s = d = (unsigned char *)decoded;
 628         do {
 629                 if (*s == '%') {
 630                         if (!g_ascii_isxdigit (s[1]) ||
 631                             !g_ascii_isxdigit (s[2])) {
 632                                 if (!fixup) {
 633                                         g_free (decoded);
 634                                         return NULL;
 635                                 }
 636                                 *d++ = *s;
 637                                 continue;
 638                         }
 639                         *d++ = HEXCHAR (s);
 640                         s += 2;
 641                 } else
 642                         *d++ = *s;
 643         } while (*s++);
 644
 645         return decoded;
 646 }
 647
 648 /**
 649  * soup_uri_decode:
 650  * @part: a URI part
 651  *
 652  * Fully %<!-- -->-decodes @part.
 653  *
 654  * Return value: the decoded URI part, or %NULL if an invalid percent
 655  * code was encountered.
 656  */
 657 char *
 658 soup_uri_decode (const char *part)
 659 {
 660         return uri_decoded_copy (part, strlen (part), FALSE);
 661 }
 662
 663 static char *
 664 uri_normalized_copy (const char *part, int length,
 665                      const char *unescape_extra, gboolean fixup)
 666 {
 667         unsigned char *s, *d, c;
 668         char *normalized = g_strndup (part, length);
 669         gboolean need_fixup = FALSE;
 670
 671         s = d = (unsigned char *)normalized;
 672         do {
 673                 if (*s == '%') {
 674                         if (!g_ascii_isxdigit (s[1]) ||
 675                             !g_ascii_isxdigit (s[2])) {
 676                                 if (!fixup) {
 677                                         g_free (normalized);
 678                                         return NULL;
 679                                 }
 680                                 *d++ = *s;
 681                                 continue;
 682                         }
 683
 684                         c = HEXCHAR (s);
 685                         if (uri_encoded_char[c] == SOUP_URI_UNRESERVED ||
 686                             (unescape_extra && strchr (unescape_extra, c))) {
 687                                 *d++ = c;
 688                                 s += 2;
 689                         } else {
 690                                 *d++ = *s++;
 691                                 *d++ = g_ascii_toupper (*s++);
 692                                 *d++ = g_ascii_toupper (*s);
 693                         }
 694                 } else {
 695                         if (*s == ' ')
 696                                 need_fixup = TRUE;
 697                         *d++ = *s;
 698                 }
 699         } while (*s++);
 700
 701         if (fixup && need_fixup) {
 702                 char *tmp, *sp;
 703                 /* This code is lame, but so are people who put
 704                  * unencoded spaces in URLs!
 705                  */
 706                 while ((sp = strchr (normalized, ' '))) {
 707                         tmp = g_strdup_printf ("%.*s%%20%s",
 708                                                (int)(sp - normalized),
 709                                                normalized, sp + 1);
 710                         g_free (normalized);
 711                         normalized = tmp;
 712                 };
 713         }
 714
 715         return normalized;
 716 }
 717
 718 /**
 719  * soup_uri_normalize:
 720  * @part: a URI part
 721  * @unescape_extra: reserved characters to unescape (or %NULL)
 722  *
 723  * %<!-- -->-decodes any "unreserved" characters (or characters in
 724  * @unescape_extra) in @part.
 725  *
 726  * "Unreserved" characters are those that are not allowed to be used
 727  * for punctuation according to the URI spec. For example, letters are
 728  * unreserved, so soup_uri_normalize() will turn
 729  * <literal>http://example.com/foo/b%<!-- -->61r</literal> into
 730  * <literal>http://example.com/foo/bar</literal>, which is guaranteed
 731  * to mean the same thing. However, "/" is "reserved", so
 732  * <literal>http://example.com/foo%<!-- -->2Fbar</literal> would not
 733  * be changed, because it might mean something different to the
 734  * server.
 735  *
 736  * Return value: the normalized URI part, or %NULL if an invalid percent
 737  * code was encountered.
 738  */
 739 char *
 740 soup_uri_normalize (const char *part, const char *unescape_extra)
 741 {
 742         return uri_normalized_copy (part, strlen (part), unescape_extra, FALSE);
 743 }
 744
 745
 746 /**
 747  * soup_uri_uses_default_port:
 748  * @uri: a #SoupURI
 749  *
 750  * Tests if @uri uses the default port for its scheme. (Eg, 80 for
 751  * http.) (This only works for http and https; libsoup does not know
 752  * the default ports of other protocols.)
 753  *
 754  * Return value: %TRUE or %FALSE
 755  **/
 756 gboolean
 757 soup_uri_uses_default_port (SoupURI *uri)
 758 {
 759         g_return_val_if_fail (uri->scheme == SOUP_URI_SCHEME_HTTP ||
 760                               uri->scheme == SOUP_URI_SCHEME_HTTPS ||
 761                               uri->scheme == SOUP_URI_SCHEME_FTP, FALSE);
 762
 763         return uri->port == soup_scheme_default_port (uri->scheme);
 764 }
 765
 766 /**
 767  * SOUP_URI_SCHEME_HTTP:
 768  *
 769  * "http" as an interned string. This can be compared directly against
 770  * the value of a #SoupURI's <structfield>scheme</structfield>
 771  **/
 772
 773 /**
 774  * SOUP_URI_SCHEME_HTTPS:
 775  *
 776  * "https" as an interned string. This can be compared directly
 777  * against the value of a #SoupURI's <structfield>scheme</structfield>
 778  **/
 779
 780 /**
 781  * soup_uri_set_scheme:
 782  * @uri: a #SoupURI
 783  * @scheme: the URI scheme
 784  *
 785  * Sets @uri's scheme to @scheme. This will also set @uri's port to
 786  * the default port for @scheme, if known.
 787  **/
 788 void
 789 soup_uri_set_scheme (SoupURI *uri, const char *scheme)
 790 {
 791         uri->scheme = soup_uri_get_scheme (scheme, strlen (scheme));
 792         uri->port = soup_scheme_default_port (uri->scheme);
 793 }
 794
 795 /**
 796  * soup_uri_set_user:
 797  * @uri: a #SoupURI
 798  * @user: the username, or %NULL
 799  *
 800  * Sets @uri's user to @user.
 801  **/
 802 void
 803 soup_uri_set_user (SoupURI *uri, const char *user)
 804 {
 805         g_free (uri->user);
 806         uri->user = g_strdup (user);
 807 }
 808
 809 /**
 810  * soup_uri_set_password:
 811  * @uri: a #SoupURI
 812  * @password: the password, or %NULL
 813  *
 814  * Sets @uri's password to @password.
 815  **/
 816 void
 817 soup_uri_set_password (SoupURI *uri, const char *password)
 818 {
 819         g_free (uri->password);
 820         uri->password = g_strdup (password);
 821 }
 822
 823 /**
 824  * soup_uri_set_host:
 825  * @uri: a #SoupURI
 826  * @host: the hostname or IP address, or %NULL
 827  *
 828  * Sets @uri's host to @host.
 829  *
 830  * If @host is an IPv6 IP address, it should not include the brackets
 831  * required by the URI syntax; they will be added automatically when
 832  * converting @uri to a string.
 833  **/
 834 void
 835 soup_uri_set_host (SoupURI *uri, const char *host)
 836 {
 837         g_free (uri->host);
 838         uri->host = g_strdup (host);
 839 }
 840
 841 /**
 842  * soup_uri_set_port:
 843  * @uri: a #SoupURI
 844  * @port: the port, or 0
 845  *
 846  * Sets @uri's port to @port. If @port is 0, @uri will not have an
 847  * explicitly-specified port.
 848  **/
 849 void
 850 soup_uri_set_port (SoupURI *uri, guint port)
 851 {
 852         uri->port = port;
 853 }
 854
 855 /**
 856  * soup_uri_set_path:
 857  * @uri: a #SoupURI
 858  * @path: the path
 859  *
 860  * Sets @uri's path to @path.
 861  **/
 862 void
 863 soup_uri_set_path (SoupURI *uri, const char *path)
 864 {
 865         g_free (uri->path);
 866         uri->path = g_strdup (path);
 867 }
 868
 869 /**
 870  * soup_uri_set_query:
 871  * @uri: a #SoupURI
 872  * @query: the query
 873  *
 874  * Sets @uri's query to @query.
 875  **/
 876 void
 877 soup_uri_set_query (SoupURI *uri, const char *query)
 878 {
 879         g_free (uri->query);
 880         uri->query = g_strdup (query);
 881 }
 882
 883 /**
 884  * soup_uri_set_query_from_form:
 885  * @uri: a #SoupURI
 886  * @form: a #GHashTable containing HTML form information
 887  *
 888  * Sets @uri's query to the result of encoding @form according to the
 889  * HTML form rules. See soup_form_encode_hash() for more information.
 890  **/
 891 void
 892 soup_uri_set_query_from_form (SoupURI *uri, GHashTable *form)
 893 {
 894         g_free (uri->query);
 895         uri->query = soup_form_encode_urlencoded (form);
 896 }
 897
 898 /**
 899  * soup_uri_set_query_from_fields:
 900  * @uri: a #SoupURI
 901  * @first_field: name of the first form field to encode into query
 902  * @...: value of @first_field, followed by additional field names
 903  * and values, terminated by %NULL.
 904  *
 905  * Sets @uri's query to the result of encoding the given form fields
 906  * and values according to the * HTML form rules. See
 907  * soup_form_encode() for more information.
 908  **/
 909 void
 910 soup_uri_set_query_from_fields (SoupURI    *uri,
 911                                 const char *first_field,
 912                                 ...)
 913 {
 914         va_list args;
 915
 916         g_free (uri->query);
 917         va_start (args, first_field);
 918         uri->query = soup_form_encode_valist (first_field, args);
 919         va_end (args);
 920 }
 921
 922 /**
 923  * soup_uri_set_fragment:
 924  * @uri: a #SoupURI
 925  * @fragment: the fragment
 926  *
 927  * Sets @uri's fragment to @fragment.
 928  **/
 929 void
 930 soup_uri_set_fragment (SoupURI *uri, const char *fragment)
 931 {
 932         g_free (uri->fragment);
 933         uri->fragment = g_strdup (fragment);
 934 }
 935
 936 /**
 937  * soup_uri_copy_host:
 938  * @uri: a #SoupUri
 939  *
 940  * Makes a copy of @uri, considering only the protocol, host, and port
 941  *
 942  * Return value: the new #SoupUri
 943  *
 944  * Since: 2.26.3
 945  **/
 946 SoupURI *
 947 soup_uri_copy_host (SoupURI *uri)
 948 {
 949         SoupURI *dup;
 950
 951         g_return_val_if_fail (uri != NULL, NULL);
 952
 953         dup = soup_uri_new (NULL);
 954         dup->scheme = uri->scheme;
 955         dup->host   = g_strdup (uri->host);
 956         dup->port   = uri->port;
 957         if (dup->scheme == SOUP_URI_SCHEME_HTTP ||
 958             dup->scheme == SOUP_URI_SCHEME_HTTPS)
 959                 dup->path = g_strdup ("");
 960
 961         return dup;
 962 }
 963
 964 /**
 965  * soup_uri_host_hash:
 966  * @key: a #SoupURI
 967  *
 968  * Hashes @key, considering only the scheme, host, and port.
 969  *
 970  * Return value: a hash
 971  *
 972  * Since: 2.26.3
 973  **/
 974 guint
 975 soup_uri_host_hash (gconstpointer key)
 976 {
 977         const SoupURI *uri = key;
 978
 979         g_return_val_if_fail (uri != NULL && uri->host != NULL, 0);
 980
 981         return GPOINTER_TO_UINT (uri->scheme) + uri->port +
 982                 soup_str_case_hash (uri->host);
 983 }
 984
 985 /**
 986  * soup_uri_host_equal:
 987  * @v1: a #SoupURI
 988  * @v2: a #SoupURI
 989  *
 990  * Compares @v1 and @v2, considering only the scheme, host, and port.
 991  *
 992  * Return value: whether or not the URIs are equal in scheme, host,
 993  * and port.
 994  *
 995  * Since: 2.26.3
 996  **/
 997 gboolean
 998 soup_uri_host_equal (gconstpointer v1, gconstpointer v2)
 999 {
1000         const SoupURI *one = v1;
1001         const SoupURI *two = v2;
1002
1003         g_return_val_if_fail (one != NULL && two != NULL, one == two);
1004         g_return_val_if_fail (one->host != NULL && two->host != NULL, one->host == two->host);
1005
1006         if (one->scheme != two->scheme)
1007                 return FALSE;
1008         if (one->port != two->port)
1009                 return FALSE;
1010
1011         return g_ascii_strcasecmp (one->host, two->host) == 0;
1012 }
1013
1014
1015 GType
1016 soup_uri_get_type (void)
1017 {
1018         static volatile gsize type_volatile = 0;
1019
1020         if (g_once_init_enter (&type_volatile)) {
1021                 GType type = g_boxed_type_register_static (
1022                         g_intern_static_string ("SoupURI"),
1023                         (GBoxedCopyFunc) soup_uri_copy,
1024                         (GBoxedFreeFunc) soup_uri_free);
1025                 g_once_init_leave (&type_volatile, type);
1026         }
1027         return type_volatile;
1028 }