/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* soup-uri.c : utility functions to parse URLs */

/*
 * Copyright 1999-2003 Ximian, Inc.
 */
14 static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
16 static inline SoupProtocol
17 soup_uri_get_protocol (const char *proto, int len)
21 g_return_val_if_fail (len < sizeof (proto_buf), 0);
23 memcpy (proto_buf, proto, len);
24 proto_buf[len] = '\0';
25 return g_quark_from_string (proto_buf);
28 static inline const char *
29 soup_protocol_name (SoupProtocol proto)
31 return g_quark_to_string (proto);
35 soup_protocol_default_port (SoupProtocol proto)
37 if (proto == SOUP_PROTOCOL_HTTP)
39 else if (proto == SOUP_PROTOCOL_HTTPS)
46 * soup_uri_new_with_base:
48 * @uri_string: the URI
50 * Parses @uri_string relative to @base.
52 * Return value: a parsed #SoupUri.
55 soup_uri_new_with_base (const SoupUri *base, const char *uri_string)
58 const char *end, *hash, *colon, *at, *slash, *question;
61 uri = g_new0 (SoupUri, 1);
63 /* See RFC2396 for details. IF YOU CHANGE ANYTHING IN THIS
64 * FUNCTION, RUN tests/uri-parsing AFTERWARDS.
68 end = hash = strchr (uri_string, '#');
69 if (hash && hash[1]) {
70 uri->fragment = g_strdup (hash + 1);
71 soup_uri_decode (uri->fragment);
73 end = uri_string + strlen (uri_string);
75 /* Find protocol: initial [a-z+.-]* substring until ":" */
77 while (p < end && (isalnum ((unsigned char)*p) ||
78 *p == '.' || *p == '+' || *p == '-'))
81 if (p > uri_string && *p == ':') {
82 uri->protocol = soup_uri_get_protocol (uri_string, p - uri_string);
90 if (!*uri_string && !base)
93 /* Check for authority */
94 if (strncmp (uri_string, "//", 2) == 0) {
97 slash = uri_string + strcspn (uri_string, "/#");
98 at = strchr (uri_string, '@');
99 if (at && at < slash) {
100 colon = strchr (uri_string, ':');
101 if (colon && colon < at) {
102 uri->passwd = g_strndup (colon + 1,
104 soup_uri_decode (uri->passwd);
110 uri->user = g_strndup (uri_string, colon - uri_string);
111 soup_uri_decode (uri->user);
114 uri->user = uri->passwd = NULL;
116 /* Find host and port. */
117 colon = strchr (uri_string, ':');
118 if (colon && colon < slash) {
119 uri->host = g_strndup (uri_string, colon - uri_string);
120 uri->port = strtoul (colon + 1, NULL, 10);
122 uri->host = g_strndup (uri_string, slash - uri_string);
123 soup_uri_decode (uri->host);
130 question = memchr (uri_string, '?', end - uri_string);
133 uri->query = g_strndup (question + 1,
134 end - (question + 1));
135 soup_uri_decode (uri->query);
140 if (end != uri_string) {
141 uri->path = g_strndup (uri_string, end - uri_string);
142 soup_uri_decode (uri->path);
145 /* Apply base URI. Again, this is spelled out in RFC 2396. */
146 if (base && !uri->protocol && uri->host)
147 uri->protocol = base->protocol;
148 else if (base && !uri->protocol) {
149 uri->protocol = base->protocol;
150 uri->user = g_strdup (base->user);
151 uri->passwd = g_strdup (base->passwd);
152 uri->host = g_strdup (base->host);
153 uri->port = base->port;
157 uri->path = g_strdup ("");
159 uri->path = g_strdup (base->path);
160 uri->query = g_strdup (base->query);
164 if (*uri->path != '/') {
165 char *newpath, *last, *p, *q;
167 last = strrchr (base->path, '/');
169 newpath = g_strdup_printf ("%.*s/%s",
174 newpath = g_strdup_printf ("/%s", uri->path);
176 /* Remove "./" where "." is a complete segment. */
177 for (p = newpath + 1; *p; ) {
178 if (*(p - 1) == '/' &&
179 *p == '.' && *(p + 1) == '/')
180 memmove (p, p + 2, strlen (p + 2) + 1);
184 /* Remove "." at end. */
185 if (p > newpath + 2 &&
186 *(p - 1) == '.' && *(p - 2) == '/')
188 /* Remove "<segment>/../" where <segment> != ".." */
189 for (p = newpath + 1; *p; ) {
190 if (!strncmp (p, "../", 3)) {
194 q = strchr (p + 1, '/');
197 if (strncmp (q, "/../", 4) != 0) {
201 memmove (p, q + 4, strlen (q + 4) + 1);
204 /* Remove "<segment>/.." at end where <segment> != ".." */
205 q = strrchr (newpath, '/');
206 if (q && !strcmp (q, "/..")) {
208 while (p > newpath && *p != '/')
210 if (strncmp (p, "/../", 4) != 0)
220 if ((uri->protocol == SOUP_PROTOCOL_HTTP ||
221 uri->protocol == SOUP_PROTOCOL_HTTPS) && !uri->host) {
227 uri->port = soup_protocol_default_port (uri->protocol);
229 uri->path = g_strdup ("");
238 * Parses an absolute URI.
240 * Return value: a #SoupUri, or %NULL.
243 soup_uri_new (const char *uri_string)
247 uri = soup_uri_new_with_base (NULL, uri_string);
250 if (!uri->protocol) {
260 append_uri (GString *str, const char *in, const char *extra_enc_chars,
261 gboolean pre_encoded)
264 g_string_append (str, in);
266 append_uri_encoded (str, in, extra_enc_chars);
270 * soup_uri_to_string:
272 * @just_path: if %TRUE, output just the path and query portions
274 * Returns a string representing @uri.
276 * Return value: a string representing @uri, which the caller must free.
279 soup_uri_to_string (const SoupUri *uri, gboolean just_path)
283 gboolean pre_encoded = uri->broken_encoding;
285 /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
286 * tests/uri-parsing AFTERWARD.
289 str = g_string_sized_new (20);
291 if (uri->protocol && !just_path)
292 g_string_sprintfa (str, "%s:", soup_protocol_name (uri->protocol));
293 if (uri->host && !just_path) {
294 g_string_append (str, "//");
296 append_uri (str, uri->user, ":;@/", pre_encoded);
297 g_string_append_c (str, '@');
299 append_uri (str, uri->host, ":/", pre_encoded);
300 if (uri->port && uri->port != soup_protocol_default_port (uri->protocol))
301 g_string_append_printf (str, ":%d", uri->port);
302 if (!uri->path && (uri->query || uri->fragment))
303 g_string_append_c (str, '/');
306 if (uri->path && *uri->path)
307 append_uri (str, uri->path, "?", pre_encoded);
309 g_string_append_c (str, '/');
312 g_string_append_c (str, '?');
313 append_uri (str, uri->query, NULL, pre_encoded);
315 if (uri->fragment && !just_path) {
316 g_string_append_c (str, '#');
317 append_uri (str, uri->fragment, NULL, pre_encoded);
320 return_result = str->str;
321 g_string_free (str, FALSE);
323 return return_result;
332 * Return value: a copy of @uri, which must be freed with soup_uri_free()
335 soup_uri_copy (const SoupUri *uri)
339 g_return_val_if_fail (uri != NULL, NULL);
341 dup = g_new0 (SoupUri, 1);
342 dup->protocol = uri->protocol;
343 dup->user = g_strdup (uri->user);
344 dup->passwd = g_strdup (uri->passwd);
345 dup->host = g_strdup (uri->host);
346 dup->port = uri->port;
347 dup->path = g_strdup (uri->path);
348 dup->query = g_strdup (uri->query);
349 dup->fragment = g_strdup (uri->fragment);
351 dup->broken_encoding = uri->broken_encoding;
357 * soup_uri_copy_root:
360 * Copies the protocol, host, and port of @uri into a new #SoupUri
361 * (all other fields in the new URI will be empty.)
363 * Return value: a partial copy of @uri, which must be freed with
367 soup_uri_copy_root (const SoupUri *uri)
371 g_return_val_if_fail (uri != NULL, NULL);
373 dup = g_new0 (SoupUri, 1);
374 dup->protocol = uri->protocol;
375 dup->host = g_strdup (uri->host);
376 dup->port = uri->port;
381 static inline gboolean
382 parts_equal (const char *one, const char *two)
388 return !strcmp (one, two);
394 * @uri2: another #SoupUri
396 * Tests whether or not @uri1 and @uri2 are equal in all parts
398 * Return value: %TRUE or %FALSE
401 soup_uri_equal (const SoupUri *uri1, const SoupUri *uri2)
403 if (uri1->protocol != uri2->protocol ||
404 uri1->port != uri2->port ||
405 !parts_equal (uri1->user, uri2->user) ||
406 !parts_equal (uri1->passwd, uri2->passwd) ||
407 !parts_equal (uri1->host, uri2->host) ||
408 !parts_equal (uri1->path, uri2->path) ||
409 !parts_equal (uri1->query, uri2->query) ||
410 !parts_equal (uri1->fragment, uri2->fragment))
423 soup_uri_free (SoupUri *uri)
425 g_return_if_fail (uri != NULL);
428 g_free (uri->passwd);
432 g_free (uri->fragment);
/* From RFC 2396 2.4.3, the characters that should always be encoded.
 * Indexed by byte value: a non-zero entry means the byte must be written
 * as a %-escape. Every control character, the delimiters and "unwise"
 * characters listed in the RFC, DEL, and every byte >= 0x80 is marked.
 */
static const char uri_encoded_char[] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x00 - 0x0f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x10 - 0x1f */
	1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  ' ' - '/'  */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,  /*  '0' - '?'  */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  '@' - 'O'  */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,  /*  'P' - '_'  */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  '`' - 'o'  */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,  /*  'p' - 0x7f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x80 - 0x8f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x90 - 0x9f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xa0 - 0xaf */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xb0 - 0xbf */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xc0 - 0xcf */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xd0 - 0xdf */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xe0 - 0xef */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   /* 0xf0 - 0xff */
};
458 append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars)
460 const unsigned char *s = (const unsigned char *)in;
463 if (uri_encoded_char[*s] ||
464 (extra_enc_chars && strchr (extra_enc_chars, *s)))
465 g_string_append_printf (str, "%%%02x", (int)*s++);
467 g_string_append_c (str, *s++);
474 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
475 * to escape (or %NULL)
477 * This %-encodes the given URI part and returns the escaped version
478 * in allocated memory, which the caller must free when it is done.
480 * Return value: the encoded URI part
483 soup_uri_encode (const char *part, const char *escape_extra)
488 str = g_string_new (NULL);
489 append_uri_encoded (str, part, escape_extra);
491 g_string_free (str, FALSE);
/* Returns the value (0-15) of the hexadecimal digit @c, or -1 if @c is
 * not a hexadecimal digit.
 */
static inline int
uri_hex_digit_value (unsigned char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/**
 * soup_uri_decode:
 * @part: a URI part
 *
 * %-decodes the passed-in URI *in place*. The decoded version is
 * never longer than the encoded version, so there does not need to
 * be any additional space at the end of the string.
 *
 * A '%' that is not followed by two hexadecimal digits is not an
 * escape sequence and is copied through unchanged.
 **/
void
soup_uri_decode (char *part)
{
	unsigned char *s, *d;

	s = d = (unsigned char *)part;
	do {
		int hi = -1, lo = -1;

		if (*s == '%') {
			hi = uri_hex_digit_value (s[1]);
			/* Only look at s[2] once s[1] is known to be a
			 * digit, and therefore not the terminating NUL.
			 */
			if (hi >= 0)
				lo = uri_hex_digit_value (s[2]);
		}

		if (lo >= 0) {
			*d++ = (unsigned char)((hi << 4) | lo);
			s += 2;
		} else
			*d++ = *s;
	} while (*s++);	/* the terminating NUL is copied before the loop ends */
}
522 * soup_uri_uses_default_port:
525 * Tests if @uri uses the default port for its protocol. (Eg, 80 for
528 * Return value: %TRUE or %FALSE
531 soup_uri_uses_default_port (const SoupUri *uri)
533 return uri->port == soup_protocol_default_port (uri->protocol);