1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* camel-url.c : utility functions to parse URLs */
6 * Dan Winship <danw@ximian.com>
7 * Jeffrey Stedfast <fejj@ximian.com>
9 * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of version 2 of the GNU Lesser General Public
13 * License as published by the Free Software Foundation.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
35 #include <glib/gi18n-lib.h>
37 #include "camel-mime-utils.h"
38 #include "camel-object.h"
39 #include "camel-service.h"
40 #include "camel-string-utils.h"
41 #include "camel-url.h"
43 static void copy_param (GQuark key_id, gpointer data, gpointer user_data);
44 static void output_param (GQuark key_id, gpointer data, gpointer user_data);
46 static void append_url_encoded (GString *str, const gchar *in, const gchar *extra_enc_chars);
49 camel_url_get_type (void)
51 static GType type = G_TYPE_INVALID;
53 if (G_UNLIKELY (type == G_TYPE_INVALID))
54 type = g_boxed_type_register_static (
56 (GBoxedCopyFunc) camel_url_copy,
57 (GBoxedFreeFunc) camel_url_free);
63 * camel_url_new_with_base:
65 * @url_string: the URL
67 * Parses @url_string relative to @base.
69 * Returns: a parsed #CamelURL
72 camel_url_new_with_base (CamelURL *base,
73 const gchar *url_string)
76 const gchar *end, *hash, *colon, *semi, *at, *slash, *question;
80 const gchar *start = url_string;
83 g_return_val_if_fail (url_string != NULL, NULL);
85 url = g_new0 (CamelURL, 1);
87 /* See RFC1808 for details. IF YOU CHANGE ANYTHING IN THIS
88 * FUNCTION, RUN tests/misc/url AFTERWARDS.
91 /* Find fragment. RFC 1808 2.4.1 */
92 end = hash = strchr (url_string, '#');
95 url->fragment = g_strdup (hash + 1);
96 camel_url_decode (url->fragment);
99 end = url_string + strlen (url_string);
101 /* Find protocol: initial [a-z+.-]* substring until ":" */
103 while (p < end && (isalnum ((guchar) * p) ||
104 *p == '.' || *p == '+' || *p == '-'))
107 if (p > url_string && *p == ':') {
108 url->protocol = g_strndup (url_string, p - url_string);
109 camel_strdown (url->protocol);
113 if (!*url_string && !base)
117 if (url->protocol && !strcmp (url->protocol, "file")) {
118 url->path = g_filename_from_uri (start, &url->host, NULL);
123 /* Check for authority */
124 if (strncmp (url_string, "//", 2) == 0) {
127 slash = url_string + strcspn (url_string, "/#");
128 at = strchr (url_string, '@');
129 if (at && at < slash) {
130 colon = strchr (url_string, ':');
131 if (colon && colon < at) {
132 /* XXX We used to extract and store the
133 * password here, now we just eat it. */
138 semi = strchr (url_string, ';');
139 if (semi && semi < colon &&
140 !g_ascii_strncasecmp (semi, ";auth=", 6)) {
141 url->authmech = g_strndup (
142 semi + 6, colon - semi - 6);
143 camel_url_decode (url->authmech);
145 url->authmech = NULL;
149 url->user = g_strndup (url_string, semi - url_string);
150 camel_url_decode (url->user);
153 url->user = url->authmech = NULL;
155 /* Find host and port. */
156 colon = strchr (url_string, ':');
157 if (colon && colon < slash) {
158 url->host = g_strndup (url_string, colon - url_string);
159 url->port = strtoul (colon + 1, NULL, 10);
161 url->host = g_strndup (url_string, slash - url_string);
162 camel_url_decode (url->host);
170 question = memchr (url_string, '?', end - url_string);
173 url->query = g_strndup (
174 question + 1, end - (question + 1));
175 camel_url_decode (url->query);
180 /* Find parameters */
181 semi = memchr (url_string, ';', end - url_string);
184 const gchar *cur, *p, *eq;
187 for (cur = semi + 1; cur < end; cur = p + 1) {
188 p = memchr (cur, ';', end - cur);
191 eq = memchr (cur, '=', p - cur);
193 name = g_strndup (cur, eq - cur);
194 value = g_strndup (eq + 1, p - (eq + 1));
195 camel_url_decode (value);
197 name = g_strndup (cur, p - cur);
198 value = g_strdup ("");
200 camel_url_decode (name);
201 g_datalist_set_data_full (
202 &url->params, name, value, g_free);
209 if (end != url_string) {
210 url->path = g_strndup (url_string, end - url_string);
211 camel_url_decode (url->path);
214 /* Apply base URL. Again, this is spelled out in RFC 1808. */
215 if (base && !url->protocol && url->host)
216 url->protocol = g_strdup (base->protocol);
217 else if (base && !url->protocol) {
218 if (!url->user && !url->authmech &&
219 !url->host && !url->port && !url->path &&
220 !url->params && !url->query && !url->fragment)
221 url->fragment = g_strdup (base->fragment);
223 url->protocol = g_strdup (base->protocol);
224 url->user = g_strdup (base->user);
225 url->authmech = g_strdup (base->authmech);
226 url->host = g_strdup (base->host);
227 url->port = base->port;
230 url->path = g_strdup (base->path);
232 g_datalist_foreach (&base->params, copy_param,
235 url->query = g_strdup (base->query);
237 } else if (*url->path != '/') {
238 gchar *newpath, *last, *p, *q;
240 /* the base->path is NULL if given Content-Base url was without last slash,
241 * i.e. like "http://example.com" (this expected only "http://example.com/") */
242 last = base->path ? strrchr (base->path, '/') : NULL;
244 newpath = g_strdup_printf (
246 (gint)(last - base->path),
250 newpath = g_strdup_printf ("/%s", url->path);
252 /* Remove "./" where "." is a complete segment. */
253 for (p = newpath + 1; *p; ) {
254 if (*(p - 1) == '/' &&
255 *p == '.' && *(p + 1) == '/')
256 memmove (p, p + 2, strlen (p + 2) + 1);
260 /* Remove "." at end. */
261 if (p > newpath + 2 &&
262 *(p - 1) == '.' && *(p - 2) == '/')
264 /* Remove "<segment>/../" where <segment> != ".." */
265 for (p = newpath + 1; *p; ) {
266 if (!strncmp (p, "../", 3)) {
270 q = strchr (p + 1, '/');
273 if (strncmp (q, "/../", 4) != 0) {
277 memmove (p, q + 4, strlen (q + 4) + 1);
280 /* Remove "<segment>/.." at end */
281 q = strrchr (newpath, '/');
282 if (q && !strcmp (q, "/..")) {
284 while (p > newpath && *p != '/')
286 if (strncmp (p, "/../", 4) != 0)
298 copy_param (GQuark key_id,
302 GData **copy = user_data;
304 g_datalist_id_set_data_full (copy, key_id, g_strdup (data), g_free);
309 * @url_string: a URL string
310 * @error: return location for a #GError, or %NULL
312 * Parses an absolute URL.
314 * Returns: a #CamelURL if it can be parsed, or %NULL otherwise
317 camel_url_new (const gchar *url_string,
322 if (!url_string || !*url_string)
325 url = camel_url_new_with_base (NULL, url_string);
327 if (!url->protocol) {
328 camel_url_free (url);
330 error, CAMEL_ERROR, CAMEL_ERROR_GENERIC,
331 _("Could not parse URL '%s'"), url_string);
338 * camel_url_to_string:
340 * @flags: additional translation options
342 * Flatten a #CamelURL into a string.
344 * Returns: a string representing @url, which the caller must free
347 camel_url_to_string (CamelURL *url,
351 gchar *return_result;
353 g_return_val_if_fail (url != NULL, NULL);
355 /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
356 * tests/misc/url AFTERWARD.
360 if (url->protocol && !strcmp (url->protocol, "file"))
361 return g_filename_to_uri (url->path, url->host, NULL);
362 #endif /* G_OS_WIN32 */
364 str = g_string_sized_new (20);
367 g_string_append_printf (str, "%s:", url->protocol);
370 g_string_append (str, "//");
372 append_url_encoded (str, url->user, ":;@/");
373 if (url->authmech && *url->authmech && !(flags & CAMEL_URL_HIDE_AUTH)) {
374 g_string_append (str, ";auth=");
375 append_url_encoded (str, url->authmech, ":@/");
377 g_string_append_c (str, '@');
379 append_url_encoded (str, url->host, ":/");
381 g_string_append_printf (str, ":%d", url->port);
382 if (!url->path && (url->params || url->query || url->fragment))
383 g_string_append_c (str, '/');
387 append_url_encoded (str, url->path, ";?");
388 if (url->params && !(flags & CAMEL_URL_HIDE_PARAMS))
389 g_datalist_foreach (&url->params, output_param, str);
391 g_string_append_c (str, '?');
392 append_url_encoded (str, url->query, NULL);
395 g_string_append_c (str, '#');
396 append_url_encoded (str, url->fragment, NULL);
399 return_result = str->str;
400 g_string_free (str, FALSE);
402 return return_result;
406 output_param (GQuark key_id,
410 GString *str = user_data;
412 g_string_append_c (str, ';');
413 append_url_encoded (str, g_quark_to_string (key_id), "?=");
414 if (*(gchar *) data) {
415 g_string_append_c (str, '=');
416 append_url_encoded (str, data, "?");
427 camel_url_free (CamelURL *url)
431 memset (url->user, 0, strlen (url->user));
433 memset (url->host, 0, strlen (url->host));
434 g_free (url->protocol);
436 g_free (url->authmech);
439 g_datalist_clear (&url->params);
441 g_free (url->fragment);
448 * camel_url_set_protocol:
450 * @protocol: protocol schema
452 * Set the protocol of a #CamelURL.
455 camel_url_set_protocol (CamelURL *url,
456 const gchar *protocol)
458 g_return_if_fail (url != NULL);
460 g_free (url->protocol);
461 url->protocol = g_strdup (protocol);
465 * camel_url_set_user:
469 * Set the user of a #CamelURL.
472 camel_url_set_user (CamelURL *url,
475 g_return_if_fail (url != NULL);
478 url->user = g_strdup (user);
482 * camel_url_set_authmech:
484 * @authmech: authentication mechanism
486 * Set the authmech of a #CamelURL.
489 camel_url_set_authmech (CamelURL *url,
490 const gchar *authmech)
492 g_return_if_fail (url != NULL);
494 g_free (url->authmech);
495 url->authmech = g_strdup (authmech);
499 * camel_url_set_host:
503 * Set the hostname of a #CamelURL.
506 camel_url_set_host (CamelURL *url,
509 g_return_if_fail (url != NULL);
512 url->host = g_strdup (host);
516 * camel_url_set_path:
520 * Set the path component of a #CamelURL.
523 camel_url_set_path (CamelURL *url,
526 g_return_if_fail (url != NULL);
529 url->path = g_strdup (path);
533 * camel_url_set_query:
537 * Set the query of a #CamelURL.
540 camel_url_set_query (CamelURL *url,
543 g_return_if_fail (url != NULL);
546 url->query = g_strdup (query);
550 * camel_url_set_fragment:
552 * @fragment: url fragment
554 * Set the fragment of a #CamelURL.
557 camel_url_set_fragment (CamelURL *url,
558 const gchar *fragment)
560 g_return_if_fail (url != NULL);
562 g_free (url->fragment);
563 url->fragment = g_strdup (fragment);
567 * camel_url_set_port:
571 * Set the port on a #CamelURL.
574 camel_url_set_port (CamelURL *url,
577 g_return_if_fail (url != NULL);
583 * camel_url_set_param:
585 * @name: name of the param to set
586 * @value: value of the param to set
588 * Set a param on the #CamelURL.
591 camel_url_set_param (CamelURL *url,
595 g_return_if_fail (url != NULL);
598 g_datalist_set_data_full (&url->params, name, g_strdup (value), g_free);
600 g_datalist_remove_data (&url->params, name);
604 * camel_url_get_param:
606 * @name: name of the param
608 * Get the value of the specified param on the URL.
610 * Returns: the value of a param if found or %NULL otherwise
613 camel_url_get_param (CamelURL *url,
616 g_return_val_if_fail (url != NULL, NULL);
618 return g_datalist_get_data (&url->params, name);
621 /* From RFC 2396 2.4.3, the characters that should always be encoded */
622 static const gchar url_encoded_char[] = {
623 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 - 0x0f */
624 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10 - 0x1f */
625 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ' ' - '/' */
626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* '0' - '?' */
627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '@' - 'O' */
628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 'P' - '_' */
629 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '`' - 'o' */
630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 'p' - 0x7f */
631 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
632 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
633 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
634 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
635 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
636 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
637 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
638 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
642 append_url_encoded (GString *str,
644 const gchar *extra_enc_chars)
646 const guchar *s = (const guchar *) in;
649 if (url_encoded_char[*s] ||
650 (extra_enc_chars && strchr (extra_enc_chars, *s)))
651 g_string_append_printf (str, "%%%02x", (gint) * s++);
653 g_string_append_c (str, *s++);
660 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
661 * to escape (or %NULL)
663 * This %-encodes the given URL part and returns the escaped version
664 * in allocated memory, which the caller must free when it is done.
666 * Returns: the encoded string
669 camel_url_encode (const gchar *part,
670 const gchar *escape_extra)
675 g_return_val_if_fail (part != NULL, NULL);
677 str = g_string_new (NULL);
678 append_url_encoded (str, part, escape_extra);
680 g_string_free (str, FALSE);
689 * %-decodes the passed-in URL *in place*. The decoded version is
690 * never longer than the encoded version, so there does not need to
691 * be any additional space at the end of the string.
694 camel_url_decode (gchar *part)
698 g_return_if_fail (part != NULL);
700 #define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
702 s = d = (guchar *) part;
704 if (*s == '%' && isxdigit (s[1]) && isxdigit (s[2])) {
705 *d++ = (XDIGIT (s[1]) << 4) + XDIGIT (s[2]);
713 camel_url_hash (gconstpointer v)
715 const CamelURL *u = v;
718 #define ADD_HASH(s) if (s) hash ^= g_str_hash (s);
720 ADD_HASH (u->protocol);
722 ADD_HASH (u->authmech);
732 check_equal (gchar *s1,
745 return strcmp (s1, s2) == 0;
749 camel_url_equal (gconstpointer v,
752 const CamelURL *u1 = v, *u2 = v2;
754 return check_equal (u1->protocol, u2->protocol)
755 && check_equal (u1->user, u2->user)
756 && check_equal (u1->authmech, u2->authmech)
757 && check_equal (u1->host, u2->host)
758 && check_equal (u1->path, u2->path)
759 && check_equal (u1->query, u2->query)
760 && u1->port == u2->port;
765 * @in: a #CamelURL to copy
769 * Returns: a duplicate copy of @in
772 camel_url_copy (CamelURL *in)
776 g_return_val_if_fail (in != NULL, NULL);
778 out = g_malloc0 (sizeof (*out));
779 out->protocol = g_strdup (in->protocol);
780 out->user = g_strdup (in->user);
781 out->authmech = g_strdup (in->authmech);
782 out->host = g_strdup (in->host);
783 out->port = in->port;
784 out->path = g_strdup (in->path);
787 g_datalist_foreach (&((CamelURL *) in)->params, copy_param, &out->params);
788 out->query = g_strdup (in->query);
789 out->fragment = g_strdup (in->fragment);
795 camel_url_decode_path (const gchar *path)
801 if (path == NULL || *path == '\0')
802 return g_strdup (""); /* ??? or NULL? */
804 str = g_string_new (NULL);
806 comps = g_strsplit (path, "/", -1);
807 length = g_strv_length (comps);
809 for (ii = 0; ii < length; ii++) {
811 g_string_append_c (str, '/');
812 camel_url_decode (comps[ii]);
813 g_string_append (str, comps[ii]);
818 return g_string_free (str, FALSE);