1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* camel-url.c : utility functions to parse URLs */
6 * Dan Winship <danw@ximian.com>
7 * Jeffrey Stedfast <fejj@ximian.com>
9 * Copyright 1999-2001 Ximian, Inc. (www.ximian.com)
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of version 2 of the GNU Lesser General Public
13 * License as published by the Free Software Foundation.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
36 #include <glib/gi18n-lib.h>
38 #include "camel-exception.h"
39 #include "camel-mime-utils.h"
40 #include "camel-object.h"
41 #include "camel-string-utils.h"
42 #include "camel-url.h"
44 static void copy_param (GQuark key_id, gpointer data, gpointer user_data);
45 static void output_param (GQuark key_id, gpointer data, gpointer user_data);
47 static void append_url_encoded (GString *str, const char *in, const char *extra_enc_chars);
50 * camel_url_new_with_base:
52 * @url_string: the URL
54 * Parses @url_string relative to @base.
56 * Returns a parsed #CamelURL
/*
 * Builds a new CamelURL from url_string, resolving it against base when a
 * base is supplied.
 *
 * Parsing proceeds left to right over url_string, with end marking the
 * limit of the portion still to be consumed:
 *   - the fragment after the first # is copied out and end is pulled back
 *     to the #;
 *   - a leading run of alphanumerics, dot, plus and minus followed by a
 *     colon becomes url->protocol and is passed to camel_strdown;
 *   - for the file protocol, path and host are taken from
 *     g_filename_from_uri;
 *   - after a leading double slash the authority is split into user,
 *     optional ;auth= mechanism, optional password, host and port;
 *   - the query after ? and the ;name=value parameters are cut off the
 *     tail, and whatever remains is the path.
 * The fragment, password, auth mechanism, user, query, parameter names and
 * values, and path are each %-decoded in place with camel_url_decode.
 *
 * Afterwards a URL without its own protocol inherits missing pieces from
 * base, and a relative path is joined to the directory part of base->path
 * and cleaned of dot and dot-dot segments.
 *
 * url_string must not be NULL (g_return_val_if_fail yields NULL).
 */
59 camel_url_new_with_base (CamelURL *base, const char *url_string)
63 const char *end, *hash, *colon, *semi, *at, *slash, *question;
66 g_return_val_if_fail (url_string != NULL, NULL);
68 url = g_new0 (CamelURL, 1);
71 /* See RFC1808 for details. IF YOU CHANGE ANYTHING IN THIS
72 * FUNCTION, RUN tests/misc/url AFTERWARDS.
75 /* Find fragment. RFC 1808 2.4.1 */
76 end = hash = strchr (url_string, '#');
79 url->fragment = g_strdup (hash + 1);
80 camel_url_decode (url->fragment);
83 end = url_string + strlen (url_string);
85 /* Find protocol: initial [a-z+.-]* substring until ":" */
87 while (p < end && (isalnum ((unsigned char)*p) ||
88 *p == '.' || *p == '+' || *p == '-'))
91 if (p > url_string && *p == ':') {
92 url->protocol = g_strndup (url_string, p - url_string);
93 camel_strdown (url->protocol);
97 if (!*url_string && !base)
101 if (url->protocol && !strcmp(url->protocol, "file")) {
102 url->path = g_filename_from_uri(start, &url->host, NULL);
107 /* Check for authority */
108 if (strncmp (url_string, "//", 2) == 0) {
111 slash = url_string + strcspn (url_string, "/#");
112 at = strchr (url_string, '@');
113 if (at && at < slash) {
114 colon = strchr (url_string, ':');
115 if (colon && colon < at) {
116 url->passwd = g_strndup (colon + 1,
118 camel_url_decode (url->passwd);
124 semi = strchr(url_string, ';');
125 if (semi && semi < colon &&
126 !g_ascii_strncasecmp (semi, ";auth=", 6)) {
127 url->authmech = g_strndup (semi + 6,
129 camel_url_decode (url->authmech);
131 url->authmech = NULL;
135 url->user = g_strndup (url_string, semi - url_string);
136 camel_url_decode (url->user);
139 url->user = url->passwd = url->authmech = NULL;
141 /* Find host and port. */
142 colon = strchr (url_string, ':');
143 if (colon && colon < slash) {
144 url->host = g_strndup (url_string, colon - url_string);
/* NOTE(review): the strtoul result is stored with no end-pointer or range
 * check, so a non-numeric or oversized port is accepted silently. */
145 url->port = strtoul (colon + 1, NULL, 10);
147 url->host = g_strndup (url_string, slash - url_string);
148 camel_url_decode (url->host);
156 question = memchr (url_string, '?', end - url_string);
159 url->query = g_strndup (question + 1,
160 end - (question + 1));
161 camel_url_decode (url->query);
166 /* Find parameters */
167 semi = memchr (url_string, ';', end - url_string);
170 const char *cur, *p, *eq;
173 for (cur = semi + 1; cur < end; cur = p + 1) {
174 p = memchr (cur, ';', end - cur);
177 eq = memchr (cur, '=', p - cur);
179 name = g_strndup (cur, eq - cur);
180 value = g_strndup (eq + 1, p - (eq + 1));
181 camel_url_decode (value);
183 name = g_strndup (cur, p - cur);
184 value = g_strdup ("");
186 camel_url_decode (name);
187 g_datalist_set_data_full (&url->params, name,
195 if (end != url_string) {
196 url->path = g_strndup (url_string, end - url_string);
197 camel_url_decode (url->path);
200 /* Apply base URL. Again, this is spelled out in RFC 1808. */
201 if (base && !url->protocol && url->host)
202 url->protocol = g_strdup (base->protocol);
203 else if (base && !url->protocol) {
204 if (!url->user && !url->authmech && !url->passwd &&
205 !url->host && !url->port && !url->path &&
206 !url->params && !url->query && !url->fragment)
207 url->fragment = g_strdup (base->fragment);
209 url->protocol = g_strdup (base->protocol);
210 url->user = g_strdup (base->user);
211 url->authmech = g_strdup (base->authmech);
212 url->passwd = g_strdup (base->passwd);
213 url->host = g_strdup (base->host);
214 url->port = base->port;
217 url->path = g_strdup (base->path);
219 g_datalist_foreach (&base->params, copy_param,
222 url->query = g_strdup (base->query);
224 } else if (*url->path != '/') {
225 char *newpath, *last, *p, *q;
227 /* the base->path is NULL if given Content-Base url was without last slash,
228 i.e. like "http://example.com" (this expected only "http://example.com/") */
229 last = base->path ? strrchr (base->path, '/') : NULL;
231 newpath = g_strdup_printf ("%.*s/%s",
232 (int)(last - base->path),
236 newpath = g_strdup_printf ("/%s", url->path);
238 /* Remove "./" where "." is a complete segment. */
239 for (p = newpath + 1; *p; ) {
240 if (*(p - 1) == '/' &&
241 *p == '.' && *(p + 1) == '/')
/* strlen + 1 so the terminating NUL moves along with the tail */
242 memmove (p, p + 2, strlen (p + 2) + 1);
246 /* Remove "." at end. */
247 if (p > newpath + 2 &&
248 *(p - 1) == '.' && *(p - 2) == '/')
250 /* Remove "<segment>/../" where <segment> != ".." */
251 for (p = newpath + 1; *p; ) {
252 if (!strncmp (p, "../", 3)) {
256 q = strchr (p + 1, '/');
259 if (strncmp (q, "/../", 4) != 0) {
263 memmove (p, q + 4, strlen (q + 4) + 1);
266 /* Remove "<segment>/.." at end */
267 q = strrchr (newpath, '/');
268 if (q && !strcmp (q, "/..")) {
270 while (p > newpath && *p != '/')
272 if (strncmp (p, "/../", 4) != 0)
/* Datalist foreach callback: duplicates the string stored in data with
 * g_strdup and files the copy under the same key_id in the datalist that
 * user_data points to, registering g_free as its destroy notifier. */
284 copy_param (GQuark key_id, gpointer data, gpointer user_data)
286 GData **copy = user_data;
288 g_datalist_id_set_data_full (copy, key_id, g_strdup (data), g_free);
293 * @url_string: a URL string
294 * @ex: a #CamelException
296 * Parses an absolute URL.
298 * Returns a #CamelURL if it can be parsed, or %NULL otherwise
/* Parses an absolute URL. A NULL or empty url_string is rejected up front;
 * otherwise parsing is delegated to camel_url_new_with_base with no base.
 * A result that lacks a protocol is treated as unparseable: it is freed and
 * CAMEL_EXCEPTION_SERVICE_URL_INVALID is set on ex. */
301 camel_url_new (const char *url_string, CamelException *ex)
305 if (!url_string || !*url_string)
308 url = camel_url_new_with_base (NULL, url_string);
310 if (!url->protocol) {
311 camel_url_free (url);
312 camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
313 _("Could not parse URL `%s'"),
321 * camel_url_to_string:
323 * @flags: additional translation options
325 * Flatten a #CamelURL into a string.
327 * Returns a string representing @url, which the caller must free
/*
 * Flattens url into a newly allocated URL string that the caller must free.
 *
 * The output is assembled in order: protocol and colon, a double slash plus
 * the authority (user, optional ;auth= mechanism, optional colon and
 * password, at-sign, host, colon and port), path, ;parameters, ? and query,
 * # and fragment. Every component is written through append_url_encoded
 * with its own set of additionally escaped characters so that its
 * delimiters cannot appear raw:
 *   user      colon, semicolon, at-sign, slash
 *   authmech  colon, at-sign, slash
 *   password  at-sign, slash
 *   host      colon, slash
 *   path      semicolon, question mark
 *   query and fragment  nothing extra
 *
 * flags can suppress pieces: CAMEL_URL_HIDE_AUTH drops the ;auth= part,
 * CAMEL_URL_HIDE_PASSWORD the password and CAMEL_URL_HIDE_PARAMS the
 * parameters. A lone slash is emitted when there is no path but
 * parameters, a query or a fragment follow.
 *
 * NOTE(review): the file-protocol shortcut through g_filename_to_uri sits
 * just before an #endif for G_OS_WIN32, so it is presumably Windows-only;
 * confirm.
 *
 * Returns NULL if url is NULL.
 */
330 camel_url_to_string (CamelURL *url, guint32 flags)
335 g_return_val_if_fail (url != NULL, NULL);
337 /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
338 * tests/misc/url AFTERWARD.
342 if (url->protocol && !strcmp(url->protocol, "file"))
343 return g_filename_to_uri(url->path, url->host, NULL);
344 #endif /* G_OS_WIN32 */
346 str = g_string_sized_new (20);
349 g_string_append_printf (str, "%s:", url->protocol);
352 g_string_append (str, "//");
354 append_url_encoded (str, url->user, ":;@/");
355 if (url->authmech && *url->authmech && !(flags & CAMEL_URL_HIDE_AUTH)) {
356 g_string_append (str, ";auth=");
357 append_url_encoded (str, url->authmech, ":@/");
359 if (url->passwd && !(flags & CAMEL_URL_HIDE_PASSWORD)) {
360 g_string_append_c (str, ':');
361 append_url_encoded (str, url->passwd, "@/");
363 g_string_append_c (str, '@');
365 append_url_encoded (str, url->host, ":/");
367 g_string_append_printf (str, ":%d", url->port);
368 if (!url->path && (url->params || url->query || url->fragment))
369 g_string_append_c (str, '/');
373 append_url_encoded (str, url->path, ";?");
374 if (url->params && !(flags & CAMEL_URL_HIDE_PARAMS))
375 g_datalist_foreach (&url->params, output_param, str);
377 g_string_append_c (str, '?');
378 append_url_encoded (str, url->query, NULL);
381 g_string_append_c (str, '#');
382 append_url_encoded (str, url->fragment, NULL);
/* FALSE keeps the character buffer alive; ownership passes to the caller */
385 return_result = str->str;
386 g_string_free (str, FALSE);
388 return return_result;
/* Datalist foreach callback used by camel_url_to_string: appends one
 * ;name=value parameter to the GString passed as user_data. The name comes
 * from g_quark_to_string (key_id) and is encoded with ? and = escaped in
 * addition to the default set; the value is encoded with ? escaped in
 * addition.
 * NOTE(review): confirm whether the = and the value are written
 * unconditionally or only for a non-empty value. */
392 output_param (GQuark key_id, gpointer data, gpointer user_data)
394 GString *str = user_data;
396 g_string_append_c (str, ';');
397 append_url_encoded (str, g_quark_to_string (key_id), "?=");
399 g_string_append_c (str, '=');
400 append_url_encoded (str, data, "?");
/*
 * Releases url and the strings it owns. The password, user and host buffers
 * are overwritten with zero bytes via memset ahead of the g_free calls, and
 * the parameter datalist is emptied with g_datalist_clear.
 *
 * NOTE(review): a plain memset immediately before a free may be removed by
 * an optimizing compiler as a dead store; if the wipe matters, use a
 * primitive that cannot be elided.
 * NOTE(review): strlen on a NULL member would crash; confirm each memset is
 * guarded by a NULL check.
 */
411 camel_url_free (CamelURL *url)
415 memset(url->passwd, 0, strlen(url->passwd));
417 memset(url->user, 0, strlen(url->user));
419 memset(url->host, 0, strlen(url->host));
420 g_free (url->protocol);
422 g_free (url->authmech);
423 g_free (url->passwd);
426 g_datalist_clear (&url->params);
428 g_free (url->fragment);
/* Expands to the definition of camel_url_set_<part>, a setter for the
 * string member url->part. The generated function refuses a NULL url via
 * g_return_if_fail, releases the previous value and stores a private copy
 * of the new one made with g_strdup, so the caller keeps ownership of the
 * string it passed in. */
435 #define DEFINE_CAMEL_URL_SET(part) \
437 camel_url_set_##part (CamelURL *url, const char *part) \
439 g_return_if_fail (url != NULL); \
441 g_free (url->part); \
442 url->part = g_strdup (part); \
447 * camel_url_set_protocol:
449 * @protocol: protocol scheme
451 * Set the protocol of a #CamelURL.
/* Each DEFINE_CAMEL_URL_SET line below instantiates one string setter:
 * camel_url_set_protocol, _user, _authmech, _passwd, _host, _path, _query
 * and _fragment. */
453 DEFINE_CAMEL_URL_SET (protocol)
457 * camel_url_set_user:
461 * Set the user of a #CamelURL.
463 DEFINE_CAMEL_URL_SET (user)
467 * camel_url_set_authmech:
469 * @authmech: authentication mechanism
471 * Set the authmech of a #CamelURL.
473 DEFINE_CAMEL_URL_SET (authmech)
477 * camel_url_set_passwd:
481 * Set the password of a #CamelURL.
483 DEFINE_CAMEL_URL_SET (passwd)
487 * camel_url_set_host:
491 * Set the hostname of a #CamelURL.
493 DEFINE_CAMEL_URL_SET (host)
497 * camel_url_set_path:
501 * Set the path component of a #CamelURL.
503 DEFINE_CAMEL_URL_SET (path)
507 * camel_url_set_query:
511 * Set the query of a #CamelURL.
513 DEFINE_CAMEL_URL_SET (query)
517 * camel_url_set_fragment:
519 * @fragment: url fragment
521 * Set the fragment of a #CamelURL.
523 DEFINE_CAMEL_URL_SET (fragment)
527 * camel_url_set_port:
531 * Set the port on a #CamelURL.
/* Sets the port of url. Unlike the string setters this takes a plain int,
 * so nothing is copied or freed. A NULL url is rejected by
 * g_return_if_fail. */
534 camel_url_set_port (CamelURL *url, int port)
536 g_return_if_fail (url != NULL);
543 * camel_url_set_param:
545 * @name: name of the param to set
546 * @value: value of the param to set
548 * Set a param on the #CamelURL.
/* Sets or removes the parameter called name on url. The two datalist calls
 * either store a g_strdup copy of value in url->params, with g_free
 * registered as its destroy notifier, or remove the entry for name.
 * NOTE(review): presumably a NULL value selects the removal; confirm the
 * condition. A NULL url is rejected by g_return_if_fail. */
551 camel_url_set_param (CamelURL *url, const char *name, const char *value)
553 g_return_if_fail (url != NULL);
556 g_datalist_set_data_full (&url->params, name, g_strdup(value), g_free);
558 g_datalist_remove_data(&url->params, name);
563 * camel_url_get_param:
565 * @name: name of the param
567 * Get the value of the specified param on the URL.
569 * Returns the value of a param if found or %NULL otherwise
/* Looks up name in url->params and returns whatever g_datalist_get_data
 * yields: the stored value pointer itself, not a copy, or NULL when there
 * is no such parameter. Returns NULL as well when url is NULL. */
572 camel_url_get_param (CamelURL *url, const char *name)
574 g_return_val_if_fail (url != NULL, NULL);
576 return g_datalist_get_data (&url->params, name);
579 /* From RFC 2396 2.4.3, the characters that should always be encoded */
/* Lookup table with one entry per byte value; a nonzero entry marks a byte
 * that append_url_encoded always writes in %-escaped form. Flagged are the
 * control characters 0x00-0x1f, space, double quote, hash, percent, the
 * angle brackets, the square brackets, backslash, caret, backtick, the
 * curly braces, vertical bar, 0x7f and every byte from 0x80 upwards. */
580 static const char url_encoded_char[] = {
581 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 - 0x0f */
582 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10 - 0x1f */
583 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ' ' - '/' */
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* '0' - '?' */
585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '@' - 'O' */
586 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 'P' - '_' */
587 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '`' - 'o' */
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 'p' - 0x7f */
589 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
590 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
591 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
592 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
593 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
594 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
595 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
596 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
/* Appends in to str one byte at a time. A byte that is flagged in
 * url_encoded_char, or that occurs in extra_enc_chars (which may be NULL),
 * is written as % followed by two lowercase hex digits; any other byte is
 * copied unchanged. The input is read through an unsigned char pointer so
 * that bytes of 0x80 and above index the table with a non-negative value.
 * in is dereferenced without a NULL check in this function. */
600 append_url_encoded (GString *str, const char *in, const char *extra_enc_chars)
602 const unsigned char *s = (const unsigned char *)in;
605 if (url_encoded_char[*s] ||
606 (extra_enc_chars && strchr (extra_enc_chars, *s)))
607 g_string_append_printf (str, "%%%02x", (int)*s++);
609 g_string_append_c (str, *s++);
616 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
617 * to escape (or %NULL)
619 * This %-encodes the given URL part and returns the escaped version
620 * in allocated memory, which the caller must free when it is done.
622 * Returns the encoded string
/* Produces a %-encoded copy of part in freshly allocated memory. The work
 * is done by append_url_encoded into a temporary GString, with escape_extra
 * naming additional characters to escape (may be NULL). g_string_free is
 * called with FALSE, which releases only the GString wrapper and leaves the
 * character data for the caller to free. Returns NULL if part is NULL. */
625 camel_url_encode (const char *part, const char *escape_extra)
630 g_return_val_if_fail (part != NULL, NULL);
632 str = g_string_new (NULL);
633 append_url_encoded (str, part, escape_extra);
635 g_string_free (str, FALSE);
644 * %-decodes the passed-in URL *in place*. The decoded version is
645 * never longer than the encoded version, so there does not need to
646 * be any additional space at the end of the string.
/* %-decodes part in place. s is the read cursor and d the write cursor;
 * both start at the beginning of part, and d never overtakes s because a
 * three-byte escape shrinks to a single byte. A NULL part is rejected by
 * g_return_if_fail. */
649 camel_url_decode (char *part)
651 unsigned char *s, *d;
653 g_return_if_fail (part != NULL);
/* XDIGIT turns a hex digit character into its value. Decimal digits map via
 * c - 0x30; for letters the 0x4F mask clears the lowercase bit so that a-f
 * fold onto A-F. The result is only meaningful for characters that have
 * already passed isxdigit. */
655 #define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
657 s = d = (unsigned char *)part;
/* The && chain short-circuits: if s[1] is the terminating NUL, isxdigit
 * fails there and s[2] is never read, so the scan stays inside the string. */
659 if (*s == '%' && isxdigit(s[1]) && isxdigit(s[2])) {
660 *d++ = (XDIGIT (s[1]) << 4) + XDIGIT (s[2]);
/* Computes a hash value for the CamelURL passed as v.
 * NOTE(review): the const void * signature matches GHashFunc, so this is
 * presumably meant as a GHashTable key hash paired with camel_url_equal;
 * confirm against callers.
 * ADD_HASH folds g_str_hash of a string member into hash with XOR, skipping
 * NULL members. XOR is commutative, so two URLs whose strings are swapped
 * between members hash alike; that costs collisions, not correctness. */
669 camel_url_hash (const void *v)
671 const CamelURL *u = v;
674 #define ADD_HASH(s) if (s) hash ^= g_str_hash (s);
676 ADD_HASH (u->protocol);
678 ADD_HASH (u->authmech);
/* Helper for camel_url_equal: reports whether two member strings match,
 * ending in an exact strcmp comparison.
 * NOTE(review): confirm how NULL arguments are treated before strcmp is
 * reached, since strcmp itself must not be given NULL. */
688 check_equal (char *s1, char *s2)
700 return strcmp (s1, s2) == 0;
/* Equality predicate for two CamelURL values passed as untyped pointers.
 * The URLs are equal when protocol, user, authmech, host, path and query
 * all match through check_equal and the ports are numerically identical.
 * passwd, params and fragment take no part in the comparison. */
704 camel_url_equal(const void *v, const void *v2)
706 const CamelURL *u1 = v, *u2 = v2;
708 return check_equal(u1->protocol, u2->protocol)
709 && check_equal(u1->user, u2->user)
710 && check_equal(u1->authmech, u2->authmech)
711 && check_equal(u1->host, u2->host)
712 && check_equal(u1->path, u2->path)
713 && check_equal(u1->query, u2->query)
714 && u1->port == u2->port;
720 * @in: a #CamelURL to copy
724 * Returns a duplicate copy of @in
/* Makes a deep copy of in. The new struct starts zero-filled from
 * g_malloc0, every string member is duplicated with g_strdup, the port is
 * copied by value and each parameter is duplicated into out->params by
 * copy_param. The cast on in only drops const so that the address of its
 * params list can be handed to g_datalist_foreach. Returns NULL if in is
 * NULL. */
727 camel_url_copy(const CamelURL *in)
731 g_return_val_if_fail (in != NULL, NULL);
733 out = g_malloc0(sizeof(*out));
734 out->protocol = g_strdup(in->protocol);
735 out->user = g_strdup(in->user);
736 out->authmech = g_strdup(in->authmech);
737 out->passwd = g_strdup(in->passwd);
738 out->host = g_strdup(in->host);
739 out->port = in->port;
740 out->path = g_strdup(in->path);
743 g_datalist_foreach(&((CamelURL *)in)->params, copy_param, &out->params);
744 out->query = g_strdup(in->query);
745 out->fragment = g_strdup(in->fragment);
/* Builds a %-decoded copy of path. The path is split on slash with
 * g_strsplit, each component is decoded in place with camel_url_decode and
 * appended to a GString followed by a slash; the final extra slash is then
 * dropped by copying all but the last byte into new_path. One early exit
 * returns an empty string instead.
 * NOTE(review): confirm the condition guarding that early exit and that
 * the comps vector is released with g_strfreev. */
751 camel_url_decode_path (const char *path)
754 char *new_path = NULL;
759 return g_strdup(""); /* ??? or NULL? */
761 str = g_string_new (NULL);
763 comps = g_strsplit (path, "/", -1);
765 camel_url_decode (comps[i]);
766 g_string_append (str, comps[i]);
767 g_string_append_c (str, '/');
771 /* Strip-off the trailing "/" */
772 new_path = g_strndup (str->str, str->len-1);
775 g_string_free (str, TRUE);