1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* camel-url.c : utility functions to parse URLs */
6 * Dan Winship <danw@ximian.com>
7 * Jeffrey Stedfast <fejj@ximian.com>
9 * Copyright 1999-2001 Ximian, Inc. (www.ximian.com)
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of version 2 of the GNU Lesser General Public
13 * License as published by the Free Software Foundation.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
36 #include <glib/gi18n-lib.h>
38 #include "camel-exception.h"
39 #include "camel-mime-utils.h"
40 #include "camel-object.h"
41 #include "camel-string-utils.h"
42 #include "camel-url.h"
44 static void copy_param (GQuark key_id, gpointer data, gpointer user_data);
45 static void output_param (GQuark key_id, gpointer data, gpointer user_data);
47 static void append_url_encoded (GString *str, const char *in, const char *extra_enc_chars);
50 * camel_url_new_with_base:
52 * @url_string: the URL
54 * Parses @url_string relative to @base.
56 * Returns a parsed #CamelURL
/* Parses @url_string into a CamelURL allocated zero-filled with
 * g_new0().  Components are peeled off in this order: fragment,
 * protocol, authority (user, auth mechanism, password, host, port),
 * query, params and finally path.  Most extracted strings are passed
 * through camel_url_decode(); the protocol is instead passed to
 * camel_strdown().
 *
 * When @base is given and @url_string carries no protocol of its own,
 * the protocol is taken from @base.  If the URL also has no host, the
 * user, authmech, passwd, host and port are taken from @base as well,
 * and path, params and query can be inherited or, for a relative path,
 * resolved against base->path with "." and ".." segments collapsed.
 *
 * NOTE(review): this excerpt omits a number of short lines (braces,
 * returns, some conditions), so the exact nesting of the statements
 * below cannot be read off from here.
 */
59 camel_url_new_with_base (CamelURL *base, const char *url_string)
63 const char *end, *hash, *colon, *semi, *at, *slash, *question;
66 g_return_val_if_fail (url_string != NULL, NULL);
68 url = g_new0 (CamelURL, 1);
71 /* See RFC1808 for details. IF YOU CHANGE ANYTHING IN THIS
72 * FUNCTION, RUN tests/misc/url AFTERWARDS.
75 /* Find fragment. RFC 1808 2.4.1 */
76 end = hash = strchr (url_string, '#');
79 url->fragment = g_strdup (hash + 1);
80 camel_url_decode (url->fragment);
83 end = url_string + strlen (url_string);
85 /* Find protocol: initial run of [A-Za-z0-9+.-] characters ending at ":" */
87 while (p < end && (isalnum ((unsigned char)*p) ||
88 *p == '.' || *p == '+' || *p == '-'))
91 if (p > url_string && *p == ':') {
92 url->protocol = g_strndup (url_string, p - url_string);
93 camel_strdown (url->protocol);
97 if (!*url_string && !base)
/* URLs with the file protocol are handed to g_filename_from_uri(),
 * which supplies the path and stores the hostname in url->host. */
101 if (url->protocol && !strcmp(url->protocol, "file")) {
102 url->path = g_filename_from_uri(start, &url->host, NULL);
107 /* Check for authority */
108 if (strncmp (url_string, "//", 2) == 0) {
/* The authority ends at the first slash or hash; an at-sign only
 * counts as the userinfo separator when it lies before that point. */
111 slash = url_string + strcspn (url_string, "/#");
112 at = strchr (url_string, '@');
113 if (at && at < slash) {
114 colon = strchr (url_string, ':');
115 if (colon && colon < at) {
116 url->passwd = g_strndup (colon + 1,
118 camel_url_decode (url->passwd);
/* The auth mechanism marker is matched case-insensitively. */
124 semi = strchr(url_string, ';');
125 if (semi && semi < colon &&
126 !g_ascii_strncasecmp (semi, ";auth=", 6)) {
127 url->authmech = g_strndup (semi + 6,
129 camel_url_decode (url->authmech);
131 url->authmech = NULL;
135 url->user = g_strndup (url_string, semi - url_string);
136 camel_url_decode (url->user);
139 url->user = url->passwd = url->authmech = NULL;
141 /* Find host and port. */
142 colon = strchr (url_string, ':');
143 if (colon && colon < slash) {
144 url->host = g_strndup (url_string, colon - url_string);
/* NOTE(review): strtoul() gets a NULL end pointer, so trailing
 * non-digits and out-of-range values go undetected here. */
145 url->port = strtoul (colon + 1, NULL, 10);
147 url->host = g_strndup (url_string, slash - url_string);
148 camel_url_decode (url->host);
156 question = memchr (url_string, '?', end - url_string);
159 url->query = g_strndup (question + 1,
160 end - (question + 1));
161 camel_url_decode (url->query);
166 /* Find parameters */
167 semi = memchr (url_string, ';', end - url_string);
170 const char *cur, *p, *eq;
173 for (cur = semi + 1; cur < end; cur = p + 1) {
174 p = memchr (cur, ';', end - cur);
177 eq = memchr (cur, '=', p - cur);
179 name = g_strndup (cur, eq - cur);
180 value = g_strndup (eq + 1, p - (eq + 1));
181 camel_url_decode (value);
183 name = g_strndup (cur, p - cur);
184 value = g_strdup ("");
186 camel_url_decode (name);
187 g_datalist_set_data_full (&url->params, name,
195 if (end != url_string) {
196 url->path = g_strndup (url_string, end - url_string);
197 camel_url_decode (url->path);
200 /* Apply base URL. Again, this is spelled out in RFC 1808. */
201 if (base && !url->protocol && url->host)
202 url->protocol = g_strdup (base->protocol);
203 else if (base && !url->protocol) {
204 if (!url->user && !url->authmech && !url->passwd &&
205 !url->host && !url->port && !url->path &&
206 !url->params && !url->query && !url->fragment)
207 url->fragment = g_strdup (base->fragment);
209 url->protocol = g_strdup (base->protocol);
210 url->user = g_strdup (base->user);
211 url->authmech = g_strdup (base->authmech);
212 url->passwd = g_strdup (base->passwd);
213 url->host = g_strdup (base->host);
214 url->port = base->port;
217 url->path = g_strdup (base->path);
219 g_datalist_foreach (&base->params, copy_param,
222 url->query = g_strdup (base->query);
224 } else if (*url->path != '/') {
225 char *newpath, *last, *p, *q;
/* NOTE(review): strrchr() dereferences base->path and no NULL check
 * on it is visible in this excerpt - confirm that one exists or that
 * callers always supply a base with a path. */
227 last = strrchr (base->path, '/');
229 newpath = g_strdup_printf ("%.*s/%s",
230 (int)(last - base->path),
234 newpath = g_strdup_printf ("/%s", url->path);
236 /* Remove "./" where "." is a complete segment. */
237 for (p = newpath + 1; *p; ) {
238 if (*(p - 1) == '/' &&
239 *p == '.' && *(p + 1) == '/')
240 memmove (p, p + 2, strlen (p + 2) + 1);
244 /* Remove "." at end. */
245 if (p > newpath + 2 &&
246 *(p - 1) == '.' && *(p - 2) == '/')
248 /* Remove "<segment>/../" where <segment> != ".." */
249 for (p = newpath + 1; *p; ) {
250 if (!strncmp (p, "../", 3)) {
254 q = strchr (p + 1, '/');
257 if (strncmp (q, "/../", 4) != 0) {
261 memmove (p, q + 4, strlen (q + 4) + 1);
264 /* Remove "<segment>/.." at end */
265 q = strrchr (newpath, '/');
266 if (q && !strcmp (q, "/..")) {
268 while (p > newpath && *p != '/')
270 if (strncmp (p, "/../", 4) != 0)
/* g_datalist_foreach() callback: @user_data points at the destination
 * datalist.  The string in @data is duplicated with g_strdup() and
 * stored under the same key, with g_free registered as its destroy
 * notifier, so source and destination never share a value pointer.
 */
282 copy_param (GQuark key_id, gpointer data, gpointer user_data)
284 GData **copy = user_data;
286 g_datalist_id_set_data_full (copy, key_id, g_strdup (data), g_free);
291 * @url_string: a URL string
292 * @ex: a #CamelException
294 * Parses an absolute URL.
296 * Returns a #CamelURL if it can be parsed, or %NULL otherwise
/* Parses @url_string with camel_url_new_with_base() and no base URL.
 * A result without a protocol is treated as unparseable: the URL is
 * freed and CAMEL_EXCEPTION_SERVICE_URL_INVALID is set on @ex with a
 * translated message.  A NULL @url_string is rejected up front by
 * g_return_val_if_fail().
 */
299 camel_url_new (const char *url_string, CamelException *ex)
303 g_return_val_if_fail (url_string != NULL, NULL);
305 url = camel_url_new_with_base (NULL, url_string);
307 if (!url->protocol) {
308 camel_url_free (url);
309 camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
310 _("Could not parse URL `%s'"),
318 * camel_url_to_string:
320 * @flags: additional translation options
322 * Flatten a #CamelURL into a string.
324 * Returns a string representing @url, which the caller must free
/* Serializes @url into a newly allocated string.
 *
 * A URL whose protocol is file is delegated to g_filename_to_uri()
 * using the path and host.  NOTE(review): the lines around that branch
 * are missing from this excerpt, so a platform conditional around it
 * cannot be ruled out.
 *
 * Otherwise the text is built in a GString: protocol, authority
 * (user, optional auth mechanism, optional password, host, port),
 * path, params, query and fragment.  Each string component goes
 * through append_url_encoded() with a component-specific set of extra
 * characters that must be escaped so the component cannot be confused
 * with the delimiter that follows it.
 *
 * Flags: CAMEL_URL_HIDE_PASSWORD leaves the password out and
 * CAMEL_URL_HIDE_PARAMS leaves the params out.
 *
 * The GString is released with its free-segment argument FALSE, which
 * keeps the character buffer alive; that buffer is the return value.
 */
327 camel_url_to_string (CamelURL *url, guint32 flags)
332 g_return_val_if_fail (url != NULL, NULL);
334 /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
335 * tests/misc/url AFTERWARD.
339 if (url->protocol && !strcmp(url->protocol, "file"))
340 return g_filename_to_uri(url->path, url->host, NULL);
343 str = g_string_sized_new (20);
346 g_string_append_printf (str, "%s:", url->protocol);
349 g_string_append (str, "//");
351 append_url_encoded (str, url->user, ":;@/");
352 if (url->authmech && *url->authmech) {
353 g_string_append (str, ";auth=");
354 append_url_encoded (str, url->authmech, ":@/");
356 if (url->passwd && !(flags & CAMEL_URL_HIDE_PASSWORD)) {
357 g_string_append_c (str, ':');
358 append_url_encoded (str, url->passwd, "@/");
360 g_string_append_c (str, '@');
362 append_url_encoded (str, url->host, ":/");
364 g_string_append_printf (str, ":%d", url->port);
365 if (!url->path && (url->params || url->query || url->fragment))
366 g_string_append_c (str, '/');
370 append_url_encoded (str, url->path, ";?");
371 if (url->params && !(flags & CAMEL_URL_HIDE_PARAMS))
372 g_datalist_foreach (&url->params, output_param, str);
374 g_string_append_c (str, '?');
375 append_url_encoded (str, url->query, NULL);
378 g_string_append_c (str, '#');
379 append_url_encoded (str, url->fragment, NULL);
382 return_result = str->str;
383 g_string_free (str, FALSE);
385 return return_result;
/* g_datalist_foreach() callback used by camel_url_to_string():
 * @user_data is the GString being built.  Appends a semicolon and the
 * param name (the key quark turned back into a string), escaping the
 * question mark and equals sign in addition to the always-escaped
 * set, then an equals sign and the value with the question mark
 * additionally escaped.
 * NOTE(review): a line between the name and the equals sign is
 * missing from this excerpt, so the equals sign and value may be
 * emitted only conditionally - confirm against the full file.
 */
389 output_param (GQuark key_id, gpointer data, gpointer user_data)
391 GString *str = user_data;
393 g_string_append_c (str, ';');
394 append_url_encoded (str, g_quark_to_string (key_id), "?=");
396 g_string_append_c (str, '=');
397 append_url_encoded (str, data, "?");
/* Releases @url.  The passwd, user and host strings are overwritten
 * with zero bytes before the fields are released.  The g_free() calls
 * visible here cover protocol, authmech, passwd and fragment, and the
 * params datalist is emptied with g_datalist_clear().
 *
 * NOTE(review): strlen() requires each of those three fields to be
 * non-NULL; the guarding conditions are missing from this excerpt -
 * confirm they exist.
 * NOTE(review): a plain memset() immediately before a free may be
 * removed by an optimizing compiler, so it is not a dependable way to
 * scrub a password from memory.
 */
408 camel_url_free (CamelURL *url)
412 memset(url->passwd, 0, strlen(url->passwd));
414 memset(url->user, 0, strlen(url->user));
416 memset(url->host, 0, strlen(url->host));
417 g_free (url->protocol);
419 g_free (url->authmech);
420 g_free (url->passwd);
423 g_datalist_clear (&url->params);
425 g_free (url->fragment);
/* Generates the setter camel_url_set_<part>() for one string field of
 * CamelURL.  The generated function rejects a NULL @url through
 * g_return_if_fail(), frees the value currently stored in url->part
 * and replaces it with a g_strdup() copy of the argument, so the
 * caller keeps ownership of the string it passed in.
 */
432 #define DEFINE_CAMEL_URL_SET(part) \
434 camel_url_set_##part (CamelURL *url, const char *part) \
436 g_return_if_fail (url != NULL); \
438 g_free (url->part); \
439 url->part = g_strdup (part); \
444 * camel_url_set_protocol:
446 * @protocol: protocol scheme
448 * Set the protocol of a #CamelURL.
450 DEFINE_CAMEL_URL_SET (protocol)
454 * camel_url_set_user:
458 * Set the user of a #CamelURL.
460 DEFINE_CAMEL_URL_SET (user)
464 * camel_url_set_authmech:
466 * @authmech: authentication mechanism
468 * Set the authmech of a #CamelURL.
470 DEFINE_CAMEL_URL_SET (authmech)
474 * camel_url_set_passwd:
478 * Set the password of a #CamelURL.
480 DEFINE_CAMEL_URL_SET (passwd)
484 * camel_url_set_host:
488 * Set the hostname of a #CamelURL.
490 DEFINE_CAMEL_URL_SET (host)
494 * camel_url_set_path:
498 * Set the path component of a #CamelURL.
500 DEFINE_CAMEL_URL_SET (path)
504 * camel_url_set_query:
508 * Set the query of a #CamelURL.
510 DEFINE_CAMEL_URL_SET (query)
514 * camel_url_set_fragment:
516 * @fragment: url fragment
518 * Set the fragment of a #CamelURL.
520 DEFINE_CAMEL_URL_SET (fragment)
524 * camel_url_set_port:
528 * Set the port on a #CamelURL.
/* Setter for the port of @url.  A NULL @url is rejected by
 * g_return_if_fail(); the assignment itself is not part of this
 * excerpt.
 */
531 camel_url_set_port (CamelURL *url, int port)
533 g_return_if_fail (url != NULL);
540 * camel_url_set_param:
542 * @name: name of the param to set
543 * @value: value of the param to set
545 * Set a param on the #CamelURL.
/* Either stores a g_strdup() copy of @value under @name in
 * url->params (with g_free as destroy notifier) or removes the entry
 * for @name.  The condition choosing between the two is missing from
 * this excerpt; presumably a NULL @value means removal - TODO confirm.
 */
548 camel_url_set_param (CamelURL *url, const char *name, const char *value)
550 g_return_if_fail (url != NULL);
553 g_datalist_set_data_full (&url->params, name, g_strdup(value), g_free);
555 g_datalist_remove_data(&url->params, name);
560 * camel_url_get_param:
562 * @name: name of the param
564 * Get the value of the specified param on the URL.
566 * Returns the value of a param if found or %NULL otherwise
/* Looks @name up in url->params.  The pointer handed back is whatever
 * g_datalist_get_data() returns; no copy is made here.  A NULL @url
 * yields NULL through g_return_val_if_fail().
 */
569 camel_url_get_param (CamelURL *url, const char *name)
571 g_return_val_if_fail (url != NULL, NULL);
573 return g_datalist_get_data (&url->params, name);
576 /* From RFC 2396 2.4.3, the characters that should always be encoded.
 * The table is indexed by byte value (0x00 - 0xff); a 1 marks a byte
 * that append_url_encoded() always writes in percent-encoded form.
 * All control characters and all bytes from 0x80 upwards are marked. */
577 static const char url_encoded_char[] = {
578 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 - 0x0f */
579 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10 - 0x1f */
580 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ' ' - '/' */
581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* '0' - '?' */
582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '@' - 'O' */
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 'P' - '_' */
584 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '`' - 'o' */
585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 'p' - 0x7f */
586 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 - 0x8f */
587 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 - 0x9f */
588 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0 - 0xaf */
589 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0 - 0xbf */
590 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xc0 - 0xcf */
591 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xd0 - 0xdf */
592 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xe0 - 0xef */
593 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xf0 - 0xff */
/* Appends @in to @str.  A byte is written as a percent sign followed
 * by two lowercase hex digits when it is flagged in url_encoded_char[]
 * or occurs in @extra_enc_chars (which may be NULL); every other byte
 * is appended unchanged.  The input is walked through an unsigned char
 * pointer so that bytes of 0x80 and above index the table with a
 * non-negative value.
 */
597 append_url_encoded (GString *str, const char *in, const char *extra_enc_chars)
599 const unsigned char *s = (const unsigned char *)in;
602 if (url_encoded_char[*s] ||
603 (extra_enc_chars && strchr (extra_enc_chars, *s)))
604 g_string_append_printf (str, "%%%02x", (int)*s++);
606 g_string_append_c (str, *s++);
613 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
614 * to escape (or %NULL)
616 * This %-encodes the given URL part and returns the escaped version
617 * in allocated memory, which the caller must free when it is done.
619 * Returns the encoded string
/* Builds the percent-encoded form of @part in a fresh GString by way
 * of append_url_encoded(), forwarding @escape_extra as the additional
 * escape set.  The GString is then released with its free-segment
 * argument FALSE, which keeps the character buffer alive for the
 * caller.  A NULL @part yields NULL through g_return_val_if_fail().
 */
622 camel_url_encode (const char *part, const char *escape_extra)
627 g_return_val_if_fail (part != NULL, NULL);
629 str = g_string_new (NULL);
630 append_url_encoded (str, part, escape_extra);
632 g_string_free (str, FALSE);
641 * %-decodes the passed-in URL *in place*. The decoded version is
642 * never longer than the encoded version, so there does not need to
643 * be any additional space at the end of the string.
/* Percent-decodes @part in place.  A read cursor (s) and a write
 * cursor (d) start at the same address; a percent sign followed by two
 * hex digits is replaced by the single byte those digits encode.  A
 * percent sign that is not followed by two hex digits fails the test
 * below and is not decoded by that branch.  A NULL @part is rejected
 * by g_return_if_fail().
 */
646 camel_url_decode (char *part)
648 unsigned char *s, *d;
650 g_return_if_fail (part != NULL);
/* XDIGIT converts one hex digit character to its numeric value.  For
 * letters, masking with 0x4F clears the 0x20 case bit so lowercase a-f
 * fold onto uppercase A-F before the subtraction.  Only meaningful for
 * characters that have already passed isxdigit(). */
652 #define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
654 s = d = (unsigned char *)part;
656 if (*s == '%' && isxdigit(s[1]) && isxdigit(s[2])) {
657 *d++ = (XDIGIT (s[1]) << 4) + XDIGIT (s[2]);
/* Hashes the CamelURL passed as @v by XOR-ing g_str_hash() of its
 * string fields into an accumulator; ADD_HASH skips a field that is
 * NULL.  Only the protocol and authmech contributions are visible in
 * this excerpt - the remaining fields are on lines not shown here.
 */
666 camel_url_hash (const void *v)
668 const CamelURL *u = v;
671 #define ADD_HASH(s) if (s) hash ^= g_str_hash (s);
673 ADD_HASH (u->protocol);
675 ADD_HASH (u->authmech);
/* String equality helper used by camel_url_equal().  The last step is
 * a strcmp() equality test; whatever NULL or empty-string handling
 * precedes it is missing from this excerpt - TODO confirm.
 */
685 check_equal (char *s1, char *s2)
697 return strcmp (s1, s2) == 0;
/* Compares the two CamelURLs passed as @v and @v2.  They are equal
 * when protocol, user, authmech, host, path and query all match
 * according to check_equal() and the port numbers are identical.
 * passwd, params and fragment take no part in the comparison.
 */
701 camel_url_equal(const void *v, const void *v2)
703 const CamelURL *u1 = v, *u2 = v2;
705 return check_equal(u1->protocol, u2->protocol)
706 && check_equal(u1->user, u2->user)
707 && check_equal(u1->authmech, u2->authmech)
708 && check_equal(u1->host, u2->host)
709 && check_equal(u1->path, u2->path)
710 && check_equal(u1->query, u2->query)
711 && u1->port == u2->port;
717 * @in: a #CamelURL to copy
721 * Returns a duplicate copy of @in
/* Deep-copies @in.  Every string field is duplicated with g_strdup(),
 * the port is copied by value, and each param is duplicated into the
 * new datalist through copy_param().  A NULL @in yields NULL through
 * g_return_val_if_fail().
 *
 * NOTE(review): the struct comes from g_malloc(), which does not zero
 * memory, so out->params has to be initialised before it is passed to
 * g_datalist_foreach() as the copy target.  That line is missing from
 * this excerpt - confirm it exists.
 */
724 camel_url_copy(const CamelURL *in)
728 g_return_val_if_fail (in != NULL, NULL);
730 out = g_malloc(sizeof(*out));
731 out->protocol = g_strdup(in->protocol);
732 out->user = g_strdup(in->user);
733 out->authmech = g_strdup(in->authmech);
734 out->passwd = g_strdup(in->passwd);
735 out->host = g_strdup(in->host);
736 out->port = in->port;
737 out->path = g_strdup(in->path);
740 g_datalist_foreach(&((CamelURL *)in)->params, copy_param, &out->params);
741 out->query = g_strdup(in->query);
742 out->fragment = g_strdup(in->fragment);
/* Returns a newly allocated copy of @path in which each component
 * between slash separators has been percent-decoded.  The path is
 * split on slashes, every component is decoded in place with
 * camel_url_decode(), and the components are re-joined in a GString
 * with a slash after each one; the final copy drops that last slash
 * by taking len - 1 bytes.  The GString, including its buffer, is
 * freed afterwards.
 * There is also an early return of an empty string; its condition is
 * missing from this excerpt (presumably a NULL or empty @path).
 */
748 camel_url_decode_path (const char *path)
751 char *new_path = NULL;
756 return g_strdup(""); /* ??? or NULL? */
758 str = g_string_new (NULL);
760 comps = g_strsplit (path, "/", -1);
762 camel_url_decode (comps[i]);
763 g_string_append (str, comps[i]);
764 g_string_append_c (str, '/');
768 /* Strip-off the trailing "/" */
769 new_path = g_strndup (str->str, str->len-1);
772 g_string_free (str, TRUE);