1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* camel-url.c : utility functions to parse URLs */
6 * Dan Winship <danw@ximian.com>
7 * Tiago Antão <tiagoantao@bigfoot.com>
8 * Jeffrey Stedfast <fejj@ximian.com>
10 * Copyright 1999-2001 Ximian, Inc. (www.ximian.com)
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * published by the Free Software Foundation; either version 2 of the
15 * License as published by the Free Software Foundation.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
37 #include "camel-url.h"
38 #include "camel-exception.h"
39 #include "camel-mime-utils.h"
40 #include "camel-object.h"
42 static void copy_param (GQuark key_id, gpointer data, gpointer user_data);
43 static void output_param (GQuark key_id, gpointer data, gpointer user_data);
46 * camel_url_new_with_base:
48 * @url_string: the URL
50 * Parses @url_string relative to @base.
52 * Return value: a parsed CamelURL.
55 camel_url_new_with_base (CamelURL *base, const char *url_string)
58 const char *end, *hash, *colon, *semi, *at, *slash, *question;
61 url = g_new0 (CamelURL, 1);
63 /* See RFC1808 for details. IF YOU CHANGE ANYTHING IN THIS
64 * FUNCTION, RUN tests/misc/url AFTERWARDS.
68 end = hash = strchr (url_string, '#');
69 if (hash && hash[1]) {
70 url->fragment = g_strdup (hash + 1);
71 camel_url_decode (url->fragment);
73 end = url_string + strlen (url_string);
75 /* Find protocol: initial [a-z+.-]* substring until ":" */
77 while (p < end && (isalnum ((unsigned char)*p) ||
78 *p == '.' || *p == '+' || *p == '-'))
81 if (p > url_string && *p == ':') {
82 url->protocol = g_strndup (url_string, p - url_string);
83 g_strdown (url->protocol);
87 if (!*url_string && !base)
90 /* Check for authority */
91 if (strncmp (url_string, "//", 2) == 0) {
94 slash = url_string + strcspn (url_string, "/#");
95 at = strchr (url_string, '@');
96 if (at && at < slash) {
97 colon = strchr (url_string, ':');
98 if (colon && colon < at) {
99 url->passwd = g_strndup (colon + 1,
101 camel_url_decode (url->passwd);
107 semi = strchr(url_string, ';');
108 if (semi && semi < colon &&
109 !strncasecmp (semi, ";auth=", 6)) {
110 url->authmech = g_strndup (semi + 6,
112 camel_url_decode (url->authmech);
114 url->authmech = NULL;
118 url->user = g_strndup (url_string, semi - url_string);
119 camel_url_decode (url->user);
122 url->user = url->passwd = url->authmech = NULL;
124 /* Find host and port. */
125 colon = strchr (url_string, ':');
126 if (colon && colon < slash) {
127 url->host = g_strndup (url_string, colon - url_string);
128 url->port = strtoul (colon + 1, NULL, 10);
130 url->host = g_strndup (url_string, slash - url_string);
131 camel_url_decode (url->host);
139 question = memchr (url_string, '?', end - url_string);
142 url->query = g_strndup (question + 1,
143 end - (question + 1));
144 camel_url_decode (url->query);
149 /* Find parameters */
150 semi = memchr (url_string, ';', end - url_string);
153 const char *cur, *p, *eq;
156 for (cur = semi + 1; cur < end; cur = p + 1) {
157 p = memchr (cur, ';', end - cur);
160 eq = memchr (cur, '=', p - cur);
162 name = g_strndup (cur, eq - cur);
163 value = g_strndup (eq + 1, p - (eq + 1));
164 camel_url_decode (value);
166 name = g_strndup (cur, p - cur);
167 value = g_strdup ("");
169 camel_url_decode (name);
170 g_datalist_set_data_full (&url->params, name,
178 if (end != url_string) {
179 url->path = g_strndup (url_string, end - url_string);
180 camel_url_decode (url->path);
183 /* Apply base URL. Again, this is spelled out in RFC 1808. */
184 if (base && !url->protocol && url->host)
185 url->protocol = g_strdup (base->protocol);
186 else if (base && !url->protocol) {
187 if (!url->user && !url->authmech && !url->passwd &&
188 !url->host && !url->port && !url->path &&
189 !url->params && !url->query && !url->fragment)
190 url->fragment = g_strdup (base->fragment);
192 url->protocol = g_strdup (base->protocol);
193 url->user = g_strdup (base->user);
194 url->authmech = g_strdup (base->authmech);
195 url->passwd = g_strdup (base->passwd);
196 url->host = g_strdup (base->host);
197 url->port = base->port;
200 url->path = g_strdup (base->path);
202 g_datalist_foreach (&base->params, copy_param,
205 url->query = g_strdup (base->query);
207 } else if (*url->path != '/') {
208 char *newpath, *last, *p, *q;
210 last = strrchr (base->path, '/');
212 newpath = g_strdup_printf ("%.*s/%s",
217 newpath = g_strdup_printf ("/%s", url->path);
219 /* Remove "./" where "." is a complete segment. */
220 for (p = newpath + 1; *p; ) {
221 if (*(p - 1) == '/' &&
222 *p == '.' && *(p + 1) == '/')
223 memmove (p, p + 2, strlen (p + 2) + 1);
227 /* Remove "." at end. */
228 if (p > newpath + 2 &&
229 *(p - 1) == '.' && *(p - 2) == '/')
231 /* Remove "<segment>/../" where <segment> != ".." */
232 for (p = newpath + 1; *p; ) {
233 if (!strncmp (p, "../", 3)) {
237 q = strchr (p + 1, '/');
240 if (strncmp (q, "/../", 4) != 0) {
244 memmove (p, q + 4, strlen (q + 4) + 1);
247 /* Remove "<segment>/.." at end */
248 q = strrchr (newpath, '/');
249 if (q && !strcmp (q, "/..")) {
251 while (p > newpath && *p != '/')
253 if (strncmp (p, "/../", 4) != 0)
265 copy_param (GQuark key_id, gpointer data, gpointer user_data)
267 GData **copy = user_data;
269 g_datalist_id_set_data_full (copy, key_id, g_strdup (data), g_free);
275 * @ex: a CamelException
277 * Parses an absolute URL.
279 * Return value: a CamelURL, or %NULL.
282 camel_url_new (const char *url_string, CamelException *ex)
284 CamelURL *url = camel_url_new_with_base (NULL, url_string);
286 if (!url->protocol) {
287 camel_url_free (url);
288 camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
289 _("Could not parse URL `%s'"),
297 * camel_url_to_string:
299 * @flags: additional translation options.
301 * Return value: a string representing @url, which the caller must free.
304 camel_url_to_string (CamelURL *url, guint32 flags)
307 char *enc, *return_result;
309 /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
310 * tests/misc/url AFTERWARD.
313 str = g_string_sized_new (20);
316 g_string_sprintfa (str, "%s:", url->protocol);
318 g_string_append (str, "//");
320 enc = camel_url_encode (url->user, TRUE, ":;@/");
321 g_string_append (str, enc);
324 if (url->authmech && *url->authmech) {
325 enc = camel_url_encode (url->authmech, TRUE, ":@/");
326 g_string_sprintfa (str, ";auth=%s", enc);
329 if (url->passwd && !(flags & CAMEL_URL_HIDE_PASSWORD)) {
330 enc = camel_url_encode (url->passwd, TRUE, "@/");
331 g_string_sprintfa (str, ":%s", enc);
335 enc = camel_url_encode (url->host, TRUE, ":/");
336 g_string_sprintfa (str, "%s%s", url->user ? "@" : "", enc);
340 g_string_sprintfa (str, ":%d", url->port);
341 if (!url->path && (url->params || url->query || url->fragment))
342 g_string_append_c (str, '/');
346 enc = camel_url_encode (url->path, FALSE, ";?#");
347 g_string_sprintfa (str, "%s", enc);
350 if (url->params && !(flags & CAMEL_URL_HIDE_PARAMS))
351 g_datalist_foreach (&url->params, output_param, str);
353 enc = camel_url_encode (url->query, FALSE, "#");
354 g_string_sprintfa (str, "?%s", enc);
358 enc = camel_url_encode (url->fragment, FALSE, NULL);
359 g_string_sprintfa (str, "#%s", enc);
363 return_result = str->str;
364 g_string_free (str, FALSE);
365 return return_result;
369 output_param (GQuark key_id, gpointer data, gpointer user_data)
371 GString *str = user_data;
374 enc = camel_url_encode (g_quark_to_string (key_id), FALSE, "?#");
375 g_string_sprintfa (str, ";%s", enc);
378 enc = camel_url_encode (data, FALSE, "?#");
379 g_string_sprintfa (str, "=%s", enc);
391 camel_url_free (CamelURL *url)
394 g_free (url->protocol);
396 g_free (url->authmech);
397 g_free (url->passwd);
400 g_datalist_clear (&url->params);
402 g_free (url->fragment);
409 #define DEFINE_CAMEL_URL_SET(part) \
411 camel_url_set_##part (CamelURL *url, const char *part) \
413 g_free (url->part); \
414 url->part = g_strdup (part); \
417 DEFINE_CAMEL_URL_SET (protocol)
418 DEFINE_CAMEL_URL_SET (user)
419 DEFINE_CAMEL_URL_SET (authmech)
420 DEFINE_CAMEL_URL_SET (passwd)
421 DEFINE_CAMEL_URL_SET (host)
422 DEFINE_CAMEL_URL_SET (path)
423 DEFINE_CAMEL_URL_SET (query)
424 DEFINE_CAMEL_URL_SET (fragment)
427 camel_url_set_port (CamelURL *url, int port)
433 camel_url_set_param (CamelURL *url, const char *name, const char *value)
435 g_datalist_set_data_full (&url->params, name, value ? g_strdup (value) : NULL, g_free);
439 camel_url_get_param (CamelURL *url, const char *name)
441 return g_datalist_get_data (&url->params, name);
448 * @escape_unsafe: whether or not to %-escape "unsafe" characters.
450 * @escape_extra: additional characters to escape.
452 * This %-encodes the given URL part and returns the escaped version
453 * in allocated memory, which the caller must free when it is done.
456 camel_url_encode (const char *part, gboolean escape_unsafe,
457 const char *escape_extra)
461 /* worst case scenario = 3 times the initial */
462 p = work = g_malloc (3 * strlen (part) + 1);
465 if (((guchar) *part >= 127) || ((guchar) *part <= ' ') ||
466 (escape_unsafe && strchr ("\"%#<>{}|\\^~[]`", *part)) ||
467 (escape_extra && strchr (escape_extra, *part))) {
468 sprintf (p, "%%%.02hX", (guchar) *part++);
/* Value of the hex digit @c.  @c must satisfy isxdigit() and must be
 * free of side effects, since it is evaluated more than once.
 */
#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)

/**
 * camel_url_decode:
 * @part: a URL part, or %NULL
 *
 * %-decodes the passed-in URL *in place*. The decoded version is
 * never longer than the encoded version, so there does not need to
 * be any additional space at the end of the string.
 *
 * A '%' that is not followed by two hex digits is copied through
 * unchanged.  A %NULL @part is ignored.
 */
void
camel_url_decode (char *part)
{
	unsigned char *s, *d;

	if (!part)
		return;

	/* Work on unsigned bytes so the <ctype.h> calls never see a
	 * negative value.
	 */
	s = d = (unsigned char *)part;
	while (*s) {
		/* The first isxdigit() fails on the terminating NUL, so
		 * s[2] is never read past the end of the string.
		 */
		if (*s == '%' && isxdigit (s[1]) && isxdigit (s[2])) {
			*d++ = HEXVAL (s[1]) * 16 + HEXVAL (s[2]);
			s += 3;
		} else
			*d++ = *s++;
	}
	*d = '\0';
}

508 camel_url_hash (const void *v)
510 const CamelURL *u = v;
513 #define ADD_HASH(s) if (s) hash ^= g_str_hash (s);
515 ADD_HASH (u->protocol);
517 ADD_HASH (u->authmech);
/* NULL-tolerant string comparison: returns 1 when both strings are
 * NULL or both have identical contents, 0 otherwise.
 */
static int
check_equal (char *s1, char *s2)
{
	/* With a NULL on either side, they match only if both are NULL. */
	if (s1 == NULL || s2 == NULL)
		return s1 == s2;

	return strcmp (s1, s2) == 0;
}

543 camel_url_equal(const void *v, const void *v2)
545 const CamelURL *u1 = v, *u2 = v2;
547 return check_equal(u1->protocol, u2->protocol)
548 && check_equal(u1->user, u2->user)
549 && check_equal(u1->authmech, u2->authmech)
550 && check_equal(u1->host, u2->host)
551 && check_equal(u1->path, u2->path)
552 && check_equal(u1->query, u2->query)
553 && u1->port == u2->port;