1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* camel-url.c : utility functions to parse URLs */
6 * Dan Winship <danw@ximian.com>
7 * Jeffrey Stedfast <fejj@ximian.com>
9 * Copyright 1999-2001 Ximian, Inc. (www.ximian.com)
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of version 2 of the GNU General Public
13 * License as published by the Free Software Foundation.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
35 #include "camel-url.h"
36 #include "camel-exception.h"
37 #include "camel-mime-utils.h"
38 #include "camel-object.h"
40 static void copy_param (GQuark key_id, gpointer data, gpointer user_data);
41 static void output_param (GQuark key_id, gpointer data, gpointer user_data);
43 static void append_url_encoded (GString *str, const char *in, const char *extra_enc_chars);
46 * camel_url_new_with_base:
48 * @url_string: the URL
50 * Parses @url_string relative to @base.
52 * Return value: a parsed CamelURL.
55 camel_url_new_with_base (CamelURL *base, const char *url_string)
58 const char *end, *hash, *colon, *semi, *at, *slash, *question;
61 url = g_new0 (CamelURL, 1);
63 /* See RFC1808 for details. IF YOU CHANGE ANYTHING IN THIS
64 * FUNCTION, RUN tests/misc/url AFTERWARDS.
68 end = hash = strchr (url_string, '#');
69 if (hash && hash[1]) {
70 url->fragment = g_strdup (hash + 1);
71 camel_url_decode (url->fragment);
73 end = url_string + strlen (url_string);
75 /* Find protocol: initial [a-z+.-]* substring until ":" */
77 while (p < end && (isalnum ((unsigned char)*p) ||
78 *p == '.' || *p == '+' || *p == '-'))
81 if (p > url_string && *p == ':') {
82 url->protocol = g_strndup (url_string, p - url_string);
83 camel_strdown (url->protocol);
87 if (!*url_string && !base)
90 /* Check for authority */
91 if (strncmp (url_string, "//", 2) == 0) {
94 slash = url_string + strcspn (url_string, "/#");
95 at = strchr (url_string, '@');
96 if (at && at < slash) {
97 colon = strchr (url_string, ':');
98 if (colon && colon < at) {
99 url->passwd = g_strndup (colon + 1,
101 camel_url_decode (url->passwd);
107 semi = strchr(url_string, ';');
108 if (semi && semi < colon &&
109 !strncasecmp (semi, ";auth=", 6)) {
110 url->authmech = g_strndup (semi + 6,
112 camel_url_decode (url->authmech);
114 url->authmech = NULL;
118 url->user = g_strndup (url_string, semi - url_string);
119 camel_url_decode (url->user);
122 url->user = url->passwd = url->authmech = NULL;
124 /* Find host and port. */
125 colon = strchr (url_string, ':');
126 if (colon && colon < slash) {
127 url->host = g_strndup (url_string, colon - url_string);
128 url->port = strtoul (colon + 1, NULL, 10);
130 url->host = g_strndup (url_string, slash - url_string);
131 camel_url_decode (url->host);
139 question = memchr (url_string, '?', end - url_string);
142 url->query = g_strndup (question + 1,
143 end - (question + 1));
144 camel_url_decode (url->query);
149 /* Find parameters */
150 semi = memchr (url_string, ';', end - url_string);
153 const char *cur, *p, *eq;
156 for (cur = semi + 1; cur < end; cur = p + 1) {
157 p = memchr (cur, ';', end - cur);
160 eq = memchr (cur, '=', p - cur);
162 name = g_strndup (cur, eq - cur);
163 value = g_strndup (eq + 1, p - (eq + 1));
164 camel_url_decode (value);
166 name = g_strndup (cur, p - cur);
167 value = g_strdup ("");
169 camel_url_decode (name);
170 g_datalist_set_data_full (&url->params, name,
178 if (end != url_string) {
179 url->path = g_strndup (url_string, end - url_string);
180 camel_url_decode (url->path);
183 /* Apply base URL. Again, this is spelled out in RFC 1808. */
184 if (base && !url->protocol && url->host)
185 url->protocol = g_strdup (base->protocol);
186 else if (base && !url->protocol) {
187 if (!url->user && !url->authmech && !url->passwd &&
188 !url->host && !url->port && !url->path &&
189 !url->params && !url->query && !url->fragment)
190 url->fragment = g_strdup (base->fragment);
192 url->protocol = g_strdup (base->protocol);
193 url->user = g_strdup (base->user);
194 url->authmech = g_strdup (base->authmech);
195 url->passwd = g_strdup (base->passwd);
196 url->host = g_strdup (base->host);
197 url->port = base->port;
200 url->path = g_strdup (base->path);
202 g_datalist_foreach (&base->params, copy_param,
205 url->query = g_strdup (base->query);
207 } else if (*url->path != '/') {
208 char *newpath, *last, *p, *q;
210 last = strrchr (base->path, '/');
212 newpath = g_strdup_printf ("%.*s/%s",
217 newpath = g_strdup_printf ("/%s", url->path);
219 /* Remove "./" where "." is a complete segment. */
220 for (p = newpath + 1; *p; ) {
221 if (*(p - 1) == '/' &&
222 *p == '.' && *(p + 1) == '/')
223 memmove (p, p + 2, strlen (p + 2) + 1);
227 /* Remove "." at end. */
228 if (p > newpath + 2 &&
229 *(p - 1) == '.' && *(p - 2) == '/')
231 /* Remove "<segment>/../" where <segment> != ".." */
232 for (p = newpath + 1; *p; ) {
233 if (!strncmp (p, "../", 3)) {
237 q = strchr (p + 1, '/');
240 if (strncmp (q, "/../", 4) != 0) {
244 memmove (p, q + 4, strlen (q + 4) + 1);
247 /* Remove "<segment>/.." at end */
248 q = strrchr (newpath, '/');
249 if (q && !strcmp (q, "/..")) {
251 while (p > newpath && *p != '/')
253 if (strncmp (p, "/../", 4) != 0)
265 copy_param (GQuark key_id, gpointer data, gpointer user_data)
267 GData **copy = user_data;
269 g_datalist_id_set_data_full (copy, key_id, g_strdup (data), g_free);
275 * @ex: a CamelException
277 * Parses an absolute URL.
279 * Return value: a CamelURL, or %NULL.
282 camel_url_new (const char *url_string, CamelException *ex)
284 CamelURL *url = camel_url_new_with_base (NULL, url_string);
286 if (!url->protocol) {
287 camel_url_free (url);
288 camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
289 _("Could not parse URL `%s'"),
297 * camel_url_to_string:
299 * @flags: additional translation options.
301 * Return value: a string representing @url, which the caller must free.
304 camel_url_to_string (CamelURL *url, guint32 flags)
309 /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
310 * tests/misc/url AFTERWARD.
313 str = g_string_sized_new (20);
316 g_string_append_printf (str, "%s:", url->protocol);
319 g_string_append (str, "//");
321 append_url_encoded (str, url->user, ":;@/");
322 if (url->authmech && *url->authmech) {
323 g_string_append (str, ";auth=");
324 append_url_encoded (str, url->authmech, ":@/");
326 if (url->passwd && !(flags & CAMEL_URL_HIDE_PASSWORD)) {
327 g_string_append_c (str, ':');
328 append_url_encoded (str, url->passwd, "@/");
330 g_string_append_c (str, '@');
332 append_url_encoded (str, url->host, ":/");
334 g_string_append_printf (str, ":%d", url->port);
335 if (!url->path && (url->params || url->query || url->fragment))
336 g_string_append_c (str, '/');
340 append_url_encoded (str, url->path, ";?");
341 if (url->params && !(flags & CAMEL_URL_HIDE_PARAMS))
342 g_datalist_foreach (&url->params, output_param, str);
344 g_string_append_c (str, '?');
345 append_url_encoded (str, url->query, NULL);
348 g_string_append_c (str, '#');
349 append_url_encoded (str, url->fragment, NULL);
352 return_result = str->str;
353 g_string_free (str, FALSE);
355 return return_result;
359 output_param (GQuark key_id, gpointer data, gpointer user_data)
361 GString *str = user_data;
363 g_string_append_c (str, ';');
364 append_url_encoded (str, g_quark_to_string (key_id), "?=");
366 g_string_append_c (str, '=');
367 append_url_encoded (str, data, "?");
378 camel_url_free (CamelURL *url)
381 g_free (url->protocol);
383 g_free (url->authmech);
384 g_free (url->passwd);
387 g_datalist_clear (&url->params);
389 g_free (url->fragment);
396 #define DEFINE_CAMEL_URL_SET(part) \
398 camel_url_set_##part (CamelURL *url, const char *part) \
400 g_free (url->part); \
401 url->part = g_strdup (part); \
404 DEFINE_CAMEL_URL_SET (protocol)
405 DEFINE_CAMEL_URL_SET (user)
406 DEFINE_CAMEL_URL_SET (authmech)
407 DEFINE_CAMEL_URL_SET (passwd)
408 DEFINE_CAMEL_URL_SET (host)
409 DEFINE_CAMEL_URL_SET (path)
410 DEFINE_CAMEL_URL_SET (query)
411 DEFINE_CAMEL_URL_SET (fragment)
414 camel_url_set_port (CamelURL *url, int port)
420 camel_url_set_param (CamelURL *url, const char *name, const char *value)
422 g_datalist_set_data_full (&url->params, name, value ? g_strdup (value) : NULL, g_free);
426 camel_url_get_param (CamelURL *url, const char *name)
428 return g_datalist_get_data (&url->params, name);
/* From RFC 2396 2.4.3, the characters that should always be encoded.
 *
 * The table is indexed by byte value; a non-zero entry means the byte
 * is always %-escaped.  That covers the control characters, space,
 * '"', '#', '%', '<', '>', '[', '\', ']', '^', '`', '{', '|', '}',
 * DEL and every byte with the high bit set.
 */
static const char url_encoded_char[] = {
	/* 0x00 - 0x0f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 - 0x1f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* ' ' - '/' */
	1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* '0' - '?' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
	/* '@' - 'O' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 'P' - '_' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	/* '`' - 'o' */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 'p' - 0x7f */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
	/* 0x80 - 0xff */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
452 append_url_encoded (GString *str, const char *in, const char *extra_enc_chars)
454 const unsigned char *s = (const unsigned char *)in;
457 if (url_encoded_char[*s] ||
458 (extra_enc_chars && strchr (extra_enc_chars, *s)))
459 g_string_append_printf (str, "%%%02x", (int)*s++);
461 g_string_append_c (str, *s++);
468 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
469 * to escape (or %NULL)
471 * This %-encodes the given URL part and returns the escaped version
472 * in allocated memory, which the caller must free when it is done.
475 camel_url_encode (const char *part, const char *escape_extra)
480 str = g_string_new (NULL);
481 append_url_encoded (str, part, escape_extra);
483 g_string_free (str, FALSE);
/* Returns the value (0-15) of the hexadecimal digit @c, or -1 when @c
 * is not a hexadecimal digit.
 */
static int
url_hex_value (unsigned char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/**
 * camel_url_decode:
 * @part: a URL part, or %NULL
 *
 * %-decodes the passed-in URL *in place*. The decoded version is
 * never longer than the encoded version, so there does not need to
 * be any additional space at the end of the string.
 *
 * Only a '%' followed by two hexadecimal digits is treated as an
 * escape; any other '%' is copied through unchanged.  A %NULL @part
 * is ignored.
 **/
void
camel_url_decode (char *part)
{
	unsigned char *s, *d;
	int hi, lo;

	if (part == NULL)
		return;

	s = d = (unsigned char *)part;
	while (*s != '\0') {
		/* The && chain stops at the terminating NUL, so s[2]
		 * is never read past the end of the string.
		 */
		if (*s == '%' &&
		    (hi = url_hex_value (s[1])) >= 0 &&
		    (lo = url_hex_value (s[2])) >= 0) {
			*d++ = (unsigned char) ((hi << 4) | lo);
			s += 3;
		} else
			*d++ = *s++;
	}
	*d = '\0';
}
515 camel_url_hash (const void *v)
517 const CamelURL *u = v;
520 #define ADD_HASH(s) if (s) hash ^= g_str_hash (s);
522 ADD_HASH (u->protocol);
524 ADD_HASH (u->authmech);
/* Compares two strings that may each be NULL.  Returns non-zero when
 * both are NULL or both have the same contents, and 0 otherwise.
 * Neither string is modified.
 */
static int
check_equal (const char *s1, const char *s2)
{
	/* With at least one NULL the strings match only if both are. */
	if (s1 == NULL || s2 == NULL)
		return s1 == s2;

	return strcmp (s1, s2) == 0;
}
550 camel_url_equal(const void *v, const void *v2)
552 const CamelURL *u1 = v, *u2 = v2;
554 return check_equal(u1->protocol, u2->protocol)
555 && check_equal(u1->user, u2->user)
556 && check_equal(u1->authmech, u2->authmech)
557 && check_equal(u1->host, u2->host)
558 && check_equal(u1->path, u2->path)
559 && check_equal(u1->query, u2->query)
560 && u1->port == u2->port;
564 camel_url_copy(const CamelURL *in)
568 out = g_malloc(sizeof(*out));
569 out->protocol = g_strdup(in->protocol);
570 out->user = g_strdup(in->user);
571 out->authmech = g_strdup(in->authmech);
572 out->passwd = g_strdup(in->passwd);
573 out->host = g_strdup(in->host);
574 out->port = in->port;
575 out->path = g_strdup(in->path);
578 g_datalist_foreach(&((CamelURL *)in)->params, copy_param, &out->params);
579 out->query = g_strdup(in->query);
580 out->fragment = g_strdup(in->fragment);