1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* camel-url.c : utility functions to parse URLs */
6 * Dan Winship <danw@ximian.com>
7 * Jeffrey Stedfast <fejj@ximian.com>
9 * Copyright 1999-2001 Ximian, Inc. (www.ximian.com)
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of version 2 of the GNU General Public
13 * License as published by the Free Software Foundation.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
35 #include "camel-url.h"
36 #include "string-utils.h"
37 #include "camel-exception.h"
38 #include "camel-mime-utils.h"
39 #include "camel-object.h"
41 static void copy_param (GQuark key_id, gpointer data, gpointer user_data);
42 static void output_param (GQuark key_id, gpointer data, gpointer user_data);
44 static void append_url_encoded (GString *str, const char *in, const char *extra_enc_chars);
47 * camel_url_new_with_base:
49 * @url_string: the URL
51 * Parses @url_string relative to @base.
53 * Return value: a parsed CamelURL.
/*
 * Builds a newly allocated (g_new0) CamelURL from url_string.  base may
 * be NULL; when it is given, pieces that url_string does not supply are
 * filled in from it by the "Apply base URL" section at the end.
 *
 * Parsing order, as the code below shows:
 *   1. text after '#' becomes the fragment (only when non-empty);
 *   2. a leading run of alphanumerics, '.', '+' or '-' followed by ':'
 *      becomes the protocol, lower-cased with camel_strdown;
 *   3. a "//" prefix introduces the authority: an optional
 *      user[;auth=mech][:passwd] part before '@', then host[:port];
 *   4. text after '?' becomes the query; ';'-separated name[=value]
 *      items become params (a name without '=' gets the value "");
 *      whatever is left becomes the path.
 * fragment, passwd, authmech, user, query, param names and values, and
 * path are %-decoded with camel_url_decode after extraction.
 */
56 camel_url_new_with_base (CamelURL *base, const char *url_string)
59 const char *end, *hash, *colon, *semi, *at, *slash, *question;
62 url = g_new0 (CamelURL, 1);
64 /* See RFC1808 for details. IF YOU CHANGE ANYTHING IN THIS
65 * FUNCTION, RUN tests/misc/url AFTERWARDS.
69 end = hash = strchr (url_string, '#');
70 if (hash && hash[1]) {
71 url->fragment = g_strdup (hash + 1);
72 camel_url_decode (url->fragment);
74 end = url_string + strlen (url_string);
76 /* Find protocol: initial [a-z+.-]* substring until ":" */
78 while (p < end && (isalnum ((unsigned char)*p) ||
79 *p == '.' || *p == '+' || *p == '-'))
82 if (p > url_string && *p == ':') {
83 url->protocol = g_strndup (url_string, p - url_string);
84 camel_strdown (url->protocol);
88 if (!*url_string && !base)
91 /* Check for authority */
92 if (strncmp (url_string, "//", 2) == 0) {
95 slash = url_string + strcspn (url_string, "/#");
96 at = strchr (url_string, '@');
97 if (at && at < slash) {
98 colon = strchr (url_string, ':');
99 if (colon && colon < at) {
100 url->passwd = g_strndup (colon + 1,
102 camel_url_decode (url->passwd);
108 semi = strchr(url_string, ';');
109 if (semi && semi < colon &&
110 !strncasecmp (semi, ";auth=", 6)) {
111 url->authmech = g_strndup (semi + 6,
113 camel_url_decode (url->authmech);
115 url->authmech = NULL;
119 url->user = g_strndup (url_string, semi - url_string);
120 camel_url_decode (url->user);
123 url->user = url->passwd = url->authmech = NULL;
125 /* Find host and port. */
/* NOTE(review): only the port-less branch below shows a
 * camel_url_decode call for the host, and the strtoul result is stored
 * without an end-pointer or range check -- confirm both are intended. */
126 colon = strchr (url_string, ':');
127 if (colon && colon < slash) {
128 url->host = g_strndup (url_string, colon - url_string);
129 url->port = strtoul (colon + 1, NULL, 10);
131 url->host = g_strndup (url_string, slash - url_string);
132 camel_url_decode (url->host);
140 question = memchr (url_string, '?', end - url_string);
143 url->query = g_strndup (question + 1,
144 end - (question + 1));
145 camel_url_decode (url->query);
150 /* Find parameters */
151 semi = memchr (url_string, ';', end - url_string);
154 const char *cur, *p, *eq;
157 for (cur = semi + 1; cur < end; cur = p + 1) {
158 p = memchr (cur, ';', end - cur);
161 eq = memchr (cur, '=', p - cur);
163 name = g_strndup (cur, eq - cur);
164 value = g_strndup (eq + 1, p - (eq + 1));
165 camel_url_decode (value);
167 name = g_strndup (cur, p - cur);
168 value = g_strdup ("");
170 camel_url_decode (name);
171 g_datalist_set_data_full (&url->params, name,
179 if (end != url_string) {
180 url->path = g_strndup (url_string, end - url_string);
181 camel_url_decode (url->path);
184 /* Apply base URL. Again, this is spelled out in RFC 1808. */
185 if (base && !url->protocol && url->host)
186 url->protocol = g_strdup (base->protocol);
187 else if (base && !url->protocol) {
188 if (!url->user && !url->authmech && !url->passwd &&
189 !url->host && !url->port && !url->path &&
190 !url->params && !url->query && !url->fragment)
191 url->fragment = g_strdup (base->fragment);
193 url->protocol = g_strdup (base->protocol);
194 url->user = g_strdup (base->user);
195 url->authmech = g_strdup (base->authmech);
196 url->passwd = g_strdup (base->passwd);
197 url->host = g_strdup (base->host);
198 url->port = base->port;
201 url->path = g_strdup (base->path);
203 g_datalist_foreach (&base->params, copy_param,
206 url->query = g_strdup (base->query);
/* url->path is relative here: it is grafted onto base->path up to that
 * path's last '/', and the "." and ".." segments are then removed in
 * the four passes commented below. */
208 } else if (*url->path != '/') {
209 char *newpath, *last, *p, *q;
211 last = strrchr (base->path, '/');
213 newpath = g_strdup_printf ("%.*s/%s",
218 newpath = g_strdup_printf ("/%s", url->path);
220 /* Remove "./" where "." is a complete segment. */
221 for (p = newpath + 1; *p; ) {
222 if (*(p - 1) == '/' &&
223 *p == '.' && *(p + 1) == '/')
224 memmove (p, p + 2, strlen (p + 2) + 1);
228 /* Remove "." at end. */
229 if (p > newpath + 2 &&
230 *(p - 1) == '.' && *(p - 2) == '/')
232 /* Remove "<segment>/../" where <segment> != ".." */
233 for (p = newpath + 1; *p; ) {
234 if (!strncmp (p, "../", 3)) {
238 q = strchr (p + 1, '/');
241 if (strncmp (q, "/../", 4) != 0) {
245 memmove (p, q + 4, strlen (q + 4) + 1);
248 /* Remove "<segment>/.." at end */
249 q = strrchr (newpath, '/');
250 if (q && !strcmp (q, "/..")) {
252 while (p > newpath && *p != '/')
254 if (strncmp (p, "/../", 4) != 0)
/* Callback handed to g_datalist_foreach when a parameter list is
 * duplicated.  user_data is the address of the destination GData list;
 * a g_strdup'd copy of data is stored there under the same key_id, with
 * g_free registered as its destroy function. */
266 copy_param (GQuark key_id, gpointer data, gpointer user_data)
268 GData **copy = user_data;
270 g_datalist_id_set_data_full (copy, key_id, g_strdup (data), g_free);
276 * @ex: a CamelException
278 * Parses an absolute URL.
280 * Return value: a CamelURL, or %NULL.
/* Parses url_string with no base URL (camel_url_new_with_base is called
 * with a NULL base).  A result without a protocol is treated as invalid:
 * the parsed URL is freed and CAMEL_EXCEPTION_SERVICE_URL_INVALID is set
 * on ex with a translated "Could not parse URL" message. */
283 camel_url_new (const char *url_string, CamelException *ex)
285 CamelURL *url = camel_url_new_with_base (NULL, url_string);
287 if (!url->protocol) {
288 camel_url_free (url);
289 camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
290 _("Could not parse URL `%s'"),
298 * camel_url_to_string:
300 * @flags: additional translation options.
302 * Return value: a string representing @url, which the caller must free.
/*
 * Serialises url into a string built in a GString, in this order:
 * "protocol:", "//", user[;auth=mech][:passwd]@, host, ":port", path,
 * ;params, ?query, #fragment.  Each component goes through
 * append_url_encoded together with the delimiter characters that must
 * additionally be escaped at that position (for example ":;@/" for the
 * user and ";?" for the path).
 *
 * flags: CAMEL_URL_HIDE_PASSWORD suppresses the ":passwd" part and
 * CAMEL_URL_HIDE_PARAMS suppresses the params.
 *
 * A "/" is written in place of a missing path when params, query or
 * fragment are present.  The port is printed with "%d".  The GString is
 * released with free_segment FALSE, so its character buffer survives
 * and is the value returned.
 */
305 camel_url_to_string (CamelURL *url, guint32 flags)
310 /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
311 * tests/misc/url AFTERWARD.
314 str = g_string_sized_new (20);
317 g_string_append_printf (str, "%s:", url->protocol);
320 g_string_append (str, "//");
322 append_url_encoded (str, url->user, ":;@/");
323 if (url->authmech && *url->authmech) {
324 g_string_append (str, ";auth=");
325 append_url_encoded (str, url->authmech, ":@/");
327 if (url->passwd && !(flags & CAMEL_URL_HIDE_PASSWORD)) {
328 g_string_append_c (str, ':');
329 append_url_encoded (str, url->passwd, "@/");
331 g_string_append_c (str, '@');
333 append_url_encoded (str, url->host, ":/");
335 g_string_append_printf (str, ":%d", url->port);
336 if (!url->path && (url->params || url->query || url->fragment))
337 g_string_append_c (str, '/');
341 append_url_encoded (str, url->path, ";?");
342 if (url->params && !(flags & CAMEL_URL_HIDE_PARAMS))
343 g_datalist_foreach (&url->params, output_param, str);
345 g_string_append_c (str, '?');
346 append_url_encoded (str, url->query, NULL);
349 g_string_append_c (str, '#');
350 append_url_encoded (str, url->fragment, NULL);
353 return_result = str->str;
354 g_string_free (str, FALSE);
356 return return_result;
/* g_datalist_foreach callback used by camel_url_to_string; user_data is
 * the GString being built.  Writes ';' followed by the parameter name
 * (the string behind key_id, with '?' and '=' escaped), then '=' and
 * the value (with '?' escaped).
 * NOTE(review): confirm whether the "=value" part is skipped for empty
 * values, which camel_url_new_with_base stores for names without '='. */
360 output_param (GQuark key_id, gpointer data, gpointer user_data)
362 GString *str = user_data;
364 g_string_append_c (str, ';');
365 append_url_encoded (str, g_quark_to_string (key_id), "?=");
367 g_string_append_c (str, '=');
368 append_url_encoded (str, data, "?");
/* Releases what url owns: the string members are g_free'd and the
 * params datalist is cleared with g_datalist_clear, which runs the
 * destroy function registered for each stored value.
 * NOTE(review): only protocol, authmech, passwd and fragment are shown
 * being freed in this block -- confirm that user, host, path, query and
 * the CamelURL structure itself are released as well. */
379 camel_url_free (CamelURL *url)
382 g_free (url->protocol);
384 g_free (url->authmech);
385 g_free (url->passwd);
388 g_datalist_clear (&url->params);
390 g_free (url->fragment);
/* Generates camel_url_set_<part> (url, value) for a string member of
 * CamelURL: the old value is g_free'd and replaced by a g_strdup'd copy
 * of the argument, so the caller keeps ownership of what it passes in
 * (g_strdup of NULL yields NULL, which clears the member).
 * The macro is instantiated right below for protocol, user, authmech,
 * passwd, host, path, query and fragment. */
397 #define DEFINE_CAMEL_URL_SET(part) \
399 camel_url_set_##part (CamelURL *url, const char *part) \
401 g_free (url->part); \
402 url->part = g_strdup (part); \
405 DEFINE_CAMEL_URL_SET (protocol)
406 DEFINE_CAMEL_URL_SET (user)
407 DEFINE_CAMEL_URL_SET (authmech)
408 DEFINE_CAMEL_URL_SET (passwd)
409 DEFINE_CAMEL_URL_SET (host)
410 DEFINE_CAMEL_URL_SET (path)
411 DEFINE_CAMEL_URL_SET (query)
412 DEFINE_CAMEL_URL_SET (fragment)
/* Setter for the integer port of url; it is hand-written because the
 * DEFINE_CAMEL_URL_SET setters only handle strings.
 * NOTE(review): expected to store port in url->port -- confirm. */
415 camel_url_set_port (CamelURL *url, int port)
/* Stores a g_strdup'd copy of value under name in url->params, with
 * g_free registered as the destroy function.  A NULL value is passed to
 * the datalist as NULL data; per the GLib documentation that removes
 * any element previously stored under name. */
421 camel_url_set_param (CamelURL *url, const char *name, const char *value)
423 g_datalist_set_data_full (&url->params, name, value ? g_strdup (value) : NULL, g_free);
/* Looks up name in url->params and returns the stored pointer as-is (no
 * copy is made, so it stays owned by the datalist); g_datalist_get_data
 * yields NULL when nothing is stored under name. */
427 camel_url_get_param (CamelURL *url, const char *name)
429 return g_datalist_get_data (&url->params, name);
432 /* From RFC 2396 2.4.3, the characters that should always be encoded */
/* Lookup table indexed by byte value (256 entries, 16 per row).  A
 * non-zero entry makes append_url_encoded write that byte as "%xx";
 * a zero entry lets it through unless the caller lists the character
 * in extra_enc_chars.  All control bytes, 0x7f and every byte with the
 * high bit set are flagged. */
433 static const char url_encoded_char[] = {
434 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 - 0x0f */
435 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10 - 0x1f */
436 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ' ' - '/' */
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* '0' - '?' */
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '@' - 'O' */
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 'P' - '_' */
440 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '`' - 'o' */
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 'p' - 0x7f */
442 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 - 0x8f */
443 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 - 0x9f */
444 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0 - 0xaf */
445 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0 - 0xbf */
446 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xc0 - 0xcf */
447 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xd0 - 0xdf */
448 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xe0 - 0xef */
449 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xf0 - 0xff */
/* Appends in to str, writing a byte as "%xx" (two lower-case hex
 * digits) when url_encoded_char[] flags it or when it occurs in
 * extra_enc_chars (which may be NULL); every other byte is appended
 * unchanged. */
453 append_url_encoded (GString *str, const char *in, const char *extra_enc_chars)
/* Walk the input as unsigned char so that bytes >= 0x80 index the
 * 256-entry table instead of producing a negative subscript. */
455 const unsigned char *s = (const unsigned char *)in;
458 if (url_encoded_char[*s] ||
459 (extra_enc_chars && strchr (extra_enc_chars, *s)))
460 g_string_append_printf (str, "%%%02x", (int)*s++);
462 g_string_append_c (str, *s++);
469 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
470 * to escape (or %NULL)
472 * This %-encodes the given URL part and returns the escaped version
473 * in allocated memory, which the caller must free when it is done.
/* Convenience wrapper around append_url_encoded: part is encoded into
 * a fresh GString with escape_extra as the additional characters to
 * escape.  g_string_free is called with free_segment FALSE, which
 * releases only the GString wrapper and leaves the character buffer
 * allocated. */
476 camel_url_encode (const char *part, const char *escape_extra)
481 str = g_string_new (NULL);
482 append_url_encoded (str, part, escape_extra);
484 g_string_free (str, FALSE);
/**
 * camel_url_decode:
 * @part: NUL-terminated URL component; it is modified in place
 *
 * %-decodes the passed-in URL part *in place*. The decoded version is
 * never longer than the encoded version, so there does not need to
 * be any additional space at the end of the string.
 *
 * Only a '%' that is followed by two hexadecimal digits is treated as
 * an escape; any other '%' (for example in "50%off" or a trailing "%4")
 * is copied through unchanged instead of being turned into a garbage
 * byte. Note that "%00" decodes to a NUL byte, which ends the resulting
 * C string at that point.
 */
void
camel_url_decode (char *part)
{
	unsigned char *s, *d;

#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)

	s = d = (unsigned char *)part;
	do {
		/* XDIGIT is only meaningful for [0-9A-Fa-f], so validate
		 * both digits first. isxdigit ('\0') is false, which also
		 * stops us from reading past the terminator when the
		 * string ends in a truncated escape.
		 */
		if (*s == '%' && isxdigit (s[1]) && isxdigit (s[2])) {
			*d++ = (XDIGIT (s[1]) << 4) + XDIGIT (s[2]);
			s += 2;
		} else
			*d++ = *s;
	} while (*s++);	/* the terminating NUL is copied as well */
}
/* Hash function over a CamelURL passed as an untyped pointer: the
 * g_str_hash values of the members handed to ADD_HASH are XORed into
 * the result. */
516 camel_url_hash (const void *v)
518 const CamelURL *u = v;
/* A NULL member contributes nothing to the hash. */
521 #define ADD_HASH(s) if (s) hash ^= g_str_hash (s);
523 ADD_HASH (u->protocol);
525 ADD_HASH (u->authmech);
/* String-equality helper used by camel_url_equal; the comparison of two
 * strings is done with strcmp.
 * NOTE(review): strcmp must not be handed NULL, and CamelURL members
 * can be NULL -- confirm that NULL arguments are dealt with before the
 * final return. */
535 check_equal (char *s1, char *s2)
547 return strcmp (s1, s2) == 0;
/* Equality predicate over two CamelURLs passed as untyped pointers.
 * The result is true when protocol, user, authmech, host, path and
 * query all compare equal via check_equal and the ports match.
 * passwd, params and fragment take no part in the comparison. */
551 camel_url_equal(const void *v, const void *v2)
553 const CamelURL *u1 = v, *u2 = v2;
555 return check_equal(u1->protocol, u2->protocol)
556 && check_equal(u1->user, u2->user)
557 && check_equal(u1->authmech, u2->authmech)
558 && check_equal(u1->host, u2->host)
559 && check_equal(u1->path, u2->path)
560 && check_equal(u1->query, u2->query)
561 && u1->port == u2->port;
565 camel_url_copy(const CamelURL *in)
569 out = g_malloc(sizeof(*out));
570 out->protocol = g_strdup(in->protocol);
571 out->user = g_strdup(in->user);
572 out->authmech = g_strdup(in->authmech);
573 out->passwd = g_strdup(in->passwd);
574 out->host = g_strdup(in->host);
575 out->port = in->port;
576 out->path = g_strdup(in->path);
579 g_datalist_foreach(&((CamelURL *)in)->params, copy_param, &out->params);
580 out->query = g_strdup(in->query);
581 out->fragment = g_strdup(in->fragment);