1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* camel-url.c : utility functions to parse URLs */
7 * Bertrand Guiheneuf <bertrand@helixcode.com>
8 * Dan Winship <danw@helixcode.com>
9 * Tiago Antão <tiagoantao@bigfoot.com>
10 * Jeffrey Stedfast <fejj@helixcode.com>
12 * Copyright 1999, 2000 Helix Code, Inc. (http://www.helixcode.com)
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
36 #include "camel-url.h"
37 #include "camel-mime-utils.h"
38 #include "camel-exception.h"
39 #include "camel-object.h"
42 * camel_url_new: create a CamelURL object from a string
43 * @url_string: The string containing the URL to scan
45 * This routine takes a string and parses it as a URL of the form:
47 * protocol://user;AUTH=mech:password@host:port/path
49 * The protocol, followed by a ":" is required. If it is followed by * "//",
50 * there must be an "authority" containing at least a host,
51 * which ends at the end of the string or at the next "/". If there
52 * is an "@" in the authority, there must be a username before it,
53 * and the host comes after it. The authmech, password, and port are
54 * optional, and the punctuation that preceeds them is omitted if
55 * they are. Everything after the authority (or everything after the
56 * protocol if there was no authority) is the path. We consider the
57 * "/" between the authority and the path to be part of the path,
58 * although this is incorrect according to RFC 1738.
60 * The port, if present, must be numeric.
62 * Return value: a CamelURL structure containing the URL items.
65 camel_url_new (const char *url_string, CamelException *ex)
68 char *semi, *colon, *at, *slash;
71 /* Find protocol: initial substring until ":" */
72 colon = strchr (url_string, ':');
74 camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
75 _("URL string `%s' contains no protocol"),
80 url = g_new0 (CamelURL, 1);
81 url->protocol = g_strndup (url_string, colon - url_string);
82 g_strdown (url->protocol);
87 if (!((*p >= 'a' && *p <= 'z') ||
88 (*p == '-') || (*p == '+') || (*p == '.'))) {
89 camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
90 _("URL string `%s' contains an invalid protocol"),
97 if (strncmp (colon, "://", 3) != 0) {
99 url->path = g_strdup (colon + 1);
100 camel_url_decode (url->path);
105 url_string = colon + 3;
107 /* If there is an @ sign in the authority, look for user,
108 * authmech, and password before it.
110 slash = strchr (url_string, '/');
111 at = strchr (url_string, '@');
112 if (at && (!slash || at < slash)) {
113 colon = strchr (url_string, ':');
114 if (colon && colon < at) {
115 url->passwd = g_strndup (colon + 1, at - colon - 1);
116 camel_url_decode (url->passwd);
122 semi = strchr(url_string, ';');
123 if (semi && (semi < colon || (!colon && semi < at)) &&
124 !strncasecmp (semi, ";auth=", 6)) {
125 url->authmech = g_strndup (semi + 6,
127 camel_url_decode (url->authmech);
129 url->authmech = NULL;
133 url->user = g_strndup (url_string, semi - url_string);
134 camel_url_decode (url->user);
137 url->user = url->passwd = url->authmech = NULL;
139 /* Find host and port. */
140 slash = strchr (url_string, '/');
141 colon = strchr (url_string, ':');
142 if (slash && colon > slash)
146 url->host = g_strndup (url_string, colon - url_string);
147 url->port = strtoul (colon + 1, &colon, 10);
148 if (*colon && colon != slash) {
149 camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
150 _("Port number in URL `%s' is non-"
151 "numeric"), url_string);
152 camel_url_free (url);
156 url->host = g_strndup (url_string, slash - url_string);
157 camel_url_decode (url->host);
160 url->host = g_strdup (url_string);
161 camel_url_decode (url->host);
167 url->path = g_strdup (slash);
168 camel_url_decode (url->path);
174 camel_url_to_string (CamelURL *url, gboolean show_passwd)
177 char *user = NULL, *authmech = NULL, *passwd = NULL;
178 char *host = NULL, *path = NULL;
182 user = camel_url_encode (url->user, TRUE, ":;@/");
184 if (url->authmech && *url->authmech)
185 authmech = camel_url_encode (url->authmech, TRUE, ":@/");
187 if (show_passwd && url->passwd)
188 passwd = camel_url_encode (url->passwd, TRUE, "@/");
191 host = camel_url_encode (url->host, TRUE, ":/");
194 g_snprintf (port, sizeof (port), "%d", url->port);
199 path = camel_url_encode (url->path, FALSE, NULL);
201 return_result = g_strdup_printf ("%s:%s%s%s%s%s%s%s%s%s%s%s%s",
205 authmech ? ";auth=" : "",
206 authmech ? authmech : "",
208 passwd ? passwd : "",
213 path && host && *path != '/' ? "/" : "",
221 return return_result;
225 camel_url_free (CamelURL *url)
229 g_free (url->protocol);
231 g_free (url->authmech);
232 g_free (url->passwd);
239 void camel_url_set_protocol(CamelURL *url, const char *p)
241 g_free(url->protocol);
242 url->protocol = g_strdup(p);
245 void camel_url_set_host(CamelURL *url, const char *h)
248 url->host = g_strdup(h);
251 void camel_url_set_port(CamelURL *url, int port)
255 void camel_url_set_path(CamelURL *url, const char *p)
258 url->path = g_strdup(p);
265 * @escape_unsafe: whether or not to %-escape "unsafe" characters.
267 * @escape_extra: additional characters to escape.
269 * This %-encodes the given URL part and returns the escaped version
270 * in allocated memory, which the caller must free when it is done.
273 camel_url_encode (char *part, gboolean escape_unsafe, char *escape_extra)
277 /* worst case scenario = 3 times the initial */
278 p = work = g_malloc (3 * strlen (part) + 1);
281 if (((guchar) *part >= 127) || ((guchar) *part <= ' ') ||
282 (escape_unsafe && strchr ("\"%#<>{}|\\^~[]`", *part)) ||
283 (escape_extra && strchr (escape_extra, *part))) {
284 sprintf (p, "%%%.02hX", (guchar) *part++);
/* Numeric value of the hex digit c; c must satisfy isxdigit(). */
#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)

/**
 * camel_url_decode: %-decode one part of a URL
 * @part: a URL part
 *
 * %-decodes the passed-in URL *in place*. The decoded version is
 * never longer than the encoded version, so there does not need to
 * be any additional space at the end of the string. A "%" that is
 * not followed by two hex digits is copied through unchanged.
 */
void
camel_url_decode (char *part)
{
	unsigned char *in = (unsigned char *) part;
	unsigned char *out = in;

	/* out never runs ahead of in, so rewriting in place is safe. */
	for (; *in; out++) {
		if (in[0] == '%' && isxdigit (in[1]) && isxdigit (in[2])) {
			*out = HEXVAL (in[1]) * 16 + HEXVAL (in[2]);
			in += 3;
		} else {
			*out = *in++;
		}
	}
	*out = '\0';
}
324 add_hash (guint *hash, char *s)
327 *hash ^= g_str_hash(s);
330 guint camel_url_hash (const void *v)
332 const CamelURL *u = v;
335 add_hash (&hash, u->protocol);
336 add_hash (&hash, u->user);
337 add_hash (&hash, u->authmech);
338 add_hash (&hash, u->host);
339 add_hash (&hash, u->path);
/* Compare two strings that may be NULL: two NULLs are equal, a NULL
 * never equals a real string, otherwise the contents decide.
 * Returns 1 when equal and 0 when not.
 */
static int
check_equal (char *s1, char *s2)
{
	if (s1 == NULL || s2 == NULL)
		return s1 == s2;

	return strcmp (s1, s2) == 0;
}
361 int camel_url_equal(const void *v, const void *v2)
363 const CamelURL *u1 = v, *u2 = v2;
365 return check_equal(u1->protocol, u2->protocol)
366 && check_equal(u1->user, u2->user)
367 && check_equal(u1->authmech, u2->authmech)
368 && check_equal(u1->host, u2->host)
369 && check_equal(u1->path, u2->path)
370 && u1->port == u2->port;