1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* soup-uri.c : utility functions to parse URLs */
5 * Copyright 1999-2003 Ximian, Inc.
/* Interns a protocol (scheme) name and returns it as a SoupProtocol.
 *
 * @proto: start of the protocol name; only the first @len bytes are read,
 *         so it does not have to be NUL-terminated
 * @len:   number of bytes of @proto that make up the name
 *
 * The bytes are copied into proto_buf, NUL-terminated there, and handed to
 * g_quark_from_string(); that quark is the return value. When @len does not
 * leave room for the terminator in proto_buf, g_return_val_if_fail()
 * returns 0 instead.
 */
14 static inline SoupProtocol
15 soup_uri_get_protocol (const char *proto, int len)
/* @len is an int compared against a size_t: a negative value converts to a
 * huge unsigned number, so it fails this check too instead of reaching
 * memcpy(). */
19 g_return_val_if_fail (len < sizeof (proto_buf), 0);
21 memcpy (proto_buf, proto, len);
22 proto_buf[len] = '\0';
23 return g_quark_from_string (proto_buf);
/* Returns the textual name of @proto by looking the value up with
 * g_quark_to_string(). The string is not copied here, so the caller
 * receives whatever pointer g_quark_to_string() hands back. */
26 static inline const char *
27 soup_protocol_name (SoupProtocol proto)
29 return g_quark_to_string (proto);
/* Maps a protocol to its default port, with separate cases for
 * SOUP_PROTOCOL_HTTP and SOUP_PROTOCOL_HTTPS.
 * NOTE(review): the value returned by each branch, and the result for any
 * other protocol, are not visible here -- confirm against the full body.
 */
33 soup_protocol_default_port (SoupProtocol proto)
35 if (proto == SOUP_PROTOCOL_HTTP)
37 else if (proto == SOUP_PROTOCOL_HTTPS)
/* Forward declaration: append_uri_encoded() is defined further down in this
 * file but is called from soup_uri_to_string() before that definition. */
43 static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
/* Parses @uri_string into a SoupUri allocated with g_new0(), resolving it
 * against @base when the string is relative.
 *
 * Steps, in the order the code performs them:
 *  1. fragment: the text after '#' is duplicated and %-decoded;
 *  2. protocol: a leading run of isalnum() characters, '.', '+' or '-'
 *     that is followed by ':' is interned with soup_uri_get_protocol();
 *  3. authority (only after a leading "//"): an optional
 *     user[;auth=mech][:passwd] part before '@', then host[:port];
 *  4. query (after '?') and path, each duplicated and %-decoded;
 *  5. base resolution: a URI with a host but no protocol takes only the
 *     protocol from @base; a URI with neither also takes user, authmech,
 *     passwd, host and port, and a relative path is combined with
 *     base->path before "." and ".." segments are collapsed;
 *  6. defaults: the port can be filled in from
 *     soup_protocol_default_port() and the path set to "".
 *
 * NOTE(review): the conditions guarding the path/query inheritance in step 5
 * and both assignments in step 6 are not visible here -- presumably they
 * apply only when the field is still unset; confirm before relying on this.
 */
46 * soup_uri_new_with_base:
48 * @uri_string: the URI
50 * Parses @uri_string relative to @base.
52 * Return value: a parsed #SoupUri.
55 soup_uri_new_with_base (const SoupUri *base, const char *uri_string)
58 const char *end, *hash, *colon, *semi, *at, *slash, *question;
61 uri = g_new0 (SoupUri, 1);
63 /* See RFC2396 for details. IF YOU CHANGE ANYTHING IN THIS
64 * FUNCTION, RUN tests/uri-parsing AFTERWARDS.
68 end = hash = strchr (uri_string, '#');
69 if (hash && hash[1]) {
70 uri->fragment = g_strdup (hash + 1);
71 soup_uri_decode (uri->fragment);
73 end = uri_string + strlen (uri_string);
75 /* Find protocol: initial run of isalnum() characters, '+', '.' or '-' until ":" */
77 while (p < end && (isalnum ((unsigned char)*p) ||
78 *p == '.' || *p == '+' || *p == '-'))
81 if (p > uri_string && *p == ':') {
82 uri->protocol = soup_uri_get_protocol (uri_string, p - uri_string);
90 if (!*uri_string && !base)
93 /* Check for authority */
94 if (strncmp (uri_string, "//", 2) == 0) {
/* slash marks the end of the authority: the first '/' or '#', or the end
 * of the string when neither occurs. */
97 slash = uri_string + strcspn (uri_string, "/#");
98 at = strchr (uri_string, '@');
99 if (at && at < slash) {
100 colon = strchr (uri_string, ':');
101 if (colon && colon < at) {
102 uri->passwd = g_strndup (colon + 1,
104 soup_uri_decode (uri->passwd);
110 semi = strchr (uri_string, ';');
111 if (semi && semi < colon &&
112 !strncasecmp (semi, ";auth=", 6)) {
113 uri->authmech = g_strndup (semi + 6,
115 soup_uri_decode (uri->authmech);
117 uri->authmech = NULL;
121 uri->user = g_strndup (uri_string, semi - uri_string);
122 soup_uri_decode (uri->user);
125 uri->user = uri->passwd = uri->authmech = NULL;
127 /* Find host and port. */
128 colon = strchr (uri_string, ':');
129 if (colon && colon < slash) {
130 uri->host = g_strndup (uri_string, colon - uri_string);
/* No end pointer is passed, so strtoul() cannot report a malformed or
 * empty port here; such input simply yields whatever it parsed. */
131 uri->port = strtoul (colon + 1, NULL, 10);
133 uri->host = g_strndup (uri_string, slash - uri_string);
134 soup_uri_decode (uri->host);
141 question = memchr (uri_string, '?', end - uri_string);
144 uri->query = g_strndup (question + 1,
145 end - (question + 1));
146 soup_uri_decode (uri->query);
151 if (end != uri_string) {
152 uri->path = g_strndup (uri_string, end - uri_string);
153 soup_uri_decode (uri->path);
156 /* Apply base URI. Again, this is spelled out in RFC 2396. */
/* Host but no protocol: only the protocol comes from @base. */
157 if (base && !uri->protocol && uri->host)
158 uri->protocol = base->protocol;
/* Neither protocol nor host: the whole authority comes from @base. */
159 else if (base && !uri->protocol) {
160 uri->protocol = base->protocol;
161 uri->user = g_strdup (base->user);
162 uri->authmech = g_strdup (base->authmech);
163 uri->passwd = g_strdup (base->passwd);
164 uri->host = g_strdup (base->host);
165 uri->port = base->port;
169 uri->path = g_strdup ("");
171 uri->path = g_strdup (base->path);
172 uri->query = g_strdup (base->query);
176 if (*uri->path != '/') {
177 char *newpath, *last, *p, *q;
179 last = strrchr (base->path, '/');
181 newpath = g_strdup_printf ("%.*s/%s",
186 newpath = g_strdup_printf ("/%s", uri->path);
188 /* Remove "./" where "." is a complete segment. */
189 for (p = newpath + 1; *p; ) {
190 if (*(p - 1) == '/' &&
191 *p == '.' && *(p + 1) == '/')
192 memmove (p, p + 2, strlen (p + 2) + 1);
196 /* Remove "." at end. */
197 if (p > newpath + 2 &&
198 *(p - 1) == '.' && *(p - 2) == '/')
200 /* Remove "<segment>/../" where <segment> != ".." */
201 for (p = newpath + 1; *p; ) {
202 if (!strncmp (p, "../", 3)) {
206 q = strchr (p + 1, '/');
209 if (strncmp (q, "/../", 4) != 0) {
213 memmove (p, q + 4, strlen (q + 4) + 1);
216 /* Remove "<segment>/.." at end where <segment> != ".." */
217 q = strrchr (newpath, '/');
218 if (q && !strcmp (q, "/..")) {
220 while (p > newpath && *p != '/')
222 if (strncmp (p, "/../", 4) != 0)
232 uri->port = soup_protocol_default_port (uri->protocol);
234 uri->path = g_strdup ("");
/* Parses @uri_string with no base URI by calling
 * soup_uri_new_with_base (NULL, uri_string), then tests whether the result
 * has a protocol.
 * NOTE(review): the handling of a result without a protocol is not visible
 * here; the description below allows a %NULL return, so presumably the
 * URI is discarded in that case -- confirm.
 */
243 * Parses an absolute URI.
245 * Return value: a #SoupUri, or %NULL.
248 soup_uri_new (const char *uri_string)
252 uri = soup_uri_new_with_base (NULL, uri_string);
255 if (!uri->protocol) {
/* Serializes @uri into a string built up in a GString.
 *
 * Pieces are appended in this order:
 *   - "protocol:" when a protocol is set and @just_path is false;
 *   - "//", the user (followed by ";auth=" and the authmech when that is
 *     non-empty) and '@', then the host, when a host is set and @just_path
 *     is false;
 *   - ":port" when the port is non-zero and differs from
 *     soup_protocol_default_port() for the protocol;
 *   - the path when it is non-empty;
 *   - '?' and the query;
 *   - '#' and the fragment when @just_path is false.
 * Every component goes through append_uri_encoded(), with the delimiters
 * that would be ambiguous at that position passed as extra characters to
 * escape. No line in the visible code writes the passwd field.
 *
 * The character buffer is saved before g_string_free (str, FALSE) releases
 * only the GString wrapper, and that buffer is returned.
 *
 * NOTE(review): the guards around the user and query output, and the
 * condition on the bare '/' fallback for an empty path, are not visible
 * here -- confirm them against the full body.
 * NOTE(review): g_string_sprintfa() is the older name of
 * g_string_append_printf(), which this function also calls; using one
 * spelling throughout would be more consistent.
 */
264 * soup_uri_to_string:
266 * @just_path: if %TRUE, output just the path and query portions
268 * Return value: a string representing @uri, which the caller must free.
271 soup_uri_to_string (const SoupUri *uri, gboolean just_path)
276 /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
277 * tests/uri-parsing AFTERWARD.
280 str = g_string_sized_new (20);
282 if (uri->protocol && !just_path)
283 g_string_sprintfa (str, "%s:", soup_protocol_name (uri->protocol));
284 if (uri->host && !just_path) {
285 g_string_append (str, "//");
287 append_uri_encoded (str, uri->user, ":;@/");
288 if (uri->authmech && *uri->authmech) {
289 g_string_append (str, ";auth=");
290 append_uri_encoded (str, uri->authmech, ":@/");
292 g_string_append_c (str, '@');
294 append_uri_encoded (str, uri->host, ":/");
295 if (uri->port && uri->port != soup_protocol_default_port (uri->protocol))
296 g_string_append_printf (str, ":%d", uri->port);
297 if (!uri->path && (uri->query || uri->fragment))
298 g_string_append_c (str, '/');
301 if (uri->path && *uri->path)
302 append_uri_encoded (str, uri->path, "?");
304 g_string_append_c (str, '/');
307 g_string_append_c (str, '?');
308 append_uri_encoded (str, uri->query, NULL);
310 if (uri->fragment && !just_path) {
311 g_string_append_c (str, '#');
312 append_uri_encoded (str, uri->fragment, NULL);
315 return_result = str->str;
316 g_string_free (str, FALSE);
318 return return_result;
/* Builds a deep copy of @uri in a zero-initialized SoupUri from g_new0():
 * protocol and port are assigned directly, and each string field (user,
 * authmech, passwd, host, path, query, fragment) is duplicated with
 * g_strdup(), so the copy shares no string storage with the original.
 * A NULL @uri makes g_return_val_if_fail() return NULL.
 * NOTE(review): the final return statement is not visible here;
 * presumably dup is returned -- confirm.
 */
322 soup_uri_copy (const SoupUri *uri)
326 g_return_val_if_fail (uri != NULL, NULL);
328 dup = g_new0 (SoupUri, 1);
329 dup->protocol = uri->protocol;
330 dup->user = g_strdup (uri->user);
331 dup->authmech = g_strdup (uri->authmech);
332 dup->passwd = g_strdup (uri->passwd);
333 dup->host = g_strdup (uri->host);
334 dup->port = uri->port;
335 dup->path = g_strdup (uri->path);
336 dup->query = g_strdup (uri->query);
337 dup->fragment = g_strdup (uri->fragment);
/* Compares two URI components. The visible return statement yields a true
 * value exactly when strcmp() reports @one and @two as identical.
 * NOTE(review): strcmp() must not be passed NULL, and URI fields can be
 * NULL; presumably the lines between the signature and this return deal
 * with NULL arguments -- confirm.
 */
342 static inline gboolean
343 parts_equal (const char *one, const char *two)
349 return !strcmp (one, two);
/* Field-by-field comparison of two URIs. protocol and port are compared by
 * value; user, authmech, passwd, host, path, query and fragment are
 * compared with parts_equal(). The condition below is true as soon as any
 * one field differs.
 * NOTE(review): the values returned for the "differs" and "all equal"
 * cases are not visible here, and neither @u1 nor @u2 is checked for NULL
 * in the visible code.
 */
353 soup_uri_equal (const SoupUri *u1, const SoupUri *u2)
355 if (u1->protocol != u2->protocol ||
356 u1->port != u2->port ||
357 !parts_equal (u1->user, u2->user) ||
358 !parts_equal (u1->authmech, u2->authmech) ||
359 !parts_equal (u1->passwd, u2->passwd) ||
360 !parts_equal (u1->host, u2->host) ||
361 !parts_equal (u1->path, u2->path) ||
362 !parts_equal (u1->query, u2->query) ||
363 !parts_equal (u1->fragment, u2->fragment))
/* Releases string fields owned by @uri with g_free(). A NULL @uri is
 * rejected by g_return_if_fail() before anything is touched.
 * NOTE(review): only the authmech, passwd and fragment frees are visible
 * here; confirm that user, host, path, query and the SoupUri structure
 * itself are released as well.
 */
370 soup_uri_free (SoupUri *uri)
372 g_return_if_fail (uri != NULL);
375 g_free (uri->authmech);
376 g_free (uri->passwd);
380 g_free (uri->fragment);
/* Replaces the authentication fields of @uri. The previous passwd and
 * authmech strings are freed, then user, passwd and authmech are set to
 * g_strdup() copies of the corresponding arguments, so the caller keeps
 * ownership of the strings it passed in. g_strdup (NULL) yields NULL, so a
 * NULL argument clears that field. A NULL @uri is rejected by
 * g_return_if_fail().
 * NOTE(review): a free of the previous user string is not visible here;
 * confirm it exists, otherwise the old value leaks on reassignment.
 */
386 soup_uri_set_auth (SoupUri *uri,
389 const char *authmech)
391 g_return_if_fail (uri != NULL);
394 g_free (uri->passwd);
395 g_free (uri->authmech);
397 uri->user = g_strdup (user);
398 uri->passwd = g_strdup (passwd);
399 uri->authmech = g_strdup (authmech);
/* Lookup table indexed by byte value (0-255); a non-zero entry means
 * append_uri_encoded() always writes that byte as a %-escape. The flagged
 * bytes are the control characters 0x00-0x1f and 0x7f, the space, the
 * punctuation  " # % < > [ \ ] ^ ` { | }  and every byte from 0x80 up.
 */
402 /* From RFC 2396 2.4.3, the characters that should always be encoded */
403 static const char uri_encoded_char[] = {
404 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 - 0x0f */
405 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10 - 0x1f */
406 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ' ' - '/' */
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* '0' - '?' */
408 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '@' - 'O' */
409 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 'P' - '_' */
410 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '`' - 'o' */
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 'p' - 0x7f */
412 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 - 0x8f */
413 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 - 0x9f */
414 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0 - 0xaf */
415 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0 - 0xbf */
416 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xc0 - 0xcf */
417 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xd0 - 0xdf */
418 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xe0 - 0xef */
419 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xf0 - 0xff */
/* Appends @in to @str, %-encoding bytes as needed.
 *
 * @str:             destination string, extended in place
 * @in:              text to append
 * @extra_enc_chars: additional characters to escape, or NULL for none
 *
 * A byte is written as '%' followed by two lowercase hex digits when
 * uri_encoded_char[] flags it or when it occurs in @extra_enc_chars; any
 * other byte is appended unchanged.
 */
423 append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars)
/* Read through an unsigned char pointer so each byte is a valid,
 * non-negative index into uri_encoded_char[]. */
425 const unsigned char *s = (const unsigned char *)in;
428 if (uri_encoded_char[*s] ||
429 (extra_enc_chars && strchr (extra_enc_chars, *s)))
430 g_string_append_printf (str, "%%%02x", (int)*s++);
432 g_string_append_c (str, *s++);
/* Public wrapper around append_uri_encoded(): a fresh GString is created,
 * @part is appended to it with @escape_extra as the additional set of
 * characters to escape, and the GString wrapper is then released with
 * g_string_free (str, FALSE), which leaves the character data allocated.
 */
439 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
440 * to escape (or %NULL)
442 * This %-encodes the given URI part and returns the escaped version
443 * in allocated memory, which the caller must free when it is done.
446 soup_uri_encode (const char *part, const char *escape_extra)
451 str = g_string_new (NULL);
452 append_uri_encoded (str, part, escape_extra);
454 g_string_free (str, FALSE);
463 * %-decodes the passed-in URI *in place*. The decoded version is
464 * never longer than the encoded version, so there does not need to
465 * be any additional space at the end of the string.
468 soup_uri_decode (char *part)
470 unsigned char *s, *d;
472 #define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
474 s = d = (unsigned char *)part;
476 if (*s == '%' && s[1] && s[2]) {
477 *d++ = (XDIGIT (s[1]) << 4) + XDIGIT (s[2]);