1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* soup-uri.c : utility functions to parse URLs */
5 * Copyright 1999-2003 Ximian, Inc.
14 static inline SoupProtocol
15 soup_uri_get_protocol (const char *proto, int len)
19 g_return_val_if_fail (len < sizeof (proto_buf), 0);
21 memcpy (proto_buf, proto, len);
22 proto_buf[len] = '\0';
23 return g_quark_from_string (proto_buf);
26 static inline const char *
27 soup_protocol_name (SoupProtocol proto)
29 return g_quark_to_string (proto);
33 soup_protocol_default_port (SoupProtocol proto)
35 if (proto == SOUP_PROTOCOL_HTTP)
37 else if (proto == SOUP_PROTOCOL_HTTPS)
43 static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
46 * soup_uri_new_with_base:
48 * @uri_string: the URI
50 * Parses @uri_string relative to @base.
52 * Return value: a parsed #SoupUri.
55 soup_uri_new_with_base (const SoupUri *base, const char *uri_string)
58 const char *end, *hash, *colon, *semi, *at, *slash, *question;
61 uri = g_new0 (SoupUri, 1);
63 /* See RFC2396 for details. IF YOU CHANGE ANYTHING IN THIS
64 * FUNCTION, RUN tests/uri-parsing AFTERWARDS.
68 end = hash = strchr (uri_string, '#');
69 if (hash && hash[1]) {
70 uri->fragment = g_strdup (hash + 1);
71 soup_uri_decode (uri->fragment);
73 end = uri_string + strlen (uri_string);
75 /* Find protocol: initial [A-Za-z0-9+.-]* substring until ":" */
77 while (p < end && (isalnum ((unsigned char)*p) ||
78 *p == '.' || *p == '+' || *p == '-'))
81 if (p > uri_string && *p == ':') {
82 uri->protocol = soup_uri_get_protocol (uri_string, p - uri_string);
90 if (!*uri_string && !base)
93 /* Check for authority */
94 if (strncmp (uri_string, "//", 2) == 0) {
97 slash = uri_string + strcspn (uri_string, "/#");
98 at = strchr (uri_string, '@');
99 if (at && at < slash) {
100 colon = strchr (uri_string, ':');
101 if (colon && colon < at) {
102 uri->passwd = g_strndup (colon + 1,
104 soup_uri_decode (uri->passwd);
110 semi = strchr (uri_string, ';');
111 if (semi && semi < colon &&
112 !strncasecmp (semi, ";auth=", 6)) {
113 uri->authmech = g_strndup (semi + 6,
115 soup_uri_decode (uri->authmech);
117 uri->authmech = NULL;
121 uri->user = g_strndup (uri_string, semi - uri_string);
122 soup_uri_decode (uri->user);
125 uri->user = uri->passwd = uri->authmech = NULL;
127 /* Find host and port. */
128 colon = strchr (uri_string, ':');
129 if (colon && colon < slash) {
130 uri->host = g_strndup (uri_string, colon - uri_string);
131 uri->port = strtoul (colon + 1, NULL, 10);
133 uri->host = g_strndup (uri_string, slash - uri_string);
134 soup_uri_decode (uri->host);
141 question = memchr (uri_string, '?', end - uri_string);
144 uri->query = g_strndup (question + 1,
145 end - (question + 1));
146 soup_uri_decode (uri->query);
151 if (end != uri_string) {
152 uri->path = g_strndup (uri_string, end - uri_string);
153 soup_uri_decode (uri->path);
156 /* Apply base URI. Again, this is spelled out in RFC 2396. */
157 if (base && !uri->protocol && uri->host)
158 uri->protocol = base->protocol;
159 else if (base && !uri->protocol) {
160 uri->protocol = base->protocol;
161 uri->user = g_strdup (base->user);
162 uri->authmech = g_strdup (base->authmech);
163 uri->passwd = g_strdup (base->passwd);
164 uri->host = g_strdup (base->host);
165 uri->port = base->port;
169 uri->path = g_strdup ("");
171 uri->path = g_strdup (base->path);
172 uri->query = g_strdup (base->query);
176 if (*uri->path != '/') {
177 char *newpath, *last, *p, *q;
179 last = strrchr (base->path, '/');
181 newpath = g_strdup_printf ("%.*s/%s",
186 newpath = g_strdup_printf ("/%s", uri->path);
188 /* Remove "./" where "." is a complete segment. */
189 for (p = newpath + 1; *p; ) {
190 if (*(p - 1) == '/' &&
191 *p == '.' && *(p + 1) == '/')
192 memmove (p, p + 2, strlen (p + 2) + 1);
196 /* Remove "." at end. */
197 if (p > newpath + 2 &&
198 *(p - 1) == '.' && *(p - 2) == '/')
200 /* Remove "<segment>/../" where <segment> != ".." */
201 for (p = newpath + 1; *p; ) {
202 if (!strncmp (p, "../", 3)) {
206 q = strchr (p + 1, '/');
209 if (strncmp (q, "/../", 4) != 0) {
213 memmove (p, q + 4, strlen (q + 4) + 1);
216 /* Remove "<segment>/.." at end where <segment> != ".." */
217 q = strrchr (newpath, '/');
218 if (q && !strcmp (q, "/..")) {
220 while (p > newpath && *p != '/')
222 if (strncmp (p, "/../", 4) != 0)
232 uri->port = soup_protocol_default_port (uri->protocol);
234 uri->path = g_strdup ("");
243 * Parses an absolute URI.
245 * Return value: a #SoupUri, or %NULL.
248 soup_uri_new (const char *uri_string)
252 uri = soup_uri_new_with_base (NULL, uri_string);
255 if (!uri->protocol) {
264 * soup_uri_to_string:
266 * @just_path: if %TRUE, output just the path and query portions
268 * Return value: a string representing @uri, which the caller must free.
271 soup_uri_to_string (const SoupUri *uri, gboolean just_path)
276 /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
277 * tests/uri-parsing AFTERWARD.
280 str = g_string_sized_new (20);
282 if (uri->protocol && !just_path)
283 g_string_sprintfa (str, "%s:", soup_protocol_name (uri->protocol));
284 if (uri->host && !just_path) {
285 g_string_append (str, "//");
287 append_uri_encoded (str, uri->user, ":;@/");
288 if (uri->authmech && *uri->authmech) {
289 g_string_append (str, ";auth=");
290 append_uri_encoded (str, uri->authmech, ":@/");
292 g_string_append_c (str, '@');
294 append_uri_encoded (str, uri->host, ":/");
295 if (uri->port && uri->port != soup_protocol_default_port (uri->protocol))
296 g_string_append_printf (str, ":%d", uri->port);
297 if (!uri->path && (uri->query || uri->fragment))
298 g_string_append_c (str, '/');
301 if (uri->path && *uri->path)
302 append_uri_encoded (str, uri->path, "?");
304 g_string_append_c (str, '/');
307 g_string_append_c (str, '?');
308 append_uri_encoded (str, uri->query, NULL);
310 if (uri->fragment && !just_path) {
311 g_string_append_c (str, '#');
312 append_uri_encoded (str, uri->fragment, NULL);
315 return_result = str->str;
316 g_string_free (str, FALSE);
318 return return_result;
322 soup_uri_copy (const SoupUri *uri)
326 g_return_val_if_fail (uri != NULL, NULL);
328 dup = g_new0 (SoupUri, 1);
329 dup->protocol = uri->protocol;
330 dup->user = g_strdup (uri->user);
331 dup->authmech = g_strdup (uri->authmech);
332 dup->passwd = g_strdup (uri->passwd);
333 dup->host = g_strdup (uri->host);
334 dup->port = uri->port;
335 dup->path = g_strdup (uri->path);
336 dup->query = g_strdup (uri->query);
337 dup->fragment = g_strdup (uri->fragment);
343 soup_uri_copy_root (const SoupUri *uri)
347 g_return_val_if_fail (uri != NULL, NULL);
349 dup = g_new0 (SoupUri, 1);
350 dup->protocol = uri->protocol;
351 dup->host = g_strdup (uri->host);
352 dup->port = uri->port;
357 static inline gboolean
358 parts_equal (const char *one, const char *two)
364 return !strcmp (one, two);
368 soup_uri_equal (const SoupUri *u1, const SoupUri *u2)
370 if (u1->protocol != u2->protocol ||
371 u1->port != u2->port ||
372 !parts_equal (u1->user, u2->user) ||
373 !parts_equal (u1->authmech, u2->authmech) ||
374 !parts_equal (u1->passwd, u2->passwd) ||
375 !parts_equal (u1->host, u2->host) ||
376 !parts_equal (u1->path, u2->path) ||
377 !parts_equal (u1->query, u2->query) ||
378 !parts_equal (u1->fragment, u2->fragment))
385 soup_uri_free (SoupUri *uri)
387 g_return_if_fail (uri != NULL);
390 g_free (uri->authmech);
391 g_free (uri->passwd);
395 g_free (uri->fragment);
401 soup_uri_set_auth (SoupUri *uri,
404 const char *authmech)
406 g_return_if_fail (uri != NULL);
409 g_free (uri->passwd);
410 g_free (uri->authmech);
412 uri->user = g_strdup (user);
413 uri->passwd = g_strdup (passwd);
414 uri->authmech = g_strdup (authmech);
/* From RFC 2396 2.4.3, the characters that should always be encoded.
 * Indexed by byte value; a non-zero entry means the byte is written as
 * a %-escape.
 */
static const char uri_encoded_char[] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x00 - 0x0f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x10 - 0x1f */
	1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  ' ' - '/'  */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,  /*  '0' - '?'  */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  '@' - 'O'  */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,  /*  'P' - '_'  */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  '`' - 'o'  */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,  /*  'p' - 0x7f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x80 - 0x8f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x90 - 0x9f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xa0 - 0xaf */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xb0 - 0xbf */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xc0 - 0xcf */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xd0 - 0xdf */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xe0 - 0xef */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   /* 0xf0 - 0xff */
};
438 append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars)
440 const unsigned char *s = (const unsigned char *)in;
443 if (uri_encoded_char[*s] ||
444 (extra_enc_chars && strchr (extra_enc_chars, *s)))
445 g_string_append_printf (str, "%%%02x", (int)*s++);
447 g_string_append_c (str, *s++);
454 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
455 * to escape (or %NULL)
457 * This %-encodes the given URI part and returns the escaped version
458 * in allocated memory, which the caller must free when it is done.
461 soup_uri_encode (const char *part, const char *escape_extra)
466 str = g_string_new (NULL);
467 append_uri_encoded (str, part, escape_extra);
469 g_string_free (str, FALSE);
/**
 * soup_uri_decode:
 * @part: a URI part
 *
 * %-decodes the passed-in URI *in place*. The decoded version is
 * never longer than the encoded version, so there does not need to
 * be any additional space at the end of the string.
 *
 * Only a '%' followed by two hexadecimal digits is treated as an
 * escape; a '%' followed by anything else (or by the end of the
 * string) is copied through unchanged.
 **/
void
soup_uri_decode (char *part)
{
	unsigned char *s, *d;

#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)

	s = d = (unsigned char *)part;
	do {
		/* isxdigit() is false for '\0', so this never reads past
		 * the terminator, and XDIGIT() only ever sees characters
		 * it can convert.
		 */
		if (*s == '%' && isxdigit (s[1]) && isxdigit (s[2])) {
			*d++ = (XDIGIT (s[1]) << 4) + XDIGIT (s[2]);
			s += 2;
		} else
			*d++ = *s;
	} while (*s++);	/* the terminating NUL is copied too */
}
500 soup_uri_uses_default_port (const SoupUri *uri)
502 return uri->port == soup_protocol_default_port (uri->protocol);