1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* soup-uri.c : utility functions to parse URLs */
6 * Bertrand Guiheneuf <bertrand@helixcode.com>
7 * Dan Winship <danw@helixcode.com>
8 * Alex Graveley <alex@ximian.com>
10 * Copyright 1999, 2000 Helix Code, Inc. (http://www.helixcode.com)
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
31 * Here we deal with URLs following the general scheme:
32 * protocol://user;AUTH=mech:password@host:port/name
33 * where name is a path-like string (ie dir1/dir2/....) See RFC 1738
34 * for the complete description of Uniform Resource Locators. The
35 * ";AUTH=mech" addition comes from RFC 2384, "POP URL Scheme".
39 * recover the words between #'s or ?'s after the path
47 #include "soup-misc.h"
/*
 * Table of the URL schemes this file recognises. Judging by how rows are
 * read elsewhere in this file (known->proto, known->str, known->port), each
 * row holds: the protocol id, the scheme prefix as it appears at the start
 * of a URL, and the default port for that scheme. Both SOCKS rows carry -1
 * as their port. Note that the "mailto:" prefix has no "//" after the colon,
 * unlike the other rows.
 * NOTE(review): the lookup loops in this file stop on a row whose `proto`
 * is zero, so a terminating row is presumably present on a line that is
 * elided from this listing -- confirm.
 */
55 SoupKnownProtocols known_protocols [] = {
56 { SOUP_PROTOCOL_HTTP, "http://", 80 },
57 { SOUP_PROTOCOL_HTTPS, "https://", 443 },
58 { SOUP_PROTOCOL_SMTP, "mailto:", 25 },
59 { SOUP_PROTOCOL_SOCKS4, "socks4://", -1 },
60 { SOUP_PROTOCOL_SOCKS5, "socks5://", -1 },
/*
 * soup_uri_get_protocol: match the start of a URL against known_protocols.
 * @proto: string expected to begin with a scheme prefix such as "http://"
 * @len: out parameter; on a match it receives the length of the prefix
 *
 * Walks known_protocols until a row with a zero `proto` is reached and
 * compares the beginning of @proto with each row's prefix, ignoring case
 * and looking only at as many characters as the prefix is long.
 * NOTE(review): the return statements and the no-match handling of @len
 * are on lines elided from this listing -- confirm before relying on them.
 */
65 soup_uri_get_protocol (const gchar *proto, int *len)
67 SoupKnownProtocols *known = known_protocols;
69 while (known->proto) {
70 if (!g_strncasecmp (proto, known->str, strlen (known->str))) {
/* Report how many characters of the input the scheme prefix occupies. */
71 *len = strlen (known->str);
/*
 * soup_uri_protocol_to_string: look up the scheme prefix for a protocol id.
 * @proto: protocol id to search for in known_protocols
 *
 * Returns the `str` member of the first table row whose `proto` equals
 * @proto. The returned pointer is the table's own string, not a copy.
 * NOTE(review): what is returned when no row matches is on a line elided
 * from this listing -- confirm.
 */
82 soup_uri_protocol_to_string (SoupProtocol proto)
84 SoupKnownProtocols *known = known_protocols;
86 while (known->proto) {
87 if (known->proto == proto) return known->str;
/*
 * soup_uri_get_default_port: look up the default port for a protocol id.
 * @proto: protocol id to search for in known_protocols
 *
 * Returns the `port` member of the first table row whose `proto` equals
 * @proto (which is -1 for the SOCKS rows).
 * NOTE(review): what is returned when no row matches is on a line elided
 * from this listing -- confirm.
 */
95 soup_uri_get_default_port (SoupProtocol proto)
97 SoupKnownProtocols *known = known_protocols;
99 while (known->proto) {
100 if (known->proto == proto) return known->port;
108 * soup_uri_new: create a SoupUri object from a string
109 * @uri_string: The string containing the URL to scan
111 * This routine takes a gchar and parses it as a
113 * protocol://user;AUTH=mech:password@host:port/path?querystring
114 * There is no test on the values. For example, a non-numeric
115 * "port" is not rejected; it is simply converted with atoi().
116 * The SoupUri structure fields are filled with
117 * the scan results. When a member of the
118 * general URL can not be found, the corresponding
119 * SoupUri member is NULL.
120 * Fields filled in the SoupUri structure are allocated
121 * and uri_string is not modified.
123 * Return value: a SoupUri structure containing the URL items.
/*
 * Parsing proceeds in stages over @uri_string, which is only read:
 *  1. A zero-filled SoupUri is allocated, the scheme prefix is recognised
 *     with soup_uri_get_protocol() and the parse position is moved past it.
 *     NULL is returned when no known protocol was recognised.
 *  2. If an '@' is present, the text between a ':' and the '@' is copied
 *     into passwd, the text following a case-insensitive ";auth=" is copied
 *     into authmech, and the leading text is copied into user; without an
 *     '@' all three are set to NULL.
 *  3. The host is copied up to the ':' or '/' that ends it; the port comes
 *     from atoi() on the text after the ':' or, failing that, from
 *     soup_uri_get_default_port().
 *  4. Spaces in the remainder are replaced with "%20", and the result is
 *     divided at the first '?' into path and querystring; query_elems is
 *     querystring split on '&'.
 * NOTE(review): several lines (braces, else branches, the final return and
 * any cleanup of temporaries) are elided from this listing, so the exact
 * branch structure should be confirmed against the full file.
 */
126 soup_uri_new (const gchar* uri_string)
129 char *semi, *colon, *at, *slash, *path, *query = NULL;
132 g_uri = g_new0 (SoupUri,1);
134 /* Find protocol: initial substring until "://" */
135 colon = strchr (uri_string, ':');
138 g_uri->protocol = soup_uri_get_protocol (uri_string, &protolen);
139 uri_string += protolen;
142 /* Must have a protocol */
/* NOTE(review): g_uri was allocated above and no g_free() of it is visible
 * before this return -- looks like a leak on the error path; confirm. */
143 if (!g_uri->protocol) return NULL;
145 /* If there is an @ sign, look for user, authmech, and
146 * password before it.
148 at = strchr (uri_string, '@');
150 colon = strchr (uri_string, ':');
151 if (colon && colon < at)
152 g_uri->passwd = g_strndup (colon + 1, at - colon - 1);
154 g_uri->passwd = NULL;
158 semi = strchr(uri_string, ';');
159 if (semi && semi < colon && !g_strncasecmp (semi, ";auth=", 6))
160 g_uri->authmech = g_strndup (semi + 6,
163 g_uri->authmech = NULL;
167 g_uri->user = g_strndup (uri_string, semi - uri_string);
170 g_uri->user = g_uri->passwd = g_uri->authmech = NULL;
172 /* Find host (required) and port. */
173 slash = strchr (uri_string, '/');
174 colon = strchr (uri_string, ':');
175 if (slash && colon > slash)
179 g_uri->host = g_strndup (uri_string, colon - uri_string);
/* NOTE(review): atoi() gives no error indication, so a non-numeric or
 * empty port is not detected here. */
181 g_uri->port = atoi(colon + 1);
183 g_uri->port = atoi(colon + 1);
185 g_uri->host = g_strndup (uri_string, slash - uri_string);
186 g_uri->port = soup_uri_get_default_port (g_uri->protocol);
188 g_uri->host = g_strdup (uri_string);
189 g_uri->port = soup_uri_get_default_port (g_uri->protocol);
192 /* setup a fallback, if relative, then empty string, else
193 it will be from root */
/* NOTE(review): the function already returned above when protocol is zero,
 * so in the visible lines the !g_uri->protocol test cannot be true here. */
197 if (slash && *slash && !g_uri->protocol)
/* Percent-encode spaces: split on " " and re-join the pieces with "%20". */
200 split = g_strsplit(slash, " ", 0);
201 path = g_strjoinv("%20", split);
205 query = strchr (path, '?');
/* path is everything before the '?'; ++query steps over the '?' itself. */
208 g_uri->path = g_strndup (path, query - path);
209 g_uri->querystring = g_strdup (++query);
210 g_uri->query_elems = g_strsplit (g_uri->querystring, "&", 0);
214 g_uri->querystring = NULL;
220 /* Need to handle mailto which apparently doesn't use the "//" after the : */
/*
 * soup_uri_to_string: rebuild a URL string from the fields of a SoupUri.
 * @uri: the parsed URL; NULL makes the g_return_val_if_fail() guard
 *       return NULL
 * @show_passwd: when FALSE, the ":" separator and the password are left
 *               out even if uri->passwd is set
 *
 * Fields that are NULL contribute an empty string. The ";auth=" marker is
 * emitted only together with authmech, "@" only when user is set, and "?"
 * only when querystring is set.
 *
 * Returns a string built by g_strdup_printf(), i.e. newly allocated memory
 * that the caller releases with g_free().
 * NOTE(review): the host argument (and, in the first call, the port
 * argument) sit on lines elided from this listing.
 */
222 soup_uri_to_string (const SoupUri *uri, gboolean show_passwd)
224 g_return_val_if_fail (uri != NULL, NULL);
/* Spell out ":port" only when a port is set (-1 means none) and it differs
 * from the default port of the protocol. */
226 if (uri->port != -1 &&
227 uri->port != soup_uri_get_default_port (uri->protocol))
228 return g_strdup_printf(
229 "%s%s%s%s%s%s%s%s:%d%s%s%s",
230 soup_uri_protocol_to_string (uri->protocol),
231 uri->user ? uri->user : "",
232 uri->authmech ? ";auth=" : "",
233 uri->authmech ? uri->authmech : "",
234 uri->passwd && show_passwd ? ":" : "",
235 uri->passwd && show_passwd ? uri->passwd : "",
236 uri->user ? "@" : "",
239 uri->path ? uri->path : "",
240 uri->querystring ? "?" : "",
241 uri->querystring ? uri->querystring : "");
/* Otherwise use the same layout without the ":port" component. */
243 return g_strdup_printf(
244 "%s%s%s%s%s%s%s%s%s%s%s",
245 soup_uri_protocol_to_string (uri->protocol),
246 uri->user ? uri->user : "",
247 uri->authmech ? ";auth=" : "",
248 uri->authmech ? uri->authmech : "",
249 uri->passwd && show_passwd ? ":" : "",
250 uri->passwd && show_passwd ? uri->passwd : "",
251 uri->user ? "@" : "",
253 uri->path ? uri->path : "",
254 uri->querystring ? "?" : "",
255 uri->querystring ? uri->querystring : "");
/*
 * soup_uri_copy: duplicate a SoupUri by round-tripping it through a string.
 * @uri: the URL to copy; NULL makes the g_return_val_if_fail() guard
 *       return NULL
 *
 * The source is serialised with soup_uri_to_string() and the result is
 * parsed again with soup_uri_new(), so the copy contains whatever survives
 * that round trip.
 * NOTE(review): freeing of the temporary string and the return statement
 * are on lines elided from this listing -- confirm.
 */
259 soup_uri_copy (const SoupUri* uri)
264 g_return_val_if_fail (uri != NULL, NULL);
/* TRUE keeps the password in the intermediate string so it is not lost. */
266 uri_str = soup_uri_to_string (uri, TRUE);
267 dup = soup_uri_new (uri_str);
/*
 * soup_uri_free: release the memory owned by a SoupUri.
 * @uri: the URL whose members are released
 *
 * The visible lines free the authmech, passwd and querystring strings with
 * g_free() and the query_elems string vector with g_strfreev().
 * NOTE(review): any argument check, the release of the remaining members
 * and of the structure itself are on lines elided from this listing --
 * confirm.
 */
274 soup_uri_free (SoupUri *uri)
279 g_free (uri->authmech);
280 g_free (uri->passwd);
283 g_free (uri->querystring);
/* query_elems is a string vector, so it needs g_strfreev, not g_free. */
284 g_strfreev (uri->query_elems);
/*
 * soup_debug_print_uri: dump the fields of a SoupUri with g_print().
 * @uri: the URL to print; NULL makes the g_return_if_fail() guard return
 *       without printing
 *
 * Prints one labelled line each for the protocol prefix, user, authmech,
 * password, host, path and query string.
 * NOTE(review): soup_uri_new() sets user, authmech, passwd and querystring
 * to NULL when they are absent, and those pointers are passed straight to
 * "%s" here; how a NULL "%s" argument is rendered depends on the printf
 * implementation in use -- confirm this is acceptable for debug output.
 */
290 soup_debug_print_uri (SoupUri *uri)
292 g_return_if_fail (uri != NULL);
294 g_print ("Protocol: %s\n", soup_uri_protocol_to_string (uri->protocol));
295 g_print ("User: %s\n", uri->user);
296 g_print ("Authmech: %s\n", uri->authmech);
297 g_print ("Password: %s\n", uri->passwd);
298 g_print ("Host: %s\n", uri->host);
299 g_print ("Path: %s\n", uri->path);
300 g_print ("Querystr: %s\n", uri->querystring);