1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* soup-uri.c : utility functions to parse URLs */
6 * Bertrand Guiheneuf <bertrand@helixcode.com>
7 * Dan Winship <danw@helixcode.com>
9 * Copyright 1999, 2000 Helix Code, Inc. (http://www.helixcode.com)
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License as
13 * published by the Free Software Foundation; either version 2 of the
14 * License, or (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
30 * Here we deal with URLs following the general scheme:
31 * protocol://user;AUTH=mech:password@host:port/name
32 * where name is a path-like string (ie dir1/dir2/....) See RFC 1738
33 * for the complete description of Uniform Resource Locators. The
34 * ";AUTH=mech" addition comes from RFC 2384, "POP URL Scheme".
38 * recover the words between #'s or ?'s after the path
/*
 * soup_uri_get_default_port: map a protocol name to a port number.
 *
 * @proto: protocol name; must not be NULL. When it is NULL,
 *         g_return_val_if_fail logs a critical warning and the function
 *         returns -1.
 *
 * The name is compared case-insensitively (strcasecmp) against "http",
 * "https", "mailto" and "ftp", in that order.
 *
 * NOTE(review): the return statement belonging to each branch, and the
 * fall-through case for unknown protocols, are not present in this
 * listing, so the actual port values cannot be confirmed from here.
 */
48 soup_uri_get_default_port (gchar *proto)
50 g_return_val_if_fail (proto != NULL, -1);
52 if (strcasecmp (proto, "http") == 0)
54 else if (strcasecmp (proto, "https") == 0)
56 else if (strcasecmp (proto, "mailto") == 0)
58 else if (strcasecmp (proto, "ftp") == 0)
65 * soup_uri_new: create a SoupUri object from a string
67 * @uri_string: The string containing the URL to scan
69 * This routine takes a gchar and parses it as a
71 * protocol://user;AUTH=mech:password@host:port/path?querystring
72 * There is no validation of the values. For example,
73 * "port" is converted with atoi(), so non-numeric text is not rejected.
74 * The SoupUri structure fields are filled with
75 * the scan results. When a member of the
76 * general URL can not be found, the corresponding
77 * SoupUri member is NULL.
78 * Fields filled in the SoupUri structure are allocated
79 * and uri_string is not modified.
81 * Return value: a SoupUri structure containing the URL items.
/*
 * Reviewer summary of soup_uri_new, based only on the lines visible here.
 *
 * The function allocates a SoupUri and fills it from uri_string:
 *  - protocol: the text before the first ':' when that colon starts the
 *    three-character scheme separator; parsing then resumes after the
 *    separator. Otherwise protocol is NULL and parsing continues from the
 *    unchanged start of the string.
 *  - user, authmech, passwd: looked for only when an '@' is found; the
 *    statement that sets all three to NULL covers the other case.
 *    passwd is the text between the first ':' and the '@' (when the colon
 *    comes before the '@'). authmech is the text after a case-insensitive
 *    ";auth=" marker, up to that colon. user is the text up to the ';'.
 *  - host: the text up to the ':' or up to the first '/', or the whole
 *    remaining string.
 *  - port: atoi() of the text after ':' when a port is parsed, otherwise
 *    the result of soup_uri_get_default_port (protocol).
 *  - path, querystring: the text from the first '/' with every space
 *    replaced by "%20", split at the first '?'.
 *
 * NOTE(review): several original lines (braces, else branches, the final
 * return, some assignments) are absent from this listing, so the exact
 * branch structure should be confirmed against the complete file.
 */
83 SoupUri *soup_uri_new (const gchar* uri_string)
86 char *semi, *colon, *at, *slash, *path, *query;
/* g_new does not zero the allocation, so every SoupUri field has to be
 * assigned explicitly on every path below. */
89 g_uri = g_new (SoupUri,1);
91 /* Find protocol: initial substring until "://" */
92 colon = strchr (uri_string, ':');
93 if (colon && !strncmp (colon, "://", 3)) {
94 g_uri->protocol = g_strndup (uri_string, colon - uri_string);
95 uri_string = colon + 3;
97 g_uri->protocol = NULL;
99 /* If there is an @ sign, look for user, authmech, and
100 * password before it.
102 at = strchr (uri_string, '@');
104 colon = strchr (uri_string, ':');
105 if (colon && colon < at)
106 g_uri->passwd = g_strndup (colon + 1, at - colon - 1);
108 g_uri->passwd = NULL;
112 semi = strchr(uri_string, ';');
113 if (semi && semi < colon && !strncasecmp (semi, ";auth=", 6))
114 g_uri->authmech = g_strndup (semi + 6, colon - semi - 6);
116 g_uri->authmech = NULL;
120 g_uri->user = g_strndup (uri_string, semi - uri_string);
123 g_uri->user = g_uri->passwd = g_uri->authmech = NULL;
125 /* Find host (required) and port. */
126 slash = strchr (uri_string, '/');
127 colon = strchr (uri_string, ':');
/* NOTE(review): presumably a ':' located after the first '/' is ignored
 * here; the statement guarded by this test is not visible. */
128 if (slash && colon > slash)
132 g_uri->host = g_strndup (uri_string, colon - uri_string);
/* atoi reports no errors: text with no leading digits yields 0. */
134 g_uri->port = atoi(colon + 1);
136 g_uri->port = atoi(colon + 1);
138 g_uri->host = g_strndup (uri_string, slash - uri_string);
/* NOTE(review): protocol can be NULL at this point (no scheme found);
 * soup_uri_get_default_port guards against NULL with
 * g_return_val_if_fail (..., -1), so that case logs a warning. */
139 g_uri->port = soup_uri_get_default_port (g_uri->protocol);
141 g_uri->host = g_strdup (uri_string);
142 g_uri->port = soup_uri_get_default_port (g_uri->protocol);
145 /* setup a fallback, if relative, then empty string, else
146 it will be from root */
150 if (slash && *slash && g_uri->protocol == NULL)
/* Replace each space in the path portion with "%20" by splitting on " "
 * and re-joining the pieces.
 * NOTE(review): g_strsplit and g_strjoinv both allocate; the matching
 * g_strfreev / g_free calls are not visible in this listing. */
153 split = g_strsplit(slash, " ", 0);
154 path = g_strjoinv("%20", split);
/* Split at the first '?': path is the text before it, querystring the
 * text after it (the pre-increment skips the '?' itself). */
158 query = strchr (path, '?');
161 g_uri->path = g_strndup (path, query - path);
162 g_uri->querystring = g_strdup (++query);
166 g_uri->querystring = NULL;
172 /* Need to handle mailto which apparently doesn't use the "//" after the : */
/*
 * soup_uri_to_string: build a textual form of a SoupUri.
 *
 * @uri:         the URI to format; dereferenced without a visible NULL
 *               check.
 * @show_passwd: when FALSE, the ":" separator and the password are
 *               omitted even if uri->passwd is set.
 *
 * Returns a string produced by g_strdup_printf, i.e. newly allocated
 * memory that the caller releases with g_free.
 *
 * Two layouts are used: one that includes ":<port>" when the port is
 * neither -1 nor the default for the protocol, and one without a port.
 * NULL members are rendered as empty strings.
 *
 * NOTE(review): uri->protocol may be NULL, yet it is handed to
 * soup_uri_get_default_port, which rejects NULL with
 * g_return_val_if_fail and returns -1.
 * NOTE(review): ";auth=" / authmech and ":" / passwd are emitted whenever
 * those members are set, but "@" only when uri->user is set, so a URI
 * with credentials and no user would be printed without the "@".
 */
174 soup_uri_to_string (const SoupUri *uri, gboolean show_passwd)
176 if (uri->port != -1 &&
177 uri->port != soup_uri_get_default_port(uri->protocol))
/* Non-default port: include ":<port>" in the output. */
178 return g_strdup_printf(
179 "%s%s%s%s%s%s%s%s%s:%d%s%s%s",
180 uri->protocol ? uri->protocol : "",
181 uri->protocol ? "://" : "",
182 uri->user ? uri->user : "",
183 uri->authmech ? ";auth=" : "",
184 uri->authmech ? uri->authmech : "",
185 uri->passwd && show_passwd ? ":" : "",
186 uri->passwd && show_passwd ? uri->passwd : "",
187 uri->user ? "@" : "",
/* NOTE(review): the format expects one more string and an int at this
 * position (presumably host and port); those argument lines are not
 * visible in this listing. */
190 uri->path ? uri->path : "",
191 uri->querystring ? "?" : "",
192 uri->querystring ? uri->querystring : "");
/* Default or unknown port: same layout without the port component. */
194 return g_strdup_printf(
195 "%s%s%s%s%s%s%s%s%s%s%s%s",
196 uri->protocol ? uri->protocol : "",
197 uri->protocol ? "://" : "",
198 uri->user ? uri->user : "",
199 uri->authmech ? ";auth=" : "",
200 uri->authmech ? uri->authmech : "",
201 uri->passwd && show_passwd ? ":" : "",
202 uri->passwd && show_passwd ? uri->passwd : "",
203 uri->user ? "@" : "",
/* NOTE(review): one string argument (presumably host) is expected here
 * by the format but is not visible in this listing. */
205 uri->path ? uri->path : "",
206 uri->querystring ? "?" : "",
207 uri->querystring ? uri->querystring : "");
/*
 * soup_uri_free: release the strings owned by a SoupUri.
 *
 * @uri: the structure whose members are freed.
 *
 * The visible lines free protocol, authmech, passwd and querystring with
 * g_free, which accepts NULL, so unset members need no guard.
 *
 * NOTE(review): the lines that presumably free user, host, path and the
 * structure itself, and any NULL check on uri, are not visible in this
 * listing; confirm against the complete file.
 */
211 soup_uri_free (SoupUri *uri)
215 g_free (uri->protocol);
217 g_free (uri->authmech);
218 g_free (uri->passwd);
221 g_free (uri->querystring);
/*
 * soup_debug_print_uri: dump the string members of a SoupUri with g_print.
 *
 * @uri: the structure to print; when NULL, g_return_if_fail logs a
 *       critical warning and the function returns without printing.
 *
 * One labelled line is written per member: protocol, user, authmech,
 * password, host, path and query string. No line for the port is visible
 * here.
 *
 * NOTE(review): soup_uri_new stores NULL in members it cannot find, and
 * those pointers are passed straight to "%s"; whether that prints a
 * placeholder or misbehaves depends on the underlying printf
 * implementation.
 */
227 soup_debug_print_uri (SoupUri *uri)
229 g_return_if_fail (uri != NULL);
231 g_print ("Protocol: %s\n", uri->protocol);
232 g_print ("User: %s\n", uri->user);
233 g_print ("Authmech: %s\n", uri->authmech);
234 g_print ("Password: %s\n", uri->passwd);
235 g_print ("Host: %s\n", uri->host);
236 g_print ("Path: %s\n", uri->path);
237 g_print ("Querystr: %s\n", uri->querystring);