1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * Copyright (C) 2001-2003, Ximian, Inc.
15 #include <libsoup/soup.h>
/* Map the two-argument mkdir() call form onto _mkdir(), which takes only
 * the path; the mode argument is discarded by the macro.
 * NOTE(review): presumably guarded by a platform #ifdef that is not
 * visible in this excerpt -- confirm. */
19 #define mkdir(path, mode) _mkdir (path)
/* Global booleans, both initialised to FALSE.
 * NOTE(review): presumably set from command-line options -- confirm. */
24 gboolean recurse = FALSE, debug = FALSE;
/* Table of URLs already requested. get_url() inserts each URL string as
 * both key and value, i.e. the table is used as a set. */
29 GHashTable *fetched_urls;
/*
 * find_hrefs:
 * @base: URI that links found in the body are resolved against
 * @body: response body bytes (not required to be NUL-terminated)
 * @length: number of bytes of @body to examine
 *
 * Scans @body for "href" occurrences and collects the referenced URIs,
 * as strings, in a newly created GPtrArray. The caller in get_url()
 * frees each element with g_free() and then frees the array.
 *
 * Only part of this function is visible in this excerpt; what happens
 * inside the branches below is hedged where it cannot be seen.
 */
32 find_hrefs (SoupURI *base, const char *body, int length)
34 GPtrArray *hrefs = g_ptr_array_new ();
/* Work on a NUL-terminated copy so strstr()/strchr() can be used. */
35 char *buf = g_strndup (body, length);
36 char *start = buf, *end;
/* strstr() is case-sensitive, so only lower-case "href" is matched. */
40 while ((start = strstr (start, "href"))) {
/* The cast to unsigned char keeps isspace() defined for bytes >= 0x80. */
42 while (isspace ((unsigned char) *start))
46 while (isspace ((unsigned char) *start))
/* The attribute value is taken to run up to the next double quote.
 * NOTE(review): strchr() returns NULL when no quote follows; the check
 * for that is not visible here -- confirm it precedes the
 * `end - start` computation below. */
51 end = strchr (start, '"');
55 href = g_strndup (start, end - start);
/* Locate a fragment marker in the link text.
 * NOTE(review): presumably the fragment is cut off -- not visible. */
57 frag = strchr (href, '#');
/* Resolve the (possibly relative) link against @base. */
61 uri = soup_uri_new_with_base (base, href);
/* Compare scheme, port and host with @base.
 * NOTE(review): scheme is compared by pointer, which assumes SoupURI
 * stores scheme strings in a form where equal schemes share one
 * pointer -- confirm. Presumably a mismatch skips the link. */
66 if (base->scheme != uri->scheme ||
67 base->port != uri->port ||
68 g_ascii_strcasecmp (base->host, uri->host) != 0) {
/* Check whether the link's path begins with @base's path (plain prefix
 * comparison). Presumably a mismatch skips the link -- not visible. */
73 if (strncmp (base->path, uri->path, strlen (base->path)) != 0) {
/* Store the URI as a string built by soup_uri_to_string(). */
78 g_ptr_array_add (hrefs, soup_uri_to_string (uri, FALSE));
/*
 * mkdirs:
 * @path: slash-separated path
 *
 * Visits every '/' in @path in order and calls mkdir() with mode 0755.
 * A failure with errno == EEXIST is tolerated; any other failure is
 * reported on stderr and the main loop is asked to quit.
 *
 * NOTE(review): the statement that selects which prefix of @path is
 * passed to mkdir() on each iteration is not visible here -- confirm.
 */
87 mkdirs (const char *path)
91 for (slash = strchr (path, '/'); slash; slash = strchr (slash + 1, '/')) {
/* `*path &&` skips the mkdir() call when path is the empty string. */
93 if (*path && mkdir (path, 0755) == -1 && errno != EEXIST) {
94 fprintf (stderr, "Could not create '%s'\n", path);
95 g_main_loop_quit (loop);
/*
 * get_url:
 * @url: absolute URL to request
 *
 * Requests @url with the global `method` over the global `session`,
 * prints the status (and, in one visible section, the request line and
 * response headers), writes the response body to a file, and calls
 * itself on every link that find_hrefs() returns for the body.
 *
 * Many control-flow lines (returns, else branches, braces) are missing
 * from this excerpt; remarks about them below are hedged.
 */
103 get_url (const char *url)
105 char *url_to_get, *slash, *name;
/* Test whether @url starts with the global `base` string.
 * Presumably a mismatch returns early -- not visible. */
112 if (strncmp (url, base, strlen (base)) != 0)
/* Test for a URL containing '?' that is not exactly `base`.
 * Presumably such URLs are ignored -- not visible. */
114 if (strchr (url, '?') && strcmp (url, base) != 0)
/* A URL ending in '/' is requested as ".../index.html"; otherwise a
 * plain copy of @url is used. */
117 slash = strrchr (url, '/');
118 if (slash && !slash[1])
119 url_to_get = g_strdup_printf ("%sindex.html", url);
121 url_to_get = g_strdup (url);
/* fetched_urls is used as a set: the URL is both key and value.
 * NOTE(review): when the lookup hits, url_to_get presumably needs
 * freeing before returning -- the branch body is not visible. */
123 if (g_hash_table_lookup (fetched_urls, url_to_get))
125 g_hash_table_insert (fetched_urls, url_to_get, url_to_get);
128 /* See if we're already downloading it, and create the
// `name` is the part of url_to_get that follows the `base` prefix.
132 name = url_to_get + strlen (base);
// Existence test on the file.
135 if (access (name, F_OK) == 0)
// Create or truncate the file with mode 0644.
139 fd = open (name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
// Build the request and set SOUP_MESSAGE_NO_REDIRECT on it.
143 msg = soup_message_new (method, url_to_get);
144 soup_message_set_flags (msg, SOUP_MESSAGE_NO_REDIRECT);
146 soup_session_send_message (session, msg);
// Check that the message URI's path begins with base_uri's path.
148 name = soup_message_get_uri (msg)->path;
149 if (strncmp (base_uri->path, name, strlen (base_uri->path)) != 0) {
150 fprintf (stderr, " Error: not under %s\n", base_uri->path);
// Dump of the request line, status line and response headers.
// NOTE(review): `name` is declared again here, so this is presumably an
// inner scope (likely conditional on `debug`) shadowing the outer
// `name` -- confirm.
155 SoupMessageHeadersIter iter;
156 const char *name, *value;
157 char *path = soup_uri_to_string (soup_message_get_uri (msg), TRUE);
// NOTE(review): the HTTP minor version digit is printed directly from
// soup_message_get_http_version(); assumes its values are 0 and 1.
159 printf ("%s %s HTTP/1.%d\n\n", method, path,
160 soup_message_get_http_version (msg));
161 printf ("HTTP/1.%d %d %s\n",
162 soup_message_get_http_version (msg),
163 msg->status_code, msg->reason_phrase);
165 soup_message_headers_iter_init (&iter, msg->response_headers);
166 while (soup_message_headers_iter_next (&iter, &name, &value))
167 printf ("%s: %s\r\n", name, value);
// One-line summary: name, numeric status, reason phrase.
170 printf ("%s: %d %s\n", name, msg->status_code, msg->reason_phrase);
// Advance `name` past the base path prefix.
172 name += strlen (base_uri->path);
// For a redirection status, read the Location header and print it.
176 if (SOUP_STATUS_IS_REDIRECTION (msg->status_code)) {
179 header = soup_message_headers_get (msg->response_headers, "Location");
182 printf (" -> %s\n", header);
// Test for a non-2xx status. Presumably returns -- not visible.
188 if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code))
192 fd = open (name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
// NOTE(review): the return value of write() is ignored here, so a short
// or failed write would go unnoticed. Whether fd is validated before
// this call is not visible.
195 write (fd, msg->response_body->data, msg->response_body->length);
// The test below is true only when a Content-Type header exists and does
// not start with "text/html"; a missing header (NULL) does not satisfy
// it. Presumably the true branch returns -- not visible.
200 header = soup_message_headers_get (msg->response_headers, "Content-Type");
201 if (header && g_ascii_strncasecmp (header, "text/html", 9) != 0)
// Extract links relative to @url and call get_url() on each one, freeing
// each string after use and then the array.
// NOTE(review): whether `uri` is released afterwards is not visible.
204 uri = soup_uri_new (url);
205 hrefs = find_hrefs (uri, msg->response_body->data, msg->response_body->length);
207 for (i = 0; i < hrefs->len; i++) {
208 get_url (hrefs->pdata[i]);
209 g_free (hrefs->pdata[i]);
211 g_ptr_array_free (hrefs, TRUE);
// Print the command-line synopsis to stderr.
// NOTE(review): the getopt() option string in main() also accepts `s`,
// which this text does not list -- confirm whether that is intended.
217 fprintf (stderr, "Usage: get [-c CAfile] [-p proxy URL] [-r] [-h] [-d] URL\n");
// main:
// Parses command-line options, parses the base URL, creates the
// fetched-URL table and a SoupSession, creates an output directory named
// after the process id, creates a GLib main loop, and finally releases
// the loop and the base URI.
//
// The option switch cases, the choice between the two session
// constructors and the initial fetch are not visible in this excerpt.
222 main (int argc, char **argv)
224 const char *cafile = NULL;
225 SoupURI *proxy = NULL;
226 gboolean synchronous = FALSE;
230 g_thread_init (NULL);
// GET is the default request method.
232 method = SOUP_METHOD_GET;
// Options c and p take an argument; d, h, r and s are flags.
234 while ((opt = getopt (argc, argv, "c:dhp:rs")) != -1) {
// Switch the request method to HEAD.
// NOTE(review): presumably the `h` option -- case label not visible.
245 method = SOUP_METHOD_HEAD;
// Parse the proxy argument as a URI; the message below is the
// parse-failure report.
250 proxy = soup_uri_new (optarg);
252 fprintf (stderr, "Could not parse %s as URI\n",
// Parse the base URL; the message below is the parse-failure report.
277 base_uri = soup_uri_new (base);
279 fprintf (stderr, "Could not parse '%s' as a URL\n", base);
// String-keyed table created without key/value destroy functions.
283 fetched_urls = g_hash_table_new (g_str_hash, g_str_equal);
// Two session constructors with the same CA file, proxy and user agent.
// NOTE(review): presumably selected by `synchronous` -- the condition is
// not visible.
286 session = soup_session_sync_new_with_options (
287 SOUP_SESSION_SSL_CA_FILE, cafile,
288 SOUP_SESSION_PROXY_URI, proxy,
289 SOUP_SESSION_USER_AGENT, "get ",
292 session = soup_session_async_new_with_options (
293 SOUP_SESSION_SSL_CA_FILE, cafile,
294 SOUP_SESSION_PROXY_URI, proxy,
295 SOUP_SESSION_USER_AGENT, "get ",
// The output directory is named after the process id. Any mkdir()
// failure (including an already existing directory) is reported.
302 outdir = g_strdup_printf ("%lu", (unsigned long)getpid ());
303 if (mkdir (outdir, 0755) != 0) {
304 fprintf (stderr, "Could not make output directory\n");
307 printf ("Output directory is '%s'\n", outdir);
313 loop = g_main_loop_new (NULL, TRUE);
// Cleanup of the main loop and the parsed base URI.
318 g_main_loop_unref (loop);
320 soup_uri_free (base_uri);