1 /* vi: set sw=4 ts=4: */
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
12 //usage:#define wget_trivial_usage
13 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage: " [--no-check-certificate] [--no-cache]" */
18 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
20 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
24 //usage:#define wget_full_usage "\n\n"
25 //usage: "Retrieve files via HTTP or FTP\n"
26 //usage: "\n -s Spider mode - only check file existence"
27 //usage: "\n -c Continue retrieval of aborted transfer"
28 //usage: "\n -q Quiet"
29 //usage: "\n -P DIR Save to DIR (default .)"
30 //usage: IF_FEATURE_WGET_TIMEOUT(
31 //usage: "\n -T SEC Network read timeout is SEC seconds"
33 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
34 //usage: "\n -U STR Use STR for User-Agent header"
35 //usage: "\n -Y Use proxy ('on' or 'off')"
// log_io() has two alternative definitions: the first forwards its arguments
// to bb_error_msg(), the second expands to a no-op expression.
// The preprocessor condition that selects between them is not visible in
// this listing.
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
42 # define log_io(...) ((void)0)
// Applet-wide state, reached everywhere through the G macro defined below.
// The enclosing declaration line is not visible in this listing.
//   content_len / beg_range - size bookkeeping for the current transfer.
//   transferred / curfile   - only present with ENABLE_FEATURE_WGET_STATUSBAR.
//   timeout_seconds         - only present with ENABLE_FEATURE_WGET_TIMEOUT.
//   chunked / got_clen      - flags describing how the body length is known.
//   wget_buf                - one shared scratch buffer; in this file it holds
//                             protocol reply lines, base64 output and file data,
//                             so each use overwrites the previous contents.
// INIT_G() points ptr_to_globals at an xzalloc(sizeof(G)) allocation and
// FINI_G() releases it through FREE_PTR_TO_GLOBALS().
58 off_t content_len; /* Content-length of the file */
59 off_t beg_range; /* Range at which continue begins */
60 #if ENABLE_FEATURE_WGET_STATUSBAR
61 off_t transferred; /* Number of bytes transferred so far */
62 const char *curfile; /* Name of current file being transferred */
66 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
70 char *fname_out; /* where to direct output (-O) */
71 const char *proxy_flag; /* Use proxies if env vars are set */
72 const char *user_agent; /* "User-Agent" header field */
73 #if ENABLE_FEATURE_WGET_TIMEOUT
74 unsigned timeout_seconds;
79 smallint chunked; /* chunked transfer encoding */
80 smallint got_clen; /* got content-length: from server */
81 /* Local downloads do benefit from big buffer.
82 * With 512 byte buffer, it was measured to be
83 * an order of magnitude slower than with big one.
85 uint64_t just_to_align_next_member;
86 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
88 #define G (*ptr_to_globals)
89 #define INIT_G() do { \
90 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
92 #define FINI_G() do { \
93 FREE_PTR_TO_GLOBALS(); \
// Option bit flags; the rest of the file tests them against option_mask32.
// Bits 0..9 are assigned in sequence, and the note below requires that this
// order stays in step with the option string.
// WGET_OPT_HEADER and WGET_OPT_POST_DATA are multiplied by
// ENABLE_FEATURE_WGET_LONG_OPTIONS, so they evaluate to 0 (a test against
// them is always false) when that feature macro is 0.
97 /* Must match option string! */
99 WGET_OPT_CONTINUE = (1 << 0),
100 WGET_OPT_SPIDER = (1 << 1),
101 WGET_OPT_QUIET = (1 << 2),
102 WGET_OPT_OUTNAME = (1 << 3),
103 WGET_OPT_PREFIX = (1 << 4),
104 WGET_OPT_PROXY = (1 << 5),
105 WGET_OPT_USER_AGENT = (1 << 6),
106 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
107 WGET_OPT_RETRIES = (1 << 8),
108 WGET_OPT_PASSIVE = (1 << 9),
109 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
110 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
// progress_meter(flag): drives the download progress bar kept in G.pmt.
//   flag - compared against PROGRESS_START and PROGRESS_END; callers in this
//          file also pass PROGRESS_BUMP.
// The function first tests WGET_OPT_QUIET (the statement it guards is not
// visible in this listing; presumably an early return - confirm).
// On PROGRESS_START the bar is initialised with G.curfile as its label.
// The total handed to bb_progress_update() is 0 when the transfer is chunked
// or no content length was received; otherwise it is
// beg_range + transferred + content_len.
// On PROGRESS_END the bar is freed and a newline is written to stderr.
// The last line is an empty inline stub with the same name; it is presumably
// the variant used when ENABLE_FEATURE_WGET_STATUSBAR is off (the directive
// separating the two variants is not visible).
118 #if ENABLE_FEATURE_WGET_STATUSBAR
119 static void progress_meter(int flag)
121 if (option_mask32 & WGET_OPT_QUIET)
124 if (flag == PROGRESS_START)
125 bb_progress_init(&G.pmt, G.curfile);
127 bb_progress_update(&G.pmt,
130 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
133 if (flag == PROGRESS_END) {
134 bb_progress_free(&G.pmt);
135 bb_putchar_stderr('\n');
140 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
// strip_ipv6_scope_id(host): removes an IPv6 zone identifier from host,
// editing the string in place.
//   host - mutable host string; only the bracketed IPv6 form is handled here
//          (see the note about the unbracketed form below).
// Visible steps: locate the percent sign, locate the closing bracket, treat
// the address as malformed unless the bracket is followed by a colon or the
// end of the string, then copy the tail that starts at the bracket over the
// scope text with overlapping_strcpy().
// The checks that lead to the early returns are only partly visible.
144 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
145 * local addresses can have a scope identifier to specify the
146 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
147 * identifier is only valid on a single node.
149 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
150 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
151 * in the Host header as invalid requests, see
152 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
154 static void strip_ipv6_scope_id(char *host)
158 /* bbox wget actually handles IPv6 addresses without [], like
159 * wget "http://::1/xxx", but this is not standard.
160 * To save code, _here_ we do not support it. */
163 return; /* not IPv6 */
165 scope = strchr(host, '%');
169 /* Remove the IPv6 zone identifier from the host address */
170 cp = strchr(host, ']');
171 if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
172 /* malformed address (not "[xx]:nn" or "[xx]") */
176 /* cp points to "]...", scope points to "%eth0]..." */
177 overlapping_strcpy(scope, cp);
// base64enc(str): base64-encodes str into the shared buffer G.wget_buf using
// bb_uuencode() with the bb_uuenc_tbl_base64 table.
//   str - NUL-terminated input; its length is clamped to
//         sizeof(G.wget_buf)/4*3 - 10, so longer input is silently truncated
//         rather than overflowing the buffer.
// The return statement is not visible in this listing; callers print the
// result with %s, so it presumably returns G.wget_buf - confirm.
// Because the output lives in G.wget_buf, it is only valid until the next use
// of that buffer.
// Compiled only when ENABLE_FEATURE_WGET_AUTHENTICATION is set.
180 #if ENABLE_FEATURE_WGET_AUTHENTICATION
181 /* Base64-encode character string. */
182 static char *base64enc(const char *str)
184 unsigned len = strlen(str);
185 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
186 len = sizeof(G.wget_buf)/4*3 - 10;
187 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
// sanitize_string(s): takes a mutable string and returns a char pointer.
// Only the signature and the first local are visible in this listing: the
// string is walked through an unsigned char pointer.
// NOTE(review): every caller in this file feeds the result to a %s in an
// error message built from server-supplied or user-supplied text, so this
// presumably rewrites unprintable bytes in place and returns s - confirm
// against the full source.
192 static char* sanitize_string(char *s)
194 unsigned char *p = (void *) s;
// alarm_handler(sig): handler that wget_main installs for SIGALRM.
//   sig - unused.
// It ends the program through bb_error_msg_and_die(). As the note below says,
// that call is not strictly safe inside a signal handler.
// NOTE(review): a line between the note and the die call is missing from this
// listing. open_socket() sets G.connecting around the connect, so the die is
// presumably conditional on that flag - confirm.
// Compiled only when ENABLE_FEATURE_WGET_TIMEOUT is set.
201 #if ENABLE_FEATURE_WGET_TIMEOUT
202 static void alarm_handler(int sig UNUSED_PARAM)
204 /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
206 bb_error_msg_and_die("download timed out");
// open_socket(lsa): connects a stream socket to lsa and wraps the descriptor
// in a stdio stream opened in r+ mode.
//   lsa - address to connect to.
// With FEATURE_WGET_TIMEOUT the connect is bracketed: alarm() is armed with
// G.timeout_seconds and G.connecting is set to 1 before xconnect_stream(),
// then G.connecting is cleared afterwards.
// NOTE(review): no alarm cancellation is visible in this listing - confirm
// whether a later SIGALRM is meant to be harmless once G.connecting is 0.
// The fdopen() failure check itself is not visible; the visible reaction is
// to die with bb_msg_memory_exhausted.
// The return statement is not visible; callers use the result as the stream.
210 static FILE *open_socket(len_and_sockaddr *lsa)
215 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
216 fd = xconnect_stream(lsa);
217 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
219 /* glibc 2.4 seems to try seeking on it - ??! */
220 /* hopefully it understands what ESPIPE means... */
221 fp = fdopen(fd, "r+");
223 bb_perror_msg_and_die(bb_msg_memory_exhausted);
// fgets_and_trim(fp): reads one line from fp into the shared G.wget_buf and
// trims the line ending.
//   fp - stream to read from.
// At most sizeof(G.wget_buf) - 1 is passed to fgets() as the size. A NULL
// result from fgets() (EOF or error) is fatal.
// The buffer is then searched for a newline and for a carriage return with
// strchrnul(); the statements that cut the string and pick the return value
// are not visible in this listing, so see the contract stated below.
// The trimmed line is passed to log_io().
228 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
229 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
230 static char fgets_and_trim(FILE *fp)
235 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
236 bb_perror_msg_and_die("error getting response");
238 buf_ptr = strchrnul(G.wget_buf, '\n');
241 buf_ptr = strchrnul(G.wget_buf, '\r');
244 log_io("< %s", G.wget_buf);
// ftpcmd(s1, s2, fp): sends one FTP command and parses the numeric reply code.
//   s1, s2 - command and argument, written as s1 followed by s2 and CRLF.
//            Callers in this file also pass NULL for both in order to only
//            read a reply; the guard that must skip the fprintf in that case
//            is not visible in this listing - confirm.
//   fp     - control connection stream.
// The loop (body not visible) repeats until G.wget_buf starts with a digit
// and has a space at index 3, i.e. until the final line of a reply.
// Side effect: G.wget_buf[3] is overwritten with NUL, so the reply text
// starts at G.wget_buf + 4, which is how callers report server messages.
// The result variable holds the code parsed by xatoi_positive(); the return
// statement itself is not visible.
249 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
255 fprintf(fp, "%s%s\r\n", s1, s2);
257 log_io("> %s%s", s1, s2);
262 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
// Cut the line right after the three-digit code so it parses as a number.
264 G.wget_buf[3] = '\0';
265 result = xatoi_positive(G.wget_buf);
// parse_url(src_url, h): splits a URL into the fields of h.
//   src_url - URL text; it is duplicated, so the caller keeps ownership of it.
//   h       - receives the results. h->allocated owns the duplicate and the
//             other string fields point into it, so it must stay alive while
//             they are in use (download_one_url frees it at the end).
// Scheme handling: http:// selects the http service port (default 80),
// ftp:// selects the ftp service port (default 21); anything else is fatal.
// Path split: the earliest of slash, question mark or hash after the host
// start marks the end of the host. When that character is a question mark or
// hash, the host text is moved one byte to the left with memmove (see the
// example below); the follow-up statements are not visible in this listing.
// Credentials: the last at-sign in the host part separates user:password,
// which is percent-decoded in place (non-strict mode) and stored in h->user.
// The assignments to h->host and h->path are not visible here.
270 static void parse_url(const char *src_url, struct host_info *h)
275 h->allocated = url = xstrdup(src_url);
277 if (strncmp(url, "http://", 7) == 0) {
278 h->port = bb_lookup_port("http", "tcp", 80);
281 } else if (strncmp(url, "ftp://", 6) == 0) {
282 h->port = bb_lookup_port("ftp", "tcp", 21);
286 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
289 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
290 // 'GET /?var=a/b HTTP 1.0'
291 // and saves 'index.html?var=a%2Fb' (we save 'b')
292 // wget 'http://busybox.net?login=john@doe':
293 // request: 'GET /?login=john@doe HTTP/1.0'
294 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
295 // wget 'http://busybox.net#test/test':
296 // request: 'GET / HTTP/1.0'
297 // saves: 'index.html' (we save 'test')
299 // We also don't add unique .N suffix if file exists...
300 sp = strchr(h->host, '/');
301 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
302 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
305 } else if (*sp == '/') {
308 } else { // '#' or '?'
309 // http://busybox.net?login=john@doe is a valid URL
310 // memmove converts to:
311 // http:/busybox.nett?login=john@doe...
312 memmove(h->host - 1, h->host, sp - h->host);
318 // We used to set h->user to NULL here, but this interferes
319 // with handling of code 302 ("object was moved")
321 sp = strrchr(h->host, '@');
323 // URL-decode "user:password" string before base64-encoding:
324 // wget http://test:my%20pass@example.com should send
325 // Authorization: Basic dGVzdDpteSBwYXNz
326 // which decodes to "test:my pass".
327 // Standard wget and curl do this too.
329 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
// gethdr(fp): reads one HTTP header line from fp into G.wget_buf and returns
// a pointer to its value.
//   fp - stream positioned at the start of a header line.
// Callers loop until the result is NULL, and the visible check for an empty
// line is the end-of-headers test; the return statements themselves are not
// visible in this listing.
// The header name is scanned over alphanumerics, dash, dot and underscore and
// lower-cased in place (the converting statement is not visible; the range
// notes below describe its effect). A line whose name does not end where
// expected is fatal.
// The value start is found with skip_whitespace() and kept in hdrval.
// If the line did not fit in the buffer, the remainder is read and discarded
// up to the newline or EOF.
// download_one_url matches G.wget_buf against its keyword list afterwards,
// so the name is presumably NUL-terminated in place - confirm.
336 static char *gethdr(FILE *fp)
341 /* retrieve header line */
342 c = fgets_and_trim(fp);
344 /* end of the headers? */
345 if (G.wget_buf[0] == '\0')
348 /* convert the header name to lower case */
349 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
351 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
352 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
353 * "A-Z" maps to "a-z".
354 * "@[\]" can't occur in header names.
355 * "^_" maps to "~,DEL" (which is wrong).
356 * "^" was never seen yet, "_" was seen from web.archive.org
357 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
362 /* verify we are at the end of the header name */
364 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
366 /* locate the start of the header value */
368 hdrval = skip_whitespace(s);
371 /* Rats! The buffer isn't big enough to hold the entire header value */
372 while (c = getc(fp), c != EOF && c != '\n')
// reset_beg_range_to_zero(): called when the server did not honor a resume
// request (both the FTP REST path and the HTTP Range path call it).
// It reports the failure and rewinds G.output_fd to offset 0.
// The file is deliberately not truncated here; retrieve_file_data truncates
// it once the download has finished.
// NOTE(review): one line between the message and the seek is missing from
// this listing; going by the function name it presumably clears G.beg_range
// - confirm.
379 static void reset_beg_range_to_zero(void)
381 bb_error_msg("restart failed");
383 xlseek(G.output_fd, 0, SEEK_SET);
384 /* Done at the end instead: */
385 /* ftruncate(G.output_fd, 0); */
// prepare_ftp_session(dfpp, target, lsa): logs in to an FTP server and starts
// a passive-mode retrieval of target->path.
//   dfpp   - out parameter; receives the data connection stream.
//   target - parsed URL. target->user is replaced with an anonymous login
//            string in a branch whose condition is not visible (presumably
//            when no user was given - confirm).
//   lsa    - server address. NOTE: it is modified in place, because the port
//            is overwritten with the data port before the data connection
//            is opened.
// Returns the control connection stream (the return statement is not visible;
// the caller assigns the result to its control stream variable).
// Sequence, with every unexpected reply being fatal:
//   1. connect and expect greeting 220
//   2. split user:password at the first colon, send USER then PASS (230 is
//      the accepted PASS reply; the case labels are not visible)
//   3. TYPE I
//   4. SIZE: on 213 the value is parsed into G.content_len; a negative value
//      or a set errno is fatal
//   5. PASV: expect 227, then take the last two comma-separated numbers as
//      P1 and P2, each limited to 0..255, and use P1*256+P2 as the data port
//   6. if resuming (G.beg_range != 0): REST; on 350 the expected length is
//      reduced by beg_range, otherwise the resume is abandoned
//   7. RETR: any reply code above 150 is fatal
388 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
395 target->user = xstrdup("anonymous:busybox@");
397 sfp = open_socket(lsa);
398 if (ftpcmd(NULL, NULL, sfp) != 220)
399 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
402 * Splitting username:password pair,
405 str = strchr(target->user, ':');
408 switch (ftpcmd("USER ", target->user, sfp)) {
412 if (ftpcmd("PASS ", str, sfp) == 230)
414 /* fall through (failed login) */
416 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
419 ftpcmd("TYPE I", NULL, sfp);
424 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
425 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
426 if (G.content_len < 0 || errno) {
427 bb_error_msg_and_die("SIZE value is garbage");
433 * Entering passive mode
435 if (ftpcmd("PASV", NULL, sfp) != 227) {
437 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
439 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
440 // Server's IP is N1.N2.N3.N4 (we ignore it)
441 // Server's port for data connection is P1*256+P2
442 str = strrchr(G.wget_buf, ')');
443 if (str) str[0] = '\0';
444 str = strrchr(G.wget_buf, ',');
445 if (!str) goto pasv_error;
446 port = xatou_range(str+1, 0, 255);
448 str = strrchr(G.wget_buf, ',');
449 if (!str) goto pasv_error;
450 port += xatou_range(str+1, 0, 255) * 256;
451 set_nport(&lsa->u.sa, htons(port));
453 *dfpp = open_socket(lsa);
455 if (G.beg_range != 0) {
456 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
457 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
458 G.content_len -= G.beg_range;
460 reset_beg_range_to_zero();
463 if (ftpcmd("RETR ", target->path, sfp) > 150)
464 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
// retrieve_file_data(dfp): copies the response body from dfp to G.output_fd,
// using G.wget_buf as the transfer buffer.
//   dfp - stream carrying the body (HTTP connection or FTP data connection).
// Reading: each pass reads up to sizeof(G.wget_buf) bytes with fread(); when
// the remaining G.content_len is smaller than the buffer, the read size is
// limited to it and the loop ends once it is not positive (the condition
// guarding that limit is not visible in this listing). Data read is written
// out with xwrite().
// Nonblocking mode: when the status bar or the timeout feature is compiled
// in, the descriptor is switched to nonblocking for the read loop and back
// afterwards. A short read with errno other than EAGAIN is treated as a read
// error or EOF (the test separating the two is not visible). On EAGAIN the
// code waits up to one second in safe_poll().
// Timeout: second_cnt starts at G.timeout_seconds, is reset after a
// successful read, and is decremented per idle poll; reaching zero is fatal.
// A timeout of 0 disables the countdown.
// Chunked transfers: after each chunk the empty separator line is consumed
// and the next chunk size is parsed as hexadecimal; size 0 ends the transfer.
// Afterwards the output file is cut at the current write position (the
// condition for this is not visible), and the chunked and got_clen flags are
// forced so the final progress update shows a full bar.
469 static void NOINLINE retrieve_file_data(FILE *dfp)
471 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
472 # if ENABLE_FEATURE_WGET_TIMEOUT
473 unsigned second_cnt = G.timeout_seconds;
475 struct pollfd polldata;
477 polldata.fd = fileno(dfp);
478 polldata.events = POLLIN | POLLPRI;
480 progress_meter(PROGRESS_START);
485 /* Loops only if chunked */
488 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
489 /* Must use nonblocking I/O, otherwise fread will loop
490 * and *block* until it reads full buffer,
491 * which messes up progress bar and/or timeout logic.
492 * Because of nonblocking I/O, we need to dance
493 * very carefully around EAGAIN. See explanation at
496 ndelay_on(polldata.fd);
502 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
503 /* fread internally uses read loop, which in our case
504 * is usually exited when we get EAGAIN.
505 * In this case, libc sets error marker on the stream.
506 * Need to clear it before next fread to avoid possible
507 * rare false positive ferror below. Rare because usually
508 * fread gets more than zero bytes, and we don't fall
509 * into if (n <= 0) ...
514 rdsz = sizeof(G.wget_buf);
516 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
517 if ((int)G.content_len <= 0)
519 rdsz = (unsigned)G.content_len;
522 n = fread(G.wget_buf, 1, rdsz, dfp);
525 xwrite(G.output_fd, G.wget_buf, n);
526 #if ENABLE_FEATURE_WGET_STATUSBAR
531 if (G.content_len == 0)
534 #if ENABLE_FEATURE_WGET_TIMEOUT
535 second_cnt = G.timeout_seconds;
542 * If error occurs, or EOF is reached, the return value
543 * is a short item count (or zero).
544 * fread does not distinguish between EOF and error.
546 if (errno != EAGAIN) {
548 progress_meter(PROGRESS_END);
549 bb_perror_msg_and_die(bb_msg_read_error);
551 break; /* EOF, not error */
554 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
555 /* It was EAGAIN. There is no data. Wait up to one second
556 * then abort if timed out, or update the bar and try reading again.
558 if (safe_poll(&polldata, 1, 1000) == 0) {
559 # if ENABLE_FEATURE_WGET_TIMEOUT
560 if (second_cnt != 0 && --second_cnt == 0) {
561 progress_meter(PROGRESS_END);
562 bb_error_msg_and_die("download timed out");
565 /* We used to loop back to poll here,
566 * but there is no great harm in letting fread
567 * to try reading anyway.
570 /* Need to do it _every_ second for "stalled" indicator
571 * to be shown properly.
573 progress_meter(PROGRESS_BUMP);
575 } /* while (reading data) */
577 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
579 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
584 fgets_and_trim(dfp); /* Eat empty line */
587 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
588 /* FIXME: error check? */
589 if (G.content_len == 0)
590 break; /* all done! */
593 * Note that fgets may result in some data being buffered in dfp.
594 * We loop back to fread, which will retrieve this data.
595 * Also note that code has to be arranged so that fread
596 * is done _before_ one-second poll wait - poll doesn't know
597 * about stdio buffering and can result in spurious one second waits!
601 /* If -c failed, we restart from the beginning,
602 * but we do not truncate file then, we do it only now, at the end.
603 * This lets user to ^C if his 99% complete 10 GB file download
604 * failed to restart *without* losing the almost complete file.
607 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
608 if (pos != (off_t)-1)
609 ftruncate(G.output_fd, pos);
612 /* Draw full bar and free its resources */
613 G.chunked = 0; /* makes it show 100% even for chunked download */
614 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
615 progress_meter(PROGRESS_END);
// download_one_url(url): performs one complete download of url.
//   url - URL text taken from the command line.
// Visible stages (several connecting lines are missing from this listing):
//   1. Parse url into target. Decide on proxy use: it is considered unless
//      G.proxy_flag equals off, and then only if the ftp_proxy or http_proxy
//      environment variable is non-empty; the proxy URL is parsed into
//      server. Without a proxy, server takes the port and host of target.
//      With ENABLE_FEATURE_IPV6 the host is duplicated first, because
//      target.host is edited by strip_ipv6_scope_id() right after.
//   2. Without -O, derive the output name from the last path component,
//      falling back to index.html, and combine it with G.dir_prefix in one
//      branch; a private copy is kept because a redirect may free the path.
//   3. With -c, open the existing file and use its size as G.beg_range.
//   4. Resolve the server address; print it unless quiet.
//   5. HTTP (also used for FTP through a proxy): send the request line, Host,
//      User-Agent, Connection: close, optional credentials, optional Range,
//      extra headers and POST data; then read the status line and headers.
//      Recognised headers are content-length (a negative or unparsable value
//      is fatal), transfer-encoding (only chunked is accepted) and location
//      (followed while status is 300 or above, bounded by redir_limit).
//   6. Direct FTP: prepare_ftp_session() sets up control and data streams.
//   7. Unless in spider mode, open the output file with G.o_flags (the
//      condition around the open is not visible) and copy the body.
//   8. For FTP, expect reply 226 on the control connection.
//   9. Free the per-URL allocations.
618 static void download_one_url(const char *url)
620 bool use_proxy; /* Use proxies if env vars are set */
622 len_and_sockaddr *lsa;
623 FILE *sfp; /* socket to web/ftp server */
624 FILE *dfp; /* socket to ftp server (data) */
626 char *fname_out_alloc;
627 char *redirected_path = NULL;
628 struct host_info server;
629 struct host_info target;
631 server.allocated = NULL;
632 target.allocated = NULL;
636 parse_url(url, &target);
638 /* Use the proxy if necessary */
639 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
641 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
642 use_proxy = (proxy && proxy[0]);
644 parse_url(proxy, &server);
647 server.port = target.port;
648 if (ENABLE_FEATURE_IPV6) {
649 //free(server.allocated); - can't be non-NULL
650 server.host = server.allocated = xstrdup(target.host);
652 server.host = target.host;
656 if (ENABLE_FEATURE_IPV6)
657 strip_ipv6_scope_id(target.host);
659 /* If there was no -O FILE, guess output filename */
660 fname_out_alloc = NULL;
661 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
662 G.fname_out = bb_get_last_path_component_nostrip(target.path);
663 /* handle "wget http://kernel.org//" */
664 if (G.fname_out[0] == '/' || !G.fname_out[0])
665 G.fname_out = (char*)"index.html";
666 /* -P DIR is considered only if there was no -O FILE */
668 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
670 /* redirects may free target.path later, need to make a copy */
671 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
674 #if ENABLE_FEATURE_WGET_STATUSBAR
675 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
678 /* Determine where to start transfer */
680 if (option_mask32 & WGET_OPT_CONTINUE) {
681 G.output_fd = open(G.fname_out, O_WRONLY);
682 if (G.output_fd >= 0) {
683 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
685 /* File doesn't exist. We do not create file here yet.
686 * We are not sure it exists on remote side */
691 lsa = xhost2sockaddr(server.host, server.port);
692 if (!(option_mask32 & WGET_OPT_QUIET)) {
693 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
694 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
698 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
701 if (use_proxy || !target.is_ftp) {
709 /* Open socket to http server */
710 sfp = open_socket(lsa);
712 /* Send HTTP request */
714 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
715 target.is_ftp ? "f" : "ht", target.host,
718 if (option_mask32 & WGET_OPT_POST_DATA)
719 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
721 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
724 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
725 target.host, G.user_agent);
727 /* Ask server to close the connection as soon as we are done
728 * (IOW: we do not intend to send more requests)
730 fprintf(sfp, "Connection: close\r\n");
732 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// The +6 offset skips the leading Proxy- prefix of the literal, so this call
// emits a plain Authorization header carrying the target credentials.
734 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
735 base64enc(target.user));
737 if (use_proxy && server.user) {
738 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
739 base64enc(server.user));
743 if (G.beg_range != 0)
744 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
746 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
748 fputs(G.extra_headers, sfp);
750 if (option_mask32 & WGET_OPT_POST_DATA) {
752 "Content-Type: application/x-www-form-urlencoded\r\n"
753 "Content-Length: %u\r\n"
756 (int) strlen(G.post_data), G.post_data
761 fprintf(sfp, "\r\n");
767 * Retrieve HTTP response line and check for "200" status code.
773 str = skip_non_whitespace(str);
774 str = skip_whitespace(str);
775 // FIXME: no error check
776 // xatou wouldn't work: "200 OK"
781 while (gethdr(sfp) != NULL)
782 /* eat all remaining headers */;
786 Response 204 doesn't say "null file", it says "metadata
787 has changed but data didn't":
789 "10.2.5 204 No Content
790 The server has fulfilled the request but does not need to return
791 an entity-body, and might want to return updated metainformation.
792 The response MAY include new or updated metainformation in the form
793 of entity-headers, which if present SHOULD be associated with
794 the requested variant.
796 If the client is a user agent, it SHOULD NOT change its document
797 view from that which caused the request to be sent. This response
798 is primarily intended to allow input for actions to take place
799 without causing a change to the user agent's active document view,
800 although any new or updated metainformation SHOULD be applied
801 to the document currently in the user agent's active view.
803 The 204 response MUST NOT include a message-body, and thus
804 is always terminated by the first empty line after the header fields."
806 However, in real world it was observed that some web servers
807 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
810 if (G.beg_range != 0) {
811 /* "Range:..." was not honored by the server.
812 * Restart download from the beginning.
814 reset_beg_range_to_zero();
817 case 300: /* redirection */
822 case 206: /* Partial Content */
823 if (G.beg_range != 0)
824 /* "Range:..." worked. Good. */
826 /* Partial Content even though we did not ask for it??? */
829 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
833 * Retrieve HTTP headers.
835 while ((str = gethdr(sfp)) != NULL) {
836 static const char keywords[] ALIGN1 =
837 "content-length\0""transfer-encoding\0""location\0";
839 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
843 /* gethdr converted "FOO:" string to lowercase */
845 /* strip trailing whitespace */
846 char *s = strchrnul(str, '\0') - 1;
847 while (s >= str && (*s == ' ' || *s == '\t')) {
851 key = index_in_strings(keywords, G.wget_buf) + 1;
852 if (key == KEY_content_length) {
853 G.content_len = BB_STRTOOFF(str, NULL, 10);
854 if (G.content_len < 0 || errno) {
855 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
860 if (key == KEY_transfer_encoding) {
861 if (strcmp(str_tolower(str), "chunked") != 0)
862 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
865 if (key == KEY_location && status >= 300) {
866 if (--redir_limit == 0)
867 bb_error_msg_and_die("too many redirections");
870 free(redirected_path);
871 target.path = redirected_path = xstrdup(str+1);
872 /* lsa stays the same: it's on the same server */
874 parse_url(str, &target);
876 free(server.allocated);
877 server.allocated = NULL;
878 server.host = target.host;
879 /* strip_ipv6_scope_id(target.host); - no! */
880 /* we assume remote never gives us IPv6 addr with scope id */
881 server.port = target.port;
884 } /* else: lsa stays the same: we use proxy */
886 goto establish_session;
889 // if (status >= 300)
890 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
892 /* For HTTP, data is pumped over the same connection */
899 sfp = prepare_ftp_session(&dfp, &target, lsa);
904 if (!(option_mask32 & WGET_OPT_SPIDER)) {
906 G.output_fd = xopen(G.fname_out, G.o_flags);
907 retrieve_file_data(dfp);
908 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
915 /* It's ftp. Close data connection properly */
917 if (ftpcmd(NULL, NULL, sfp) != 226)
918 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
919 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
923 free(server.allocated);
924 free(target.allocated);
925 free(fname_out_alloc);
926 free(redirected_path);
// wget_main(argc, argv): applet entry point.
//   argc - unused.
//   argv - command line; options are parsed by getopt32() and the remaining
//          arguments are handed one by one to download_one_url() (the loop
//          header is not visible in this listing).
// Returns EXIT_SUCCESS; failures terminate the process inside the helpers.
// Setup visible here:
//   - long option table (only with FEATURE_WGET_LONG_OPTIONS); options with
//     byte values 0xff..0xfb have no short form, and several are accepted
//     only for compatibility, as noted in the table
//   - defaults: timeout 900 seconds with alarm_handler on SIGALRM (timeout
//     feature only), proxy flag on, user agent Wget
//   - every --header value is appended to G.extra_headers followed by CRLF;
//     the buffer size is accumulated as strlen + 2 per entry (the initial
//     size value is not visible)
//   - output flags: by default the file is created exclusively, so an
//     existing file is an error; with -O FILE overwriting is allowed; with
//     -O and a lone dash the continue option is cleared (the rest of that
//     branch is not visible)
// At the end an open G.output_fd is closed, and G.extra_headers is freed
// when both FEATURE_CLEAN_UP and FEATURE_WGET_LONG_OPTIONS are enabled.
929 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
930 int wget_main(int argc UNUSED_PARAM, char **argv)
932 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
933 static const char wget_longopts[] ALIGN1 =
934 /* name, has_arg, val */
935 "continue\0" No_argument "c"
936 //FIXME: -s isn't --spider, it's --save-headers!
937 "spider\0" No_argument "s"
938 "quiet\0" No_argument "q"
939 "output-document\0" Required_argument "O"
940 "directory-prefix\0" Required_argument "P"
941 "proxy\0" Required_argument "Y"
942 "user-agent\0" Required_argument "U"
943 #if ENABLE_FEATURE_WGET_TIMEOUT
944 "timeout\0" Required_argument "T"
947 // "tries\0" Required_argument "t"
948 /* Ignored (we always use PASV): */
949 "passive-ftp\0" No_argument "\xff"
950 "header\0" Required_argument "\xfe"
951 "post-data\0" Required_argument "\xfd"
952 /* Ignored (we don't do ssl) */
953 "no-check-certificate\0" No_argument "\xfc"
954 /* Ignored (we don't support caching) */
955 "no-cache\0" No_argument "\xfb"
959 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
960 llist_t *headers_llist = NULL;
965 #if ENABLE_FEATURE_WGET_TIMEOUT
966 G.timeout_seconds = 900;
967 signal(SIGALRM, alarm_handler);
969 G.proxy_flag = "on"; /* use proxies if env vars are set */
970 G.user_agent = "Wget"; /* "User-Agent" header field */
972 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
973 applet_long_options = wget_longopts;
975 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
976 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
977 &G.fname_out, &G.dir_prefix,
978 &G.proxy_flag, &G.user_agent,
979 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
980 NULL /* -t RETRIES */
981 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
982 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
986 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
990 llist_t *ll = headers_llist;
992 size += strlen(ll->data) + 2;
995 G.extra_headers = cp = xmalloc(size);
996 while (headers_llist) {
997 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
1003 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1004 if (G.fname_out) { /* -O FILE ? */
1005 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1007 option_mask32 &= ~WGET_OPT_CONTINUE;
1009 /* compat with wget: -O FILE can overwrite */
1010 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1014 download_one_url(*argv++);
1016 if (G.output_fd >= 0)
1017 xclose(G.output_fd);
1019 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1020 free(G.extra_headers);
1024 return EXIT_SUCCESS;