wget: add support for connect timeout
[platform/upstream/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage:    "        [--no-check-certificate] [--no-cache]" */
18 //usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19 //usage:        )
20 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23 //usage:        )
24 //usage:#define wget_full_usage "\n\n"
25 //usage:       "Retrieve files via HTTP or FTP\n"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
39 #if 0
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
41 #else
42 # define log_io(...) ((void)0)
43 #endif
44
45
46 struct host_info {
47         char *allocated;
48         const char *path;
49         const char *user;
50         char       *host;
51         int         port;
52         smallint    is_ftp;
53 };
54
55
56 /* Globals */
57 struct globals {
58         off_t content_len;        /* Content-length of the file */
59         off_t beg_range;          /* Range at which continue begins */
60 #if ENABLE_FEATURE_WGET_STATUSBAR
61         off_t transferred;        /* Number of bytes transferred so far */
62         const char *curfile;      /* Name of current file being transferred */
63         bb_progress_t pmt;
64 #endif
65         char *dir_prefix;
66 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
67         char *post_data;
68         char *extra_headers;
69 #endif
70         char *fname_out;        /* where to direct output (-O) */
71         const char *proxy_flag; /* Use proxies if env vars are set */
72         const char *user_agent; /* "User-Agent" header field */
73 #if ENABLE_FEATURE_WGET_TIMEOUT
74         unsigned timeout_seconds;
75         bool connecting;
76 #endif
77         int output_fd;
78         int o_flags;
79         smallint chunked;         /* chunked transfer encoding */
80         smallint got_clen;        /* got content-length: from server  */
81         /* Local downloads do benefit from big buffer.
82          * With 512 byte buffer, it was measured to be
83          * an order of magnitude slower than with big one.
84          */
85         uint64_t just_to_align_next_member;
86         char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
87 } FIX_ALIASING;
88 #define G (*ptr_to_globals)
89 #define INIT_G() do { \
90         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
91 } while (0)
92
93
94 /* Must match option string! */
95 enum {
96         WGET_OPT_CONTINUE   = (1 << 0),
97         WGET_OPT_SPIDER     = (1 << 1),
98         WGET_OPT_QUIET      = (1 << 2),
99         WGET_OPT_OUTNAME    = (1 << 3),
100         WGET_OPT_PREFIX     = (1 << 4),
101         WGET_OPT_PROXY      = (1 << 5),
102         WGET_OPT_USER_AGENT = (1 << 6),
103         WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
104         WGET_OPT_RETRIES    = (1 << 8),
105         WGET_OPT_PASSIVE    = (1 << 9),
106         WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
107         WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
108 };
109
/* Values accepted by progress_meter() */
enum {
        PROGRESS_START = -1, /* initialize the meter, then draw it */
        PROGRESS_END   = 0,  /* draw it one last time and free it */
        PROGRESS_BUMP  = 1,  /* plain redraw */
};
115 #if ENABLE_FEATURE_WGET_STATUSBAR
116 static void progress_meter(int flag)
117 {
118         if (option_mask32 & WGET_OPT_QUIET)
119                 return;
120
121         if (flag == PROGRESS_START)
122                 bb_progress_init(&G.pmt, G.curfile);
123
124         bb_progress_update(&G.pmt,
125                         G.beg_range,
126                         G.transferred,
127                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
128         );
129
130         if (flag == PROGRESS_END) {
131                 bb_progress_free(&G.pmt);
132                 bb_putchar_stderr('\n');
133                 G.transferred = 0;
134         }
135 }
136 #else
137 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
138 #endif
139
140
141 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
142  * local addresses can have a scope identifier to specify the
143  * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
144  * identifier is only valid on a single node.
145  *
146  * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
147  * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
148  * in the Host header as invalid requests, see
149  * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
150  */
/*
 * Cut the "%zone" part out of a bracketed IPv6 host, in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything not starting with '[' is left untouched: bbox wget does
 * accept bare IPv6 addresses (wget "http://::1/xxx"), but that is
 * non-standard and is not handled here to save code.
 */
static void strip_ipv6_scope_id(char *host)
{
        char *zone;
        char *closing;

        if (host[0] != '[')
                return; /* not IPv6 */

        zone = strchr(host, '%');
        if (zone == NULL)
                return; /* no zone identifier present */

        closing = strchr(host, ']');
        if (closing == NULL)
                return; /* malformed: no closing bracket */
        if (closing[1] != ':' && closing[1] != '\0')
                return; /* malformed: neither "[xx]:nn" nor "[xx]" */

        /* zone points to "%eth0]...", closing points to "]...":
         * slide the tail down over the zone identifier.
         */
        overlapping_strcpy(zone, closing);
}
176
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * The result is overwritten by the next user of that buffer.
 */
static char *base64enc(const char *str)
{
        /* paranoia: cap the input length */
        const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
        unsigned len = strlen(str);

        if (len > limit)
                len = limit;
        bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
        return G.wget_buf;
}
#endif
188
/*
 * Truncate s, in place, at the first byte below ' ' (control
 * characters, including CR, LF and TAB). Bytes >= 0x80 are kept.
 * Returns s.
 */
static char* sanitize_string(char *s)
{
        unsigned char *cut;

        /* the terminating NUL is below ' ' too, so the scan always stops */
        for (cut = (unsigned char *) s; *cut >= ' '; cut++)
                continue;
        *cut = '\0';
        return s;
}
197
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * SIGALRM handler: abort the program if the alarm fires while a
 * connect is still in progress; otherwise ignore it.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
        if (!G.connecting)
                return;
        /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
        bb_error_msg_and_die("download timed out");
}
#endif
206
207 static FILE *open_socket(len_and_sockaddr *lsa)
208 {
209         int fd;
210         FILE *fp;
211
212         IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
213         fd = xconnect_stream(lsa);
214         IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
215
216         /* glibc 2.4 seems to try seeking on it - ??! */
217         /* hopefully it understands what ESPIPE means... */
218         fp = fdopen(fd, "r+");
219         if (fp == NULL)
220                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
221
222         return fp;
223 }
224
225 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
226 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
227 static char fgets_and_trim(FILE *fp)
228 {
229         char c;
230         char *buf_ptr;
231
232         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
233                 bb_perror_msg_and_die("error getting response");
234
235         buf_ptr = strchrnul(G.wget_buf, '\n');
236         c = *buf_ptr;
237         *buf_ptr = '\0';
238         buf_ptr = strchrnul(G.wget_buf, '\r');
239         *buf_ptr = '\0';
240
241         log_io("< %s", G.wget_buf);
242
243         return c;
244 }
245
246 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
247 {
248         int result;
249         if (s1) {
250                 if (!s2)
251                         s2 = "";
252                 fprintf(fp, "%s%s\r\n", s1, s2);
253                 fflush(fp);
254                 log_io("> %s%s", s1, s2);
255         }
256
257         do {
258                 fgets_and_trim(fp);
259         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
260
261         G.wget_buf[3] = '\0';
262         result = xatoi_positive(G.wget_buf);
263         G.wget_buf[3] = ' ';
264         return result;
265 }
266
267 static void parse_url(const char *src_url, struct host_info *h)
268 {
269         char *url, *p, *sp;
270
271         free(h->allocated);
272         h->allocated = url = xstrdup(src_url);
273
274         if (strncmp(url, "http://", 7) == 0) {
275                 h->port = bb_lookup_port("http", "tcp", 80);
276                 h->host = url + 7;
277                 h->is_ftp = 0;
278         } else if (strncmp(url, "ftp://", 6) == 0) {
279                 h->port = bb_lookup_port("ftp", "tcp", 21);
280                 h->host = url + 6;
281                 h->is_ftp = 1;
282         } else
283                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
284
285         // FYI:
286         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
287         //   'GET /?var=a/b HTTP 1.0'
288         //   and saves 'index.html?var=a%2Fb' (we save 'b')
289         // wget 'http://busybox.net?login=john@doe':
290         //   request: 'GET /?login=john@doe HTTP/1.0'
291         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
292         // wget 'http://busybox.net#test/test':
293         //   request: 'GET / HTTP/1.0'
294         //   saves: 'index.html' (we save 'test')
295         //
296         // We also don't add unique .N suffix if file exists...
297         sp = strchr(h->host, '/');
298         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
299         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
300         if (!sp) {
301                 h->path = "";
302         } else if (*sp == '/') {
303                 *sp = '\0';
304                 h->path = sp + 1;
305         } else { // '#' or '?'
306                 // http://busybox.net?login=john@doe is a valid URL
307                 // memmove converts to:
308                 // http:/busybox.nett?login=john@doe...
309                 memmove(h->host - 1, h->host, sp - h->host);
310                 h->host--;
311                 sp[-1] = '\0';
312                 h->path = sp;
313         }
314
315         // We used to set h->user to NULL here, but this interferes
316         // with handling of code 302 ("object was moved")
317
318         sp = strrchr(h->host, '@');
319         if (sp != NULL) {
320                 // URL-decode "user:password" string before base64-encoding:
321                 // wget http://test:my%20pass@example.com should send
322                 // Authorization: Basic dGVzdDpteSBwYXNz
323                 // which decodes to "test:my pass".
324                 // Standard wget and curl do this too.
325                 *sp = '\0';
326                 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
327                 h->host = sp + 1;
328         }
329
330         sp = h->host;
331 }
332
333 static char *gethdr(FILE *fp)
334 {
335         char *s, *hdrval;
336         int c;
337
338         /* retrieve header line */
339         c = fgets_and_trim(fp);
340
341         /* end of the headers? */
342         if (G.wget_buf[0] == '\0')
343                 return NULL;
344
345         /* convert the header name to lower case */
346         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
347                 /*
348                  * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
349                  * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
350                  * "A-Z" maps to "a-z".
351                  * "@[\]" can't occur in header names.
352                  * "^_" maps to "~,DEL" (which is wrong).
353                  * "^" was never seen yet, "_" was seen from web.archive.org
354                  * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
355                  */
356                 *s |= 0x20;
357         }
358
359         /* verify we are at the end of the header name */
360         if (*s != ':')
361                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
362
363         /* locate the start of the header value */
364         *s++ = '\0';
365         hdrval = skip_whitespace(s);
366
367         if (c != '\n') {
368                 /* Rats! The buffer isn't big enough to hold the entire header value */
369                 while (c = getc(fp), c != EOF && c != '\n')
370                         continue;
371         }
372
373         return hdrval;
374 }
375
376 static void reset_beg_range_to_zero(void)
377 {
378         bb_error_msg("restart failed");
379         G.beg_range = 0;
380         xlseek(G.output_fd, 0, SEEK_SET);
381         /* Done at the end instead: */
382         /* ftruncate(G.output_fd, 0); */
383 }
384
385 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
386 {
387         FILE *sfp;
388         char *str;
389         int port;
390
391         if (!target->user)
392                 target->user = xstrdup("anonymous:busybox@");
393
394         sfp = open_socket(lsa);
395         if (ftpcmd(NULL, NULL, sfp) != 220)
396                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
397
398         /*
399          * Splitting username:password pair,
400          * trying to log in
401          */
402         str = strchr(target->user, ':');
403         if (str)
404                 *str++ = '\0';
405         switch (ftpcmd("USER ", target->user, sfp)) {
406         case 230:
407                 break;
408         case 331:
409                 if (ftpcmd("PASS ", str, sfp) == 230)
410                         break;
411                 /* fall through (failed login) */
412         default:
413                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
414         }
415
416         ftpcmd("TYPE I", NULL, sfp);
417
418         /*
419          * Querying file size
420          */
421         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
422                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
423                 if (G.content_len < 0 || errno) {
424                         bb_error_msg_and_die("SIZE value is garbage");
425                 }
426                 G.got_clen = 1;
427         }
428
429         /*
430          * Entering passive mode
431          */
432         if (ftpcmd("PASV", NULL, sfp) != 227) {
433  pasv_error:
434                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
435         }
436         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
437         // Server's IP is N1.N2.N3.N4 (we ignore it)
438         // Server's port for data connection is P1*256+P2
439         str = strrchr(G.wget_buf, ')');
440         if (str) str[0] = '\0';
441         str = strrchr(G.wget_buf, ',');
442         if (!str) goto pasv_error;
443         port = xatou_range(str+1, 0, 255);
444         *str = '\0';
445         str = strrchr(G.wget_buf, ',');
446         if (!str) goto pasv_error;
447         port += xatou_range(str+1, 0, 255) * 256;
448         set_nport(&lsa->u.sa, htons(port));
449
450         *dfpp = open_socket(lsa);
451
452         if (G.beg_range != 0) {
453                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
454                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
455                         G.content_len -= G.beg_range;
456                 else
457                         reset_beg_range_to_zero();
458         }
459
460         if (ftpcmd("RETR ", target->path, sfp) > 150)
461                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
462
463         return sfp;
464 }
465
/*
 * Copy the response body from dfp to G.output_fd, through G.wget_buf.
 *
 * - If G.got_clen is set, G.content_len is counted down as data is
 *   written and reading stops when it is exhausted; otherwise reading
 *   continues until EOF.
 * - If G.chunked is set, a hexadecimal chunk-size line is read before
 *   each chunk and becomes G.content_len; a size of 0 ends the body.
 * - When the status bar and/or timeout feature is compiled in, the
 *   descriptor is put in non-blocking mode while data is read, and the
 *   code waits in poll() for at most one second at a time so that the
 *   progress meter can be refreshed. With the timeout feature, the
 *   program dies with "download timed out" after G.timeout_seconds
 *   such seconds pass without new data (0 disables the check).
 *
 * Dies on a read error. On normal completion the output file is
 * truncated at the current position and the progress meter is ended.
 */
466 static void NOINLINE retrieve_file_data(FILE *dfp)
467 {
468 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
469 # if ENABLE_FEATURE_WGET_TIMEOUT
            /* seconds of silence still tolerated; reloaded after every successful read */
470         unsigned second_cnt = G.timeout_seconds;
471 # endif
472         struct pollfd polldata;
473
474         polldata.fd = fileno(dfp);
475         polldata.events = POLLIN | POLLPRI;
476 #endif
477         progress_meter(PROGRESS_START);
478
            /* a chunked body begins with a chunk-size line: read that first */
479         if (G.chunked)
480                 goto get_clen;
481
482         /* Loops only if chunked */
483         while (1) {
484
485 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
486                 /* Must use nonblocking I/O, otherwise fread will loop
487                  * and *block* until it reads full buffer,
488                  * which messes up progress bar and/or timeout logic.
489                  * Because of nonblocking I/O, we need to dance
490                  * very carefully around EAGAIN. See explanation at
491                  * clearerr() calls.
492                  */
493                 ndelay_on(polldata.fd);
494 #endif
495                 while (1) {
496                         int n;
497                         unsigned rdsz;
498
499 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
500                         /* fread internally uses read loop, which in our case
501                          * is usually exited when we get EAGAIN.
502                          * In this case, libc sets error marker on the stream.
503                          * Need to clear it before next fread to avoid possible
504                          * rare false positive ferror below. Rare because usually
505                          * fread gets more than zero bytes, and we don't fall
506                          * into if (n <= 0) ...
507                          */
508                         clearerr(dfp);
509 #endif
                            /* errno is inspected after fread to tell EAGAIN from EOF/error */
510                         errno = 0;
                            /* ask for a full buffer, but never for more than is still expected */
511                         rdsz = sizeof(G.wget_buf);
512                         if (G.got_clen) {
513                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
514                                         if ((int)G.content_len <= 0)
515                                                 break;
516                                         rdsz = (unsigned)G.content_len;
517                                 }
518                         }
519                         n = fread(G.wget_buf, 1, rdsz, dfp);
520
521                         if (n > 0) {
522                                 xwrite(G.output_fd, G.wget_buf, n);
523 #if ENABLE_FEATURE_WGET_STATUSBAR
524                                 G.transferred += n;
525 #endif
526                                 if (G.got_clen) {
527                                         G.content_len -= n;
528                                         if (G.content_len == 0)
529                                                 break;
530                                 }
531 #if ENABLE_FEATURE_WGET_TIMEOUT
                                    /* data arrived: restart the stall countdown */
532                                 second_cnt = G.timeout_seconds;
533 #endif
534                                 continue;
535                         }
536
537                         /* n <= 0.
538                          * man fread:
539                          * If error occurs, or EOF is reached, the return value
540                          * is a short item count (or zero).
541                          * fread does not distinguish between EOF and error.
542                          */
543                         if (errno != EAGAIN) {
544                                 if (ferror(dfp)) {
545                                         progress_meter(PROGRESS_END);
546                                         bb_perror_msg_and_die(bb_msg_read_error);
547                                 }
548                                 break; /* EOF, not error */
549                         }
550
551 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
552                         /* It was EAGAIN. There is no data. Wait up to one second
553                          * then abort if timed out, or update the bar and try reading again.
554                          */
555                         if (safe_poll(&polldata, 1, 1000) == 0) {
556 # if ENABLE_FEATURE_WGET_TIMEOUT
557                                 if (second_cnt != 0 && --second_cnt == 0) {
558                                         progress_meter(PROGRESS_END);
559                                         bb_error_msg_and_die("download timed out");
560                                 }
561 # endif
562                                 /* We used to loop back to poll here,
563                                  * but there is no great harm in letting fread
564                                  * to try reading anyway.
565                                  */
566                         }
567                         /* Need to do it _every_ second for "stalled" indicator
568                          * to be shown properly.
569                          */
570                         progress_meter(PROGRESS_BUMP);
571 #endif
572                 } /* while (reading data) */
573
574 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
575                 clearerr(dfp);
576                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
577 #endif
578                 if (!G.chunked)
579                         break;
580
581                 fgets_and_trim(dfp); /* Eat empty line */
582  get_clen:
                    /* chunk size is given in hexadecimal; it becomes the new content_len */
583                 fgets_and_trim(dfp);
584                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
585                 /* FIXME: error check? */
586                 if (G.content_len == 0)
587                         break; /* all done! */
588                 G.got_clen = 1;
589                 /*
590                  * Note that fgets may result in some data being buffered in dfp.
591                  * We loop back to fread, which will retrieve this data.
592                  * Also note that code has to be arranged so that fread
593                  * is done _before_ one-second poll wait - poll doesn't know
594                  * about stdio buffering and can result in spurious one second waits!
595                  */
596         }
597
598         /* If -c failed, we restart from the beginning,
599          * but we do not truncate file then, we do it only now, at the end.
600          * This lets user to ^C if his 99% complete 10 GB file download
601          * failed to restart *without* losing the almost complete file.
602          */
603         {
604                 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
605                 if (pos != (off_t)-1)
606                         ftruncate(G.output_fd, pos);
607         }
608
609         /* Draw full bar and free its resources */
610         G.chunked = 0;  /* makes it show 100% even for chunked download */
611         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
612         progress_meter(PROGRESS_END);
613 }
614
615 static void download_one_url(const char *url)
616 {
617         bool use_proxy;                 /* Use proxies if env vars are set  */
618         int redir_limit;
619         len_and_sockaddr *lsa;
620         FILE *sfp;                      /* socket to web/ftp server         */
621         FILE *dfp;                      /* socket to ftp server (data)      */
622         char *proxy = NULL;
623         char *fname_out_alloc;
624         char *redirected_path = NULL;
625         struct host_info server;
626         struct host_info target;
627
628         server.allocated = NULL;
629         target.allocated = NULL;
630         server.user = NULL;
631         target.user = NULL;
632
633         parse_url(url, &target);
634
635         /* Use the proxy if necessary */
636         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
637         if (use_proxy) {
638                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
639                 use_proxy = (proxy && proxy[0]);
640                 if (use_proxy)
641                         parse_url(proxy, &server);
642         }
643         if (!use_proxy) {
644                 server.port = target.port;
645                 if (ENABLE_FEATURE_IPV6) {
646                         //free(server.allocated); - can't be non-NULL
647                         server.host = server.allocated = xstrdup(target.host);
648                 } else {
649                         server.host = target.host;
650                 }
651         }
652
653         if (ENABLE_FEATURE_IPV6)
654                 strip_ipv6_scope_id(target.host);
655
656         /* If there was no -O FILE, guess output filename */
657         fname_out_alloc = NULL;
658         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
659                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
660                 /* handle "wget http://kernel.org//" */
661                 if (G.fname_out[0] == '/' || !G.fname_out[0])
662                         G.fname_out = (char*)"index.html";
663                 /* -P DIR is considered only if there was no -O FILE */
664                 if (G.dir_prefix)
665                         G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
666                 else {
667                         /* redirects may free target.path later, need to make a copy */
668                         G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
669                 }
670         }
671 #if ENABLE_FEATURE_WGET_STATUSBAR
672         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
673 #endif
674
675         /* Determine where to start transfer */
676         G.beg_range = 0;
677         if (option_mask32 & WGET_OPT_CONTINUE) {
678                 G.output_fd = open(G.fname_out, O_WRONLY);
679                 if (G.output_fd >= 0) {
680                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
681                 }
682                 /* File doesn't exist. We do not create file here yet.
683                  * We are not sure it exists on remote side */
684         }
685
686         redir_limit = 5;
687  resolve_lsa:
688         lsa = xhost2sockaddr(server.host, server.port);
689         if (!(option_mask32 & WGET_OPT_QUIET)) {
690                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
691                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
692                 free(s);
693         }
694  establish_session:
695         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
696         G.got_clen = 0;
697         G.chunked = 0;
698         if (use_proxy || !target.is_ftp) {
699                 /*
700                  *  HTTP session
701                  */
702                 char *str;
703                 int status;
704
705
706                 /* Open socket to http server */
707                 sfp = open_socket(lsa);
708
709                 /* Send HTTP request */
710                 if (use_proxy) {
711                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
712                                 target.is_ftp ? "f" : "ht", target.host,
713                                 target.path);
714                 } else {
715                         if (option_mask32 & WGET_OPT_POST_DATA)
716                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
717                         else
718                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
719                 }
720
721                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
722                         target.host, G.user_agent);
723
724                 /* Ask server to close the connection as soon as we are done
725                  * (IOW: we do not intend to send more requests)
726                  */
727                 fprintf(sfp, "Connection: close\r\n");
728
729 #if ENABLE_FEATURE_WGET_AUTHENTICATION
730                 if (target.user) {
731                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
732                                 base64enc(target.user));
733                 }
734                 if (use_proxy && server.user) {
735                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
736                                 base64enc(server.user));
737                 }
738 #endif
739
740                 if (G.beg_range != 0)
741                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
742
743 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
744                 if (G.extra_headers)
745                         fputs(G.extra_headers, sfp);
746
747                 if (option_mask32 & WGET_OPT_POST_DATA) {
748                         fprintf(sfp,
749                                 "Content-Type: application/x-www-form-urlencoded\r\n"
750                                 "Content-Length: %u\r\n"
751                                 "\r\n"
752                                 "%s",
753                                 (int) strlen(G.post_data), G.post_data
754                         );
755                 } else
756 #endif
757                 {
758                         fprintf(sfp, "\r\n");
759                 }
760
761                 fflush(sfp);
762
763                 /*
764                  * Retrieve HTTP response line and check for "200" status code.
765                  */
766  read_response:
767                 fgets_and_trim(sfp);
768
769                 str = G.wget_buf;
770                 str = skip_non_whitespace(str);
771                 str = skip_whitespace(str);
772                 // FIXME: no error check
773                 // xatou wouldn't work: "200 OK"
774                 status = atoi(str);
775                 switch (status) {
776                 case 0:
777                 case 100:
778                         while (gethdr(sfp) != NULL)
779                                 /* eat all remaining headers */;
780                         goto read_response;
781                 case 200:
782 /*
783 Response 204 doesn't say "null file", it says "metadata
784 has changed but data didn't":
785
786 "10.2.5 204 No Content
787 The server has fulfilled the request but does not need to return
788 an entity-body, and might want to return updated metainformation.
789 The response MAY include new or updated metainformation in the form
790 of entity-headers, which if present SHOULD be associated with
791 the requested variant.
792
793 If the client is a user agent, it SHOULD NOT change its document
794 view from that which caused the request to be sent. This response
795 is primarily intended to allow input for actions to take place
796 without causing a change to the user agent's active document view,
797 although any new or updated metainformation SHOULD be applied
798 to the document currently in the user agent's active view.
799
800 The 204 response MUST NOT include a message-body, and thus
801 is always terminated by the first empty line after the header fields."
802
803 However, in real world it was observed that some web servers
804 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
805 */
806                 case 204:
807                         if (G.beg_range != 0) {
808                                 /* "Range:..." was not honored by the server.
809                                  * Restart download from the beginning.
810                                  */
811                                 reset_beg_range_to_zero();
812                         }
813                         break;
814                 case 300:  /* redirection */
815                 case 301:
816                 case 302:
817                 case 303:
818                         break;
819                 case 206: /* Partial Content */
820                         if (G.beg_range != 0)
821                                 /* "Range:..." worked. Good. */
822                                 break;
823                         /* Partial Content even though we did not ask for it??? */
824                         /* fall through */
825                 default:
826                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
827                 }
828
829                 /*
830                  * Retrieve HTTP headers.
831                  */
832                 while ((str = gethdr(sfp)) != NULL) {
833                         static const char keywords[] ALIGN1 =
834                                 "content-length\0""transfer-encoding\0""location\0";
835                         enum {
836                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
837                         };
838                         smalluint key;
839
840                         /* gethdr converted "FOO:" string to lowercase */
841
842                         /* strip trailing whitespace */
843                         char *s = strchrnul(str, '\0') - 1;
844                         while (s >= str && (*s == ' ' || *s == '\t')) {
845                                 *s = '\0';
846                                 s--;
847                         }
848                         key = index_in_strings(keywords, G.wget_buf) + 1;
849                         if (key == KEY_content_length) {
850                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
851                                 if (G.content_len < 0 || errno) {
852                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
853                                 }
854                                 G.got_clen = 1;
855                                 continue;
856                         }
857                         if (key == KEY_transfer_encoding) {
858                                 if (strcmp(str_tolower(str), "chunked") != 0)
859                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
860                                 G.chunked = 1;
861                         }
862                         if (key == KEY_location && status >= 300) {
863                                 if (--redir_limit == 0)
864                                         bb_error_msg_and_die("too many redirections");
865                                 fclose(sfp);
866                                 if (str[0] == '/') {
867                                         free(redirected_path);
868                                         target.path = redirected_path = xstrdup(str+1);
869                                         /* lsa stays the same: it's on the same server */
870                                 } else {
871                                         parse_url(str, &target);
872                                         if (!use_proxy) {
873                                                 free(server.allocated);
874                                                 server.allocated = NULL;
875                                                 server.host = target.host;
876                                                 /* strip_ipv6_scope_id(target.host); - no! */
877                                                 /* we assume remote never gives us IPv6 addr with scope id */
878                                                 server.port = target.port;
879                                                 free(lsa);
880                                                 goto resolve_lsa;
881                                         } /* else: lsa stays the same: we use proxy */
882                                 }
883                                 goto establish_session;
884                         }
885                 }
886 //              if (status >= 300)
887 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
888
889                 /* For HTTP, data is pumped over the same connection */
890                 dfp = sfp;
891
892         } else {
893                 /*
894                  *  FTP session
895                  */
896                 sfp = prepare_ftp_session(&dfp, &target, lsa);
897         }
898
899         free(lsa);
900
901         if (!(option_mask32 & WGET_OPT_SPIDER)) {
902                 if (G.output_fd < 0)
903                         G.output_fd = xopen(G.fname_out, G.o_flags);
904                 retrieve_file_data(dfp);
905                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
906                         xclose(G.output_fd);
907                         G.output_fd = -1;
908                 }
909         }
910
911         if (dfp != sfp) {
912                 /* It's ftp. Close data connection properly */
913                 fclose(dfp);
914                 if (ftpcmd(NULL, NULL, sfp) != 226)
915                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
916                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
917         }
918         fclose(sfp);
919
920         free(server.allocated);
921         free(target.allocated);
922         free(fname_out_alloc);
923         free(redirected_path);
924 }
925
926 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
927 int wget_main(int argc UNUSED_PARAM, char **argv)
928 {
929 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
930         static const char wget_longopts[] ALIGN1 =
931                 /* name, has_arg, val */
932                 "continue\0"         No_argument       "c"
933 //FIXME: -s isn't --spider, it's --save-headers!
934                 "spider\0"           No_argument       "s"
935                 "quiet\0"            No_argument       "q"
936                 "output-document\0"  Required_argument "O"
937                 "directory-prefix\0" Required_argument "P"
938                 "proxy\0"            Required_argument "Y"
939                 "user-agent\0"       Required_argument "U"
940 #if ENABLE_FEATURE_WGET_TIMEOUT
941                 "timeout\0"          Required_argument "T"
942 #endif
943                 /* Ignored: */
944                 // "tries\0"            Required_argument "t"
945                 /* Ignored (we always use PASV): */
946                 "passive-ftp\0"      No_argument       "\xff"
947                 "header\0"           Required_argument "\xfe"
948                 "post-data\0"        Required_argument "\xfd"
949                 /* Ignored (we don't do ssl) */
950                 "no-check-certificate\0" No_argument   "\xfc"
951                 /* Ignored (we don't support caching) */
952                 "no-cache\0"         No_argument       "\xfb"
953                 ;
954 #endif
955
956 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
957         llist_t *headers_llist = NULL;
958 #endif
959
960         INIT_G();
961
962 #if ENABLE_FEATURE_WGET_TIMEOUT
963         G.timeout_seconds = 900;
964         signal(SIGALRM, alarm_handler);
965 #endif
966         G.proxy_flag = "on";   /* use proxies if env vars are set */
967         G.user_agent = "Wget"; /* "User-Agent" header field */
968
969 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
970         applet_long_options = wget_longopts;
971 #endif
972         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
973         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
974                 &G.fname_out, &G.dir_prefix,
975                 &G.proxy_flag, &G.user_agent,
976                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
977                 NULL /* -t RETRIES */
978                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
979                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
980         );
981         argv += optind;
982
983 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
984         if (headers_llist) {
985                 int size = 1;
986                 char *cp;
987                 llist_t *ll = headers_llist;
988                 while (ll) {
989                         size += strlen(ll->data) + 2;
990                         ll = ll->link;
991                 }
992                 G.extra_headers = cp = xmalloc(size);
993                 while (headers_llist) {
994                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
995                 }
996         }
997 #endif
998
999         G.output_fd = -1;
1000         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1001         if (G.fname_out) { /* -O FILE ? */
1002                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1003                         G.output_fd = 1;
1004                         option_mask32 &= ~WGET_OPT_CONTINUE;
1005                 }
1006                 /* compat with wget: -O FILE can overwrite */
1007                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1008         }
1009
1010         while (*argv)
1011                 download_one_url(*argv++);
1012
1013         if (G.output_fd >= 0)
1014                 xclose(G.output_fd);
1015
1016         return EXIT_SUCCESS;
1017 }