iplink: fix build error (IFLA_VLAN_PROTOCOL defined before include)
[platform/upstream/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage:    "        [--no-check-certificate] [--no-cache]" */
18 //usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19 //usage:        )
20 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23 //usage:        )
24 //usage:#define wget_full_usage "\n\n"
25 //usage:       "Retrieve files via HTTP or FTP\n"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
/*
 * Protocol trace hook. As shipped, log_io() expands to a no-op expression
 * and its arguments are never evaluated. Change the 0 below to 1 to route
 * every log_io() call to bb_error_msg() instead.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
#else
# define log_io(...) ((void)0)
#endif
44
45
46 struct host_info {
47         char *allocated;
48         const char *path;
49         const char *user;
50         char       *host;
51         int         port;
52         smallint    is_ftp;
53 };
54
55
56 /* Globals */
57 struct globals {
58         off_t content_len;        /* Content-length of the file */
59         off_t beg_range;          /* Range at which continue begins */
60 #if ENABLE_FEATURE_WGET_STATUSBAR
61         off_t transferred;        /* Number of bytes transferred so far */
62         const char *curfile;      /* Name of current file being transferred */
63         bb_progress_t pmt;
64 #endif
65         char *dir_prefix;
66 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
67         char *post_data;
68         char *extra_headers;
69 #endif
70         char *fname_out;        /* where to direct output (-O) */
71         const char *proxy_flag; /* Use proxies if env vars are set */
72         const char *user_agent; /* "User-Agent" header field */
73 #if ENABLE_FEATURE_WGET_TIMEOUT
74         unsigned timeout_seconds;
75 #endif
76         int output_fd;
77         int o_flags;
78         smallint chunked;         /* chunked transfer encoding */
79         smallint got_clen;        /* got content-length: from server  */
80         /* Local downloads do benefit from big buffer.
81          * With 512 byte buffer, it was measured to be
82          * an order of magnitude slower than with big one.
83          */
84         uint64_t just_to_align_next_member;
85         char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
86 } FIX_ALIASING;
87 #define G (*ptr_to_globals)
88 #define INIT_G() do { \
89         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
90         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
91 } while (0)
92
93
94 /* Must match option string! */
95 enum {
96         WGET_OPT_CONTINUE   = (1 << 0),
97         WGET_OPT_SPIDER     = (1 << 1),
98         WGET_OPT_QUIET      = (1 << 2),
99         WGET_OPT_OUTNAME    = (1 << 3),
100         WGET_OPT_PREFIX     = (1 << 4),
101         WGET_OPT_PROXY      = (1 << 5),
102         WGET_OPT_USER_AGENT = (1 << 6),
103         WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
104         WGET_OPT_RETRIES    = (1 << 8),
105         WGET_OPT_PASSIVE    = (1 << 9),
106         WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
107         WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
108 };
109
/* Values accepted by progress_meter() */
enum {
        PROGRESS_START = -1,    /* set the meter up, then draw it */
        PROGRESS_END   = 0,     /* draw it one last time, then release it */
        PROGRESS_BUMP  = 1,     /* just redraw */
};
115 #if ENABLE_FEATURE_WGET_STATUSBAR
116 static void progress_meter(int flag)
117 {
118         if (option_mask32 & WGET_OPT_QUIET)
119                 return;
120
121         if (flag == PROGRESS_START)
122                 bb_progress_init(&G.pmt, G.curfile);
123
124         bb_progress_update(&G.pmt,
125                         G.beg_range,
126                         G.transferred,
127                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
128         );
129
130         if (flag == PROGRESS_END) {
131                 bb_progress_free(&G.pmt);
132                 bb_putchar_stderr('\n');
133                 G.transferred = 0;
134         }
135 }
136 #else
137 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
138 #endif
139
140
141 /* IPv6 knows scoped address types i.e. link and site local addresses. Link
142  * local addresses can have a scope identifier to specify the
143  * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
144  * identifier is only valid on a single node.
145  *
146  * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
147  * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
148  * in the Host header as invalid requests, see
149  * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
150  */
static void strip_ipv6_scope_id(char *host)
{
        char *zone, *bracket;

        /* bbox wget also accepts unbracketed IPv6 addresses such as
         * wget "http://::1/xxx", but that form is not standard and,
         * to save code, it is not handled _here_. */
        if (host[0] != '[')
                return; /* not IPv6 */

        zone = strchr(host, '%');
        if (!zone)
                return; /* no zone identifier present */

        /* Only "[xx]" and "[xx]:nn" are well formed; anything else is
         * left exactly as it was. */
        bracket = strchr(host, ']');
        if (bracket && (bracket[1] == ':' || bracket[1] == '\0')) {
                /* zone is at "%eth0]...", bracket is at "]...":
                 * slide the tail over the zone identifier */
                overlapping_strcpy(zone, bracket);
        }
}
176
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * The result is overwritten by the next user of that buffer.
 * Over-long input is silently cut so that the output fits.
 */
static char *base64enc(const char *str)
{
        /* paranoia: longest input whose encoding still fits, with slack */
        const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
        unsigned len;

        len = strlen(str);
        if (len > limit)
                len = limit;
        bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
        return G.wget_buf;
}
#endif
188
/*
 * Cut s, in place, at its first byte below ' ' (any control character)
 * and return s. Bytes are compared as unsigned, so values of 0x80 and
 * above are kept.
 */
static char* sanitize_string(char *s)
{
        unsigned char *cut;

        for (cut = (unsigned char *) s; *cut >= ' '; cut++)
                continue;
        *cut = '\0';
        return s;
}
197
198 static FILE *open_socket(len_and_sockaddr *lsa)
199 {
200         FILE *fp;
201
202         /* glibc 2.4 seems to try seeking on it - ??! */
203         /* hopefully it understands what ESPIPE means... */
204         fp = fdopen(xconnect_stream(lsa), "r+");
205         if (fp == NULL)
206                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
207
208         return fp;
209 }
210
211 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
212 static char fgets_and_trim(FILE *fp)
213 {
214         char c;
215         char *buf_ptr;
216
217         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
218                 bb_perror_msg_and_die("error getting response");
219
220         buf_ptr = strchrnul(G.wget_buf, '\n');
221         c = *buf_ptr;
222         *buf_ptr = '\0';
223         buf_ptr = strchrnul(G.wget_buf, '\r');
224         *buf_ptr = '\0';
225
226         log_io("< %s", G.wget_buf);
227
228         return c;
229 }
230
231 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
232 {
233         int result;
234         if (s1) {
235                 if (!s2)
236                         s2 = "";
237                 fprintf(fp, "%s%s\r\n", s1, s2);
238                 fflush(fp);
239                 log_io("> %s%s", s1, s2);
240         }
241
242         do {
243                 fgets_and_trim(fp);
244         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
245
246         G.wget_buf[3] = '\0';
247         result = xatoi_positive(G.wget_buf);
248         G.wget_buf[3] = ' ';
249         return result;
250 }
251
252 static void parse_url(const char *src_url, struct host_info *h)
253 {
254         char *url, *p, *sp;
255
256         free(h->allocated);
257         h->allocated = url = xstrdup(src_url);
258
259         if (strncmp(url, "http://", 7) == 0) {
260                 h->port = bb_lookup_port("http", "tcp", 80);
261                 h->host = url + 7;
262                 h->is_ftp = 0;
263         } else if (strncmp(url, "ftp://", 6) == 0) {
264                 h->port = bb_lookup_port("ftp", "tcp", 21);
265                 h->host = url + 6;
266                 h->is_ftp = 1;
267         } else
268                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
269
270         // FYI:
271         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
272         //   'GET /?var=a/b HTTP 1.0'
273         //   and saves 'index.html?var=a%2Fb' (we save 'b')
274         // wget 'http://busybox.net?login=john@doe':
275         //   request: 'GET /?login=john@doe HTTP/1.0'
276         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
277         // wget 'http://busybox.net#test/test':
278         //   request: 'GET / HTTP/1.0'
279         //   saves: 'index.html' (we save 'test')
280         //
281         // We also don't add unique .N suffix if file exists...
282         sp = strchr(h->host, '/');
283         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
284         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
285         if (!sp) {
286                 h->path = "";
287         } else if (*sp == '/') {
288                 *sp = '\0';
289                 h->path = sp + 1;
290         } else { // '#' or '?'
291                 // http://busybox.net?login=john@doe is a valid URL
292                 // memmove converts to:
293                 // http:/busybox.nett?login=john@doe...
294                 memmove(h->host - 1, h->host, sp - h->host);
295                 h->host--;
296                 sp[-1] = '\0';
297                 h->path = sp;
298         }
299
300         // We used to set h->user to NULL here, but this interferes
301         // with handling of code 302 ("object was moved")
302
303         sp = strrchr(h->host, '@');
304         if (sp != NULL) {
305                 // URL-decode "user:password" string before base64-encoding:
306                 // wget http://test:my%20pass@example.com should send
307                 // Authorization: Basic dGVzdDpteSBwYXNz
308                 // which decodes to "test:my pass".
309                 // Standard wget and curl do this too.
310                 *sp = '\0';
311                 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
312                 h->host = sp + 1;
313         }
314
315         sp = h->host;
316 }
317
318 static char *gethdr(FILE *fp)
319 {
320         char *s, *hdrval;
321         int c;
322
323         /* retrieve header line */
324         c = fgets_and_trim(fp);
325
326         /* end of the headers? */
327         if (G.wget_buf[0] == '\0')
328                 return NULL;
329
330         /* convert the header name to lower case */
331         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
332                 /*
333                  * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
334                  * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
335                  * "A-Z" maps to "a-z".
336                  * "@[\]" can't occur in header names.
337                  * "^_" maps to "~,DEL" (which is wrong).
338                  * "^" was never seen yet, "_" was seen from web.archive.org
339                  * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
340                  */
341                 *s |= 0x20;
342         }
343
344         /* verify we are at the end of the header name */
345         if (*s != ':')
346                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
347
348         /* locate the start of the header value */
349         *s++ = '\0';
350         hdrval = skip_whitespace(s);
351
352         if (c != '\n') {
353                 /* Rats! The buffer isn't big enough to hold the entire header value */
354                 while (c = getc(fp), c != EOF && c != '\n')
355                         continue;
356         }
357
358         return hdrval;
359 }
360
361 static void reset_beg_range_to_zero(void)
362 {
363         bb_error_msg("restart failed");
364         G.beg_range = 0;
365         xlseek(G.output_fd, 0, SEEK_SET);
366         /* Done at the end instead: */
367         /* ftruncate(G.output_fd, 0); */
368 }
369
370 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
371 {
372         FILE *sfp;
373         char *str;
374         int port;
375
376         if (!target->user)
377                 target->user = xstrdup("anonymous:busybox@");
378
379         sfp = open_socket(lsa);
380         if (ftpcmd(NULL, NULL, sfp) != 220)
381                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
382
383         /*
384          * Splitting username:password pair,
385          * trying to log in
386          */
387         str = strchr(target->user, ':');
388         if (str)
389                 *str++ = '\0';
390         switch (ftpcmd("USER ", target->user, sfp)) {
391         case 230:
392                 break;
393         case 331:
394                 if (ftpcmd("PASS ", str, sfp) == 230)
395                         break;
396                 /* fall through (failed login) */
397         default:
398                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
399         }
400
401         ftpcmd("TYPE I", NULL, sfp);
402
403         /*
404          * Querying file size
405          */
406         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
407                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
408                 if (G.content_len < 0 || errno) {
409                         bb_error_msg_and_die("SIZE value is garbage");
410                 }
411                 G.got_clen = 1;
412         }
413
414         /*
415          * Entering passive mode
416          */
417         if (ftpcmd("PASV", NULL, sfp) != 227) {
418  pasv_error:
419                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
420         }
421         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
422         // Server's IP is N1.N2.N3.N4 (we ignore it)
423         // Server's port for data connection is P1*256+P2
424         str = strrchr(G.wget_buf, ')');
425         if (str) str[0] = '\0';
426         str = strrchr(G.wget_buf, ',');
427         if (!str) goto pasv_error;
428         port = xatou_range(str+1, 0, 255);
429         *str = '\0';
430         str = strrchr(G.wget_buf, ',');
431         if (!str) goto pasv_error;
432         port += xatou_range(str+1, 0, 255) * 256;
433         set_nport(&lsa->u.sa, htons(port));
434
435         *dfpp = open_socket(lsa);
436
437         if (G.beg_range != 0) {
438                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
439                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
440                         G.content_len -= G.beg_range;
441                 else
442                         reset_beg_range_to_zero();
443         }
444
445         if (ftpcmd("RETR ", target->path, sfp) > 150)
446                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
447
448         return sfp;
449 }
450
451 static void NOINLINE retrieve_file_data(FILE *dfp)
452 {
453 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
454 # if ENABLE_FEATURE_WGET_TIMEOUT
455         unsigned second_cnt = G.timeout_seconds;
456 # endif
457         struct pollfd polldata;
458
459         polldata.fd = fileno(dfp);
460         polldata.events = POLLIN | POLLPRI;
461 #endif
462         progress_meter(PROGRESS_START);
463
464         if (G.chunked)
465                 goto get_clen;
466
467         /* Loops only if chunked */
468         while (1) {
469
470 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
471                 /* Must use nonblocking I/O, otherwise fread will loop
472                  * and *block* until it reads full buffer,
473                  * which messes up progress bar and/or timeout logic.
474                  * Because of nonblocking I/O, we need to dance
475                  * very carefully around EAGAIN. See explanation at
476                  * clearerr() calls.
477                  */
478                 ndelay_on(polldata.fd);
479 #endif
480                 while (1) {
481                         int n;
482                         unsigned rdsz;
483
484 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
485                         /* fread internally uses read loop, which in our case
486                          * is usually exited when we get EAGAIN.
487                          * In this case, libc sets error marker on the stream.
488                          * Need to clear it before next fread to avoid possible
489                          * rare false positive ferror below. Rare because usually
490                          * fread gets more than zero bytes, and we don't fall
491                          * into if (n <= 0) ...
492                          */
493                         clearerr(dfp);
494 #endif
495                         errno = 0;
496                         rdsz = sizeof(G.wget_buf);
497                         if (G.got_clen) {
498                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
499                                         if ((int)G.content_len <= 0)
500                                                 break;
501                                         rdsz = (unsigned)G.content_len;
502                                 }
503                         }
504                         n = fread(G.wget_buf, 1, rdsz, dfp);
505
506                         if (n > 0) {
507                                 xwrite(G.output_fd, G.wget_buf, n);
508 #if ENABLE_FEATURE_WGET_STATUSBAR
509                                 G.transferred += n;
510 #endif
511                                 if (G.got_clen) {
512                                         G.content_len -= n;
513                                         if (G.content_len == 0)
514                                                 break;
515                                 }
516 #if ENABLE_FEATURE_WGET_TIMEOUT
517                                 second_cnt = G.timeout_seconds;
518 #endif
519                                 continue;
520                         }
521
522                         /* n <= 0.
523                          * man fread:
524                          * If error occurs, or EOF is reached, the return value
525                          * is a short item count (or zero).
526                          * fread does not distinguish between EOF and error.
527                          */
528                         if (errno != EAGAIN) {
529                                 if (ferror(dfp)) {
530                                         progress_meter(PROGRESS_END);
531                                         bb_perror_msg_and_die(bb_msg_read_error);
532                                 }
533                                 break; /* EOF, not error */
534                         }
535
536 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
537                         /* It was EAGAIN. There is no data. Wait up to one second
538                          * then abort if timed out, or update the bar and try reading again.
539                          */
540                         if (safe_poll(&polldata, 1, 1000) == 0) {
541 # if ENABLE_FEATURE_WGET_TIMEOUT
542                                 if (second_cnt != 0 && --second_cnt == 0) {
543                                         progress_meter(PROGRESS_END);
544                                         bb_error_msg_and_die("download timed out");
545                                 }
546 # endif
547                                 /* We used to loop back to poll here,
548                                  * but there is no great harm in letting fread
549                                  * to try reading anyway.
550                                  */
551                         }
552                         /* Need to do it _every_ second for "stalled" indicator
553                          * to be shown properly.
554                          */
555                         progress_meter(PROGRESS_BUMP);
556 #endif
557                 } /* while (reading data) */
558
559 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
560                 clearerr(dfp);
561                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
562 #endif
563                 if (!G.chunked)
564                         break;
565
566                 fgets_and_trim(dfp); /* Eat empty line */
567  get_clen:
568                 fgets_and_trim(dfp);
569                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
570                 /* FIXME: error check? */
571                 if (G.content_len == 0)
572                         break; /* all done! */
573                 G.got_clen = 1;
574                 /*
575                  * Note that fgets may result in some data being buffered in dfp.
576                  * We loop back to fread, which will retrieve this data.
577                  * Also note that code has to be arranged so that fread
578                  * is done _before_ one-second poll wait - poll doesn't know
579                  * about stdio buffering and can result in spurious one second waits!
580                  */
581         }
582
583         /* If -c failed, we restart from the beginning,
584          * but we do not truncate file then, we do it only now, at the end.
585          * This lets user to ^C if his 99% complete 10 GB file download
586          * failed to restart *without* losing the almost complete file.
587          */
588         {
589                 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
590                 if (pos != (off_t)-1)
591                         ftruncate(G.output_fd, pos);
592         }
593
594         /* Draw full bar and free its resources */
595         G.chunked = 0;  /* makes it show 100% even for chunked download */
596         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
597         progress_meter(PROGRESS_END);
598 }
599
600 static void download_one_url(const char *url)
601 {
602         bool use_proxy;                 /* Use proxies if env vars are set  */
603         int redir_limit;
604         len_and_sockaddr *lsa;
605         FILE *sfp;                      /* socket to web/ftp server         */
606         FILE *dfp;                      /* socket to ftp server (data)      */
607         char *proxy = NULL;
608         char *fname_out_alloc;
609         char *redirected_path = NULL;
610         struct host_info server;
611         struct host_info target;
612
613         server.allocated = NULL;
614         target.allocated = NULL;
615         server.user = NULL;
616         target.user = NULL;
617
618         parse_url(url, &target);
619
620         /* Use the proxy if necessary */
621         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
622         if (use_proxy) {
623                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
624                 use_proxy = (proxy && proxy[0]);
625                 if (use_proxy)
626                         parse_url(proxy, &server);
627         }
628         if (!use_proxy) {
629                 server.port = target.port;
630                 if (ENABLE_FEATURE_IPV6) {
631                         //free(server.allocated); - can't be non-NULL
632                         server.host = server.allocated = xstrdup(target.host);
633                 } else {
634                         server.host = target.host;
635                 }
636         }
637
638         if (ENABLE_FEATURE_IPV6)
639                 strip_ipv6_scope_id(target.host);
640
641         /* If there was no -O FILE, guess output filename */
642         fname_out_alloc = NULL;
643         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
644                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
645                 /* handle "wget http://kernel.org//" */
646                 if (G.fname_out[0] == '/' || !G.fname_out[0])
647                         G.fname_out = (char*)"index.html";
648                 /* -P DIR is considered only if there was no -O FILE */
649                 if (G.dir_prefix)
650                         G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
651                 else {
652                         /* redirects may free target.path later, need to make a copy */
653                         G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
654                 }
655         }
656 #if ENABLE_FEATURE_WGET_STATUSBAR
657         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
658 #endif
659
660         /* Determine where to start transfer */
661         G.beg_range = 0;
662         if (option_mask32 & WGET_OPT_CONTINUE) {
663                 G.output_fd = open(G.fname_out, O_WRONLY);
664                 if (G.output_fd >= 0) {
665                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
666                 }
667                 /* File doesn't exist. We do not create file here yet.
668                  * We are not sure it exists on remote side */
669         }
670
671         redir_limit = 5;
672  resolve_lsa:
673         lsa = xhost2sockaddr(server.host, server.port);
674         if (!(option_mask32 & WGET_OPT_QUIET)) {
675                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
676                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
677                 free(s);
678         }
679  establish_session:
680         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
681         G.got_clen = 0;
682         G.chunked = 0;
683         if (use_proxy || !target.is_ftp) {
684                 /*
685                  *  HTTP session
686                  */
687                 char *str;
688                 int status;
689
690
691                 /* Open socket to http server */
692                 sfp = open_socket(lsa);
693
694                 /* Send HTTP request */
695                 if (use_proxy) {
696                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
697                                 target.is_ftp ? "f" : "ht", target.host,
698                                 target.path);
699                 } else {
700                         if (option_mask32 & WGET_OPT_POST_DATA)
701                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
702                         else
703                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
704                 }
705
706                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
707                         target.host, G.user_agent);
708
709                 /* Ask server to close the connection as soon as we are done
710                  * (IOW: we do not intend to send more requests)
711                  */
712                 fprintf(sfp, "Connection: close\r\n");
713
714 #if ENABLE_FEATURE_WGET_AUTHENTICATION
715                 if (target.user) {
716                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
717                                 base64enc(target.user));
718                 }
719                 if (use_proxy && server.user) {
720                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
721                                 base64enc(server.user));
722                 }
723 #endif
724
725                 if (G.beg_range != 0)
726                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
727
728 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
729                 if (G.extra_headers)
730                         fputs(G.extra_headers, sfp);
731
732                 if (option_mask32 & WGET_OPT_POST_DATA) {
733                         fprintf(sfp,
734                                 "Content-Type: application/x-www-form-urlencoded\r\n"
735                                 "Content-Length: %u\r\n"
736                                 "\r\n"
737                                 "%s",
738                                 (int) strlen(G.post_data), G.post_data
739                         );
740                 } else
741 #endif
742                 {
743                         fprintf(sfp, "\r\n");
744                 }
745
746                 fflush(sfp);
747
748                 /*
749                  * Retrieve HTTP response line and check for "200" status code.
750                  */
751  read_response:
752                 fgets_and_trim(sfp);
753
754                 str = G.wget_buf;
755                 str = skip_non_whitespace(str);
756                 str = skip_whitespace(str);
757                 // FIXME: no error check
758                 // xatou wouldn't work: "200 OK"
759                 status = atoi(str);
760                 switch (status) {
761                 case 0:
762                 case 100:
763                         while (gethdr(sfp) != NULL)
764                                 /* eat all remaining headers */;
765                         goto read_response;
766                 case 200:
767 /*
768 Response 204 doesn't say "null file", it says "metadata
769 has changed but data didn't":
770
771 "10.2.5 204 No Content
772 The server has fulfilled the request but does not need to return
773 an entity-body, and might want to return updated metainformation.
774 The response MAY include new or updated metainformation in the form
775 of entity-headers, which if present SHOULD be associated with
776 the requested variant.
777
778 If the client is a user agent, it SHOULD NOT change its document
779 view from that which caused the request to be sent. This response
780 is primarily intended to allow input for actions to take place
781 without causing a change to the user agent's active document view,
782 although any new or updated metainformation SHOULD be applied
783 to the document currently in the user agent's active view.
784
785 The 204 response MUST NOT include a message-body, and thus
786 is always terminated by the first empty line after the header fields."
787
788 However, in real world it was observed that some web servers
789 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
790 */
791                 case 204:
792                         if (G.beg_range != 0) {
793                                 /* "Range:..." was not honored by the server.
794                                  * Restart download from the beginning.
795                                  */
796                                 reset_beg_range_to_zero();
797                         }
798                         break;
799                 case 300:  /* redirection */
800                 case 301:
801                 case 302:
802                 case 303:
803                         break;
804                 case 206: /* Partial Content */
805                         if (G.beg_range != 0)
806                                 /* "Range:..." worked. Good. */
807                                 break;
808                         /* Partial Content even though we did not ask for it??? */
809                         /* fall through */
810                 default:
811                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
812                 }
813
814                 /*
815                  * Retrieve HTTP headers.
816                  */
817                 while ((str = gethdr(sfp)) != NULL) {
818                         static const char keywords[] ALIGN1 =
819                                 "content-length\0""transfer-encoding\0""location\0";
820                         enum {
821                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
822                         };
823                         smalluint key;
824
825                         /* gethdr converted "FOO:" string to lowercase */
826
827                         /* strip trailing whitespace */
828                         char *s = strchrnul(str, '\0') - 1;
829                         while (s >= str && (*s == ' ' || *s == '\t')) {
830                                 *s = '\0';
831                                 s--;
832                         }
833                         key = index_in_strings(keywords, G.wget_buf) + 1;
834                         if (key == KEY_content_length) {
835                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
836                                 if (G.content_len < 0 || errno) {
837                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
838                                 }
839                                 G.got_clen = 1;
840                                 continue;
841                         }
842                         if (key == KEY_transfer_encoding) {
843                                 if (strcmp(str_tolower(str), "chunked") != 0)
844                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
845                                 G.chunked = 1;
846                         }
847                         if (key == KEY_location && status >= 300) {
848                                 if (--redir_limit == 0)
849                                         bb_error_msg_and_die("too many redirections");
850                                 fclose(sfp);
851                                 if (str[0] == '/') {
852                                         free(redirected_path);
853                                         target.path = redirected_path = xstrdup(str+1);
854                                         /* lsa stays the same: it's on the same server */
855                                 } else {
856                                         parse_url(str, &target);
857                                         if (!use_proxy) {
858                                                 free(server.allocated);
859                                                 server.allocated = NULL;
860                                                 server.host = target.host;
861                                                 /* strip_ipv6_scope_id(target.host); - no! */
862                                                 /* we assume remote never gives us IPv6 addr with scope id */
863                                                 server.port = target.port;
864                                                 free(lsa);
865                                                 goto resolve_lsa;
866                                         } /* else: lsa stays the same: we use proxy */
867                                 }
868                                 goto establish_session;
869                         }
870                 }
871 //              if (status >= 300)
872 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
873
874                 /* For HTTP, data is pumped over the same connection */
875                 dfp = sfp;
876
877         } else {
878                 /*
879                  *  FTP session
880                  */
881                 sfp = prepare_ftp_session(&dfp, &target, lsa);
882         }
883
884         free(lsa);
885
886         if (!(option_mask32 & WGET_OPT_SPIDER)) {
887                 if (G.output_fd < 0)
888                         G.output_fd = xopen(G.fname_out, G.o_flags);
889                 retrieve_file_data(dfp);
890                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
891                         xclose(G.output_fd);
892                         G.output_fd = -1;
893                 }
894         }
895
896         if (dfp != sfp) {
897                 /* It's ftp. Close data connection properly */
898                 fclose(dfp);
899                 if (ftpcmd(NULL, NULL, sfp) != 226)
900                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
901                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
902         }
903         fclose(sfp);
904
905         free(server.allocated);
906         free(target.allocated);
907         free(fname_out_alloc);
908         free(redirected_path);
909 }
910
911 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
912 int wget_main(int argc UNUSED_PARAM, char **argv)
913 {
914 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
915         static const char wget_longopts[] ALIGN1 =
916                 /* name, has_arg, val */
917                 "continue\0"         No_argument       "c"
918 //FIXME: -s isn't --spider, it's --save-headers!
919                 "spider\0"           No_argument       "s"
920                 "quiet\0"            No_argument       "q"
921                 "output-document\0"  Required_argument "O"
922                 "directory-prefix\0" Required_argument "P"
923                 "proxy\0"            Required_argument "Y"
924                 "user-agent\0"       Required_argument "U"
925 #if ENABLE_FEATURE_WGET_TIMEOUT
926                 "timeout\0"          Required_argument "T"
927 #endif
928                 /* Ignored: */
929                 // "tries\0"            Required_argument "t"
930                 /* Ignored (we always use PASV): */
931                 "passive-ftp\0"      No_argument       "\xff"
932                 "header\0"           Required_argument "\xfe"
933                 "post-data\0"        Required_argument "\xfd"
934                 /* Ignored (we don't do ssl) */
935                 "no-check-certificate\0" No_argument   "\xfc"
936                 /* Ignored (we don't support caching) */
937                 "no-cache\0"         No_argument       "\xfb"
938                 ;
939 #endif
940
941 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
942         llist_t *headers_llist = NULL;
943 #endif
944
945         INIT_G();
946
947         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
948         G.proxy_flag = "on";   /* use proxies if env vars are set */
949         G.user_agent = "Wget"; /* "User-Agent" header field */
950
951 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
952         applet_long_options = wget_longopts;
953 #endif
954         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
955         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
956                 &G.fname_out, &G.dir_prefix,
957                 &G.proxy_flag, &G.user_agent,
958                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
959                 NULL /* -t RETRIES */
960                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
961                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
962         );
963         argv += optind;
964
965 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
966         if (headers_llist) {
967                 int size = 1;
968                 char *cp;
969                 llist_t *ll = headers_llist;
970                 while (ll) {
971                         size += strlen(ll->data) + 2;
972                         ll = ll->link;
973                 }
974                 G.extra_headers = cp = xmalloc(size);
975                 while (headers_llist) {
976                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
977                 }
978         }
979 #endif
980
981         G.output_fd = -1;
982         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
983         if (G.fname_out) { /* -O FILE ? */
984                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
985                         G.output_fd = 1;
986                         option_mask32 &= ~WGET_OPT_CONTINUE;
987                 }
988                 /* compat with wget: -O FILE can overwrite */
989                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
990         }
991
992         while (*argv)
993                 download_one_url(*argv++);
994
995         if (G.output_fd >= 0)
996                 xclose(G.output_fd);
997
998         return EXIT_SUCCESS;
999 }