tftpd: fix -u USER to work with chroot. Closes 5348
[platform/upstream/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage:    "        [--no-check-certificate] [--no-cache]" */
18 //usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19 //usage:        )
20 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23 //usage:        )
24 //usage:#define wget_full_usage "\n\n"
25 //usage:       "Retrieve files via HTTP or FTP\n"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
/*
 * Protocol tracing hook. Flip the "#if 0" to "#if 1" to have every line
 * sent to / received from the server reported through bb_error_msg().
 * In the default configuration the macro expands to a no-op and its
 * arguments are not evaluated.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
#else
# define log_io(...) ((void)0)
#endif
44
45
46 struct host_info {
47         char *allocated;
48         const char *path;
49         const char *user;
50         char       *host;
51         int         port;
52         smallint    is_ftp;
53 };
54
55
56 /* Globals */
57 struct globals {
58         off_t content_len;        /* Content-length of the file */
59         off_t beg_range;          /* Range at which continue begins */
60 #if ENABLE_FEATURE_WGET_STATUSBAR
61         off_t transferred;        /* Number of bytes transferred so far */
62         const char *curfile;      /* Name of current file being transferred */
63         bb_progress_t pmt;
64 #endif
65         char *dir_prefix;
66 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
67         char *post_data;
68         char *extra_headers;
69 #endif
70         char *fname_out;        /* where to direct output (-O) */
71         const char *proxy_flag; /* Use proxies if env vars are set */
72         const char *user_agent; /* "User-Agent" header field */
73 #if ENABLE_FEATURE_WGET_TIMEOUT
74         unsigned timeout_seconds;
75 #endif
76         int output_fd;
77         int o_flags;
78         smallint chunked;         /* chunked transfer encoding */
79         smallint got_clen;        /* got content-length: from server  */
80         /* Local downloads do benefit from big buffer.
81          * With 512 byte buffer, it was measured to be
82          * an order of magnitude slower than with big one.
83          */
84         uint64_t just_to_align_next_member;
85         char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
86 } FIX_ALIASING;
87 #define G (*ptr_to_globals)
88 #define INIT_G() do { \
89         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
90         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
91 } while (0)
92
93
94 /* Must match option string! */
95 enum {
96         WGET_OPT_CONTINUE   = (1 << 0),
97         WGET_OPT_SPIDER     = (1 << 1),
98         WGET_OPT_QUIET      = (1 << 2),
99         WGET_OPT_OUTNAME    = (1 << 3),
100         WGET_OPT_PREFIX     = (1 << 4),
101         WGET_OPT_PROXY      = (1 << 5),
102         WGET_OPT_USER_AGENT = (1 << 6),
103         WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
104         WGET_OPT_RETRIES    = (1 << 8),
105         WGET_OPT_PASSIVE    = (1 << 9),
106         WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
107         WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
108 };
109
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,  /* set the bar up, then draw it */
	PROGRESS_END   = 0,   /* draw it one last time, then tear it down */
	PROGRESS_BUMP  = 1,   /* just redraw */
};
115 #if ENABLE_FEATURE_WGET_STATUSBAR
116 static void progress_meter(int flag)
117 {
118         if (option_mask32 & WGET_OPT_QUIET)
119                 return;
120
121         if (flag == PROGRESS_START)
122                 bb_progress_init(&G.pmt, G.curfile);
123
124         bb_progress_update(&G.pmt,
125                         G.beg_range,
126                         G.transferred,
127                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
128         );
129
130         if (flag == PROGRESS_END) {
131                 bb_progress_free(&G.pmt);
132                 bb_putchar_stderr('\n');
133                 G.transferred = 0;
134         }
135 }
136 #else
137 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
138 #endif
139
140
/* IPv6 knows scoped address types, i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Remove the "%zone" part from a bracketed IPv6 host string, in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * The string is left unchanged when it does not start with '[',
 * contains no '%', or is not of the form "[xx]" / "[xx]:nn".
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' that comes after the ']' (say, in the port field) is not
	 * a zone identifier. The splice below is only meaningful when
	 * the '%' sits inside the brackets, so leave such input alone.
	 */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...".
	 * Move the tail (including its NUL) down over the zone identifier;
	 * the regions overlap, hence memmove.
	 */
	memmove(scope, cp, strlen(cp) + 1);
}
176
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return that buffer.
 * The result is only valid until G.wget_buf is used for something else.
 * Overlong input is silently cut short so that the encoded form
 * fits into the buffer.
 */
static char *base64enc(const char *str)
{
	/* paranoia: longest input we are willing to encode */
	const unsigned limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned n = strlen(str);

	if (n > limit)
		n = limit;
	bb_uuencode(G.wget_buf, str, n, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
188
/*
 * Cut s short at its first byte below ' ' (control characters such as
 * CR, LF, TAB, ESC), in place. Bytes are compared as unsigned, so
 * values of 0x80 and above are kept. Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *end;

	for (end = (unsigned char *) s; *end >= ' '; end++)
		continue;
	*end = '\0';
	return s;
}
197
198 static FILE *open_socket(len_and_sockaddr *lsa)
199 {
200         FILE *fp;
201
202         /* glibc 2.4 seems to try seeking on it - ??! */
203         /* hopefully it understands what ESPIPE means... */
204         fp = fdopen(xconnect_stream(lsa), "r+");
205         if (fp == NULL)
206                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
207
208         return fp;
209 }
210
211 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
212 static char fgets_and_trim(FILE *fp)
213 {
214         char c;
215         char *buf_ptr;
216
217         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
218                 bb_perror_msg_and_die("error getting response");
219
220         buf_ptr = strchrnul(G.wget_buf, '\n');
221         c = *buf_ptr;
222         *buf_ptr = '\0';
223         buf_ptr = strchrnul(G.wget_buf, '\r');
224         *buf_ptr = '\0';
225
226         log_io("< %s", G.wget_buf);
227
228         return c;
229 }
230
231 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
232 {
233         int result;
234         if (s1) {
235                 if (!s2)
236                         s2 = "";
237                 fprintf(fp, "%s%s\r\n", s1, s2);
238                 fflush(fp);
239                 log_io("> %s%s", s1, s2);
240         }
241
242         do {
243                 fgets_and_trim(fp);
244         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
245
246         G.wget_buf[3] = '\0';
247         result = xatoi_positive(G.wget_buf);
248         G.wget_buf[3] = ' ';
249         return result;
250 }
251
252 static void parse_url(const char *src_url, struct host_info *h)
253 {
254         char *url, *p, *sp;
255
256         free(h->allocated);
257         h->allocated = url = xstrdup(src_url);
258
259         if (strncmp(url, "http://", 7) == 0) {
260                 h->port = bb_lookup_port("http", "tcp", 80);
261                 h->host = url + 7;
262                 h->is_ftp = 0;
263         } else if (strncmp(url, "ftp://", 6) == 0) {
264                 h->port = bb_lookup_port("ftp", "tcp", 21);
265                 h->host = url + 6;
266                 h->is_ftp = 1;
267         } else
268                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
269
270         // FYI:
271         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
272         //   'GET /?var=a/b HTTP 1.0'
273         //   and saves 'index.html?var=a%2Fb' (we save 'b')
274         // wget 'http://busybox.net?login=john@doe':
275         //   request: 'GET /?login=john@doe HTTP/1.0'
276         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
277         // wget 'http://busybox.net#test/test':
278         //   request: 'GET / HTTP/1.0'
279         //   saves: 'index.html' (we save 'test')
280         //
281         // We also don't add unique .N suffix if file exists...
282         sp = strchr(h->host, '/');
283         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
284         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
285         if (!sp) {
286                 h->path = "";
287         } else if (*sp == '/') {
288                 *sp = '\0';
289                 h->path = sp + 1;
290         } else { // '#' or '?'
291                 // http://busybox.net?login=john@doe is a valid URL
292                 // memmove converts to:
293                 // http:/busybox.nett?login=john@doe...
294                 memmove(h->host - 1, h->host, sp - h->host);
295                 h->host--;
296                 sp[-1] = '\0';
297                 h->path = sp;
298         }
299
300         // We used to set h->user to NULL here, but this interferes
301         // with handling of code 302 ("object was moved")
302
303         sp = strrchr(h->host, '@');
304         if (sp != NULL) {
305                 // URL-decode "user:password" string before base64-encoding:
306                 // wget http://test:my%20pass@example.com should send
307                 // Authorization: Basic dGVzdDpteSBwYXNz
308                 // which decodes to "test:my pass".
309                 // Standard wget and curl do this too.
310                 *sp = '\0';
311                 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
312                 h->host = sp + 1;
313         }
314
315         sp = h->host;
316 }
317
318 static char *gethdr(FILE *fp)
319 {
320         char *s, *hdrval;
321         int c;
322
323         /* retrieve header line */
324         c = fgets_and_trim(fp);
325
326         /* end of the headers? */
327         if (G.wget_buf[0] == '\0')
328                 return NULL;
329
330         /* convert the header name to lower case */
331         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
332                 /* tolower for "A-Z", no-op for "0-9a-z-." */
333                 *s |= 0x20;
334         }
335
336         /* verify we are at the end of the header name */
337         if (*s != ':')
338                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
339
340         /* locate the start of the header value */
341         *s++ = '\0';
342         hdrval = skip_whitespace(s);
343
344         if (c != '\n') {
345                 /* Rats! The buffer isn't big enough to hold the entire header value */
346                 while (c = getc(fp), c != EOF && c != '\n')
347                         continue;
348         }
349
350         return hdrval;
351 }
352
353 static void reset_beg_range_to_zero(void)
354 {
355         bb_error_msg("restart failed");
356         G.beg_range = 0;
357         xlseek(G.output_fd, 0, SEEK_SET);
358         /* Done at the end instead: */
359         /* ftruncate(G.output_fd, 0); */
360 }
361
362 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
363 {
364         FILE *sfp;
365         char *str;
366         int port;
367
368         if (!target->user)
369                 target->user = xstrdup("anonymous:busybox@");
370
371         sfp = open_socket(lsa);
372         if (ftpcmd(NULL, NULL, sfp) != 220)
373                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
374
375         /*
376          * Splitting username:password pair,
377          * trying to log in
378          */
379         str = strchr(target->user, ':');
380         if (str)
381                 *str++ = '\0';
382         switch (ftpcmd("USER ", target->user, sfp)) {
383         case 230:
384                 break;
385         case 331:
386                 if (ftpcmd("PASS ", str, sfp) == 230)
387                         break;
388                 /* fall through (failed login) */
389         default:
390                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
391         }
392
393         ftpcmd("TYPE I", NULL, sfp);
394
395         /*
396          * Querying file size
397          */
398         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
399                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
400                 if (G.content_len < 0 || errno) {
401                         bb_error_msg_and_die("SIZE value is garbage");
402                 }
403                 G.got_clen = 1;
404         }
405
406         /*
407          * Entering passive mode
408          */
409         if (ftpcmd("PASV", NULL, sfp) != 227) {
410  pasv_error:
411                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
412         }
413         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
414         // Server's IP is N1.N2.N3.N4 (we ignore it)
415         // Server's port for data connection is P1*256+P2
416         str = strrchr(G.wget_buf, ')');
417         if (str) str[0] = '\0';
418         str = strrchr(G.wget_buf, ',');
419         if (!str) goto pasv_error;
420         port = xatou_range(str+1, 0, 255);
421         *str = '\0';
422         str = strrchr(G.wget_buf, ',');
423         if (!str) goto pasv_error;
424         port += xatou_range(str+1, 0, 255) * 256;
425         set_nport(&lsa->u.sa, htons(port));
426
427         *dfpp = open_socket(lsa);
428
429         if (G.beg_range != 0) {
430                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
431                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
432                         G.content_len -= G.beg_range;
433                 else
434                         reset_beg_range_to_zero();
435         }
436
437         if (ftpcmd("RETR ", target->path, sfp) > 150)
438                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
439
440         return sfp;
441 }
442
443 static void NOINLINE retrieve_file_data(FILE *dfp)
444 {
445 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
446 # if ENABLE_FEATURE_WGET_TIMEOUT
447         unsigned second_cnt;
448 # endif
449         struct pollfd polldata;
450
451         polldata.fd = fileno(dfp);
452         polldata.events = POLLIN | POLLPRI;
453 #endif
454         progress_meter(PROGRESS_START);
455
456         if (G.chunked)
457                 goto get_clen;
458
459         /* Loops only if chunked */
460         while (1) {
461
462 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
463                 /* Must use nonblocking I/O, otherwise fread will loop
464                  * and *block* until it reads full buffer,
465                  * which messes up progress bar and/or timeout logic.
466                  * Because of nonblocking I/O, we need to dance
467                  * very carefully around EAGAIN. See explanation at
468                  * clearerr() call.
469                  */
470                 ndelay_on(polldata.fd);
471 #endif
472                 while (1) {
473                         int n;
474                         unsigned rdsz;
475
476                         rdsz = sizeof(G.wget_buf);
477                         if (G.got_clen) {
478                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
479                                         if ((int)G.content_len <= 0)
480                                                 break;
481                                         rdsz = (unsigned)G.content_len;
482                                 }
483                         }
484
485 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
486 # if ENABLE_FEATURE_WGET_TIMEOUT
487                         second_cnt = G.timeout_seconds;
488 # endif
489                         while (1) {
490                                 if (safe_poll(&polldata, 1, 1000) != 0)
491                                         break; /* error, EOF, or data is available */
492 # if ENABLE_FEATURE_WGET_TIMEOUT
493                                 if (second_cnt != 0 && --second_cnt == 0) {
494                                         progress_meter(PROGRESS_END);
495                                         bb_error_msg_and_die("download timed out");
496                                 }
497 # endif
498                                 /* Needed for "stalled" indicator */
499                                 progress_meter(PROGRESS_BUMP);
500                         }
501
502                         /* fread internally uses read loop, which in our case
503                          * is usually exited when we get EAGAIN.
504                          * In this case, libc sets error marker on the stream.
505                          * Need to clear it before next fread to avoid possible
506                          * rare false positive ferror below. Rare because usually
507                          * fread gets more than zero bytes, and we don't fall
508                          * into if (n <= 0) ...
509                          */
510                         clearerr(dfp);
511                         errno = 0;
512 #endif
513                         n = fread(G.wget_buf, 1, rdsz, dfp);
514                         /* man fread:
515                          * If error occurs, or EOF is reached, the return value
516                          * is a short item count (or zero).
517                          * fread does not distinguish between EOF and error.
518                          */
519                         if (n <= 0) {
520 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
521                                 if (errno == EAGAIN) /* poll lied, there is no data? */
522                                         continue; /* yes */
523 #endif
524                                 if (ferror(dfp))
525                                         bb_perror_msg_and_die(bb_msg_read_error);
526                                 break; /* EOF, not error */
527                         }
528
529                         xwrite(G.output_fd, G.wget_buf, n);
530
531 #if ENABLE_FEATURE_WGET_STATUSBAR
532                         G.transferred += n;
533                         progress_meter(PROGRESS_BUMP);
534 #endif
535                         if (G.got_clen) {
536                                 G.content_len -= n;
537                                 if (G.content_len == 0)
538                                         break;
539                         }
540                 }
541 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
542                 clearerr(dfp);
543                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
544 #endif
545                 if (!G.chunked)
546                         break;
547
548                 fgets_and_trim(dfp); /* Eat empty line */
549  get_clen:
550                 fgets_and_trim(dfp);
551                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
552                 /* FIXME: error check? */
553                 if (G.content_len == 0)
554                         break; /* all done! */
555                 G.got_clen = 1;
556         }
557
558         /* If -c failed, we restart from the beginning,
559          * but we do not truncate file then, we do it only now, at the end.
560          * This lets user to ^C if his 99% complete 10 GB file download
561          * failed to restart *without* losing the almost complete file.
562          */
563         {
564                 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
565                 if (pos != (off_t)-1)
566                         ftruncate(G.output_fd, pos);
567         }
568
569         /* Draw full bar and free its resources */
570         G.chunked = 0;  /* makes it show 100% even for chunked download */
571         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
572         progress_meter(PROGRESS_END);
573 }
574
575 static void download_one_url(const char *url)
576 {
577         bool use_proxy;                 /* Use proxies if env vars are set  */
578         int redir_limit;
579         len_and_sockaddr *lsa;
580         FILE *sfp;                      /* socket to web/ftp server         */
581         FILE *dfp;                      /* socket to ftp server (data)      */
582         char *proxy = NULL;
583         char *fname_out_alloc;
584         char *redirected_path = NULL;
585         struct host_info server;
586         struct host_info target;
587
588         server.allocated = NULL;
589         target.allocated = NULL;
590         server.user = NULL;
591         target.user = NULL;
592
593         parse_url(url, &target);
594
595         /* Use the proxy if necessary */
596         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
597         if (use_proxy) {
598                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
599                 use_proxy = (proxy && proxy[0]);
600                 if (use_proxy)
601                         parse_url(proxy, &server);
602         }
603         if (!use_proxy) {
604                 server.port = target.port;
605                 if (ENABLE_FEATURE_IPV6) {
606                         //free(server.allocated); - can't be non-NULL
607                         server.host = server.allocated = xstrdup(target.host);
608                 } else {
609                         server.host = target.host;
610                 }
611         }
612
613         if (ENABLE_FEATURE_IPV6)
614                 strip_ipv6_scope_id(target.host);
615
616         /* If there was no -O FILE, guess output filename */
617         fname_out_alloc = NULL;
618         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
619                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
620                 /* handle "wget http://kernel.org//" */
621                 if (G.fname_out[0] == '/' || !G.fname_out[0])
622                         G.fname_out = (char*)"index.html";
623                 /* -P DIR is considered only if there was no -O FILE */
624                 if (G.dir_prefix)
625                         G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
626                 else {
627                         /* redirects may free target.path later, need to make a copy */
628                         G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
629                 }
630         }
631 #if ENABLE_FEATURE_WGET_STATUSBAR
632         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
633 #endif
634
635         /* Determine where to start transfer */
636         G.beg_range = 0;
637         if (option_mask32 & WGET_OPT_CONTINUE) {
638                 G.output_fd = open(G.fname_out, O_WRONLY);
639                 if (G.output_fd >= 0) {
640                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
641                 }
642                 /* File doesn't exist. We do not create file here yet.
643                  * We are not sure it exists on remote side */
644         }
645
646         redir_limit = 5;
647  resolve_lsa:
648         lsa = xhost2sockaddr(server.host, server.port);
649         if (!(option_mask32 & WGET_OPT_QUIET)) {
650                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
651                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
652                 free(s);
653         }
654  establish_session:
655         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
656         G.got_clen = 0;
657         G.chunked = 0;
658         if (use_proxy || !target.is_ftp) {
659                 /*
660                  *  HTTP session
661                  */
662                 char *str;
663                 int status;
664
665
666                 /* Open socket to http server */
667                 sfp = open_socket(lsa);
668
669                 /* Send HTTP request */
670                 if (use_proxy) {
671                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
672                                 target.is_ftp ? "f" : "ht", target.host,
673                                 target.path);
674                 } else {
675                         if (option_mask32 & WGET_OPT_POST_DATA)
676                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
677                         else
678                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
679                 }
680
681                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
682                         target.host, G.user_agent);
683
684                 /* Ask server to close the connection as soon as we are done
685                  * (IOW: we do not intend to send more requests)
686                  */
687                 fprintf(sfp, "Connection: close\r\n");
688
689 #if ENABLE_FEATURE_WGET_AUTHENTICATION
690                 if (target.user) {
691                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
692                                 base64enc(target.user));
693                 }
694                 if (use_proxy && server.user) {
695                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
696                                 base64enc(server.user));
697                 }
698 #endif
699
700                 if (G.beg_range != 0)
701                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
702
703 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
704                 if (G.extra_headers)
705                         fputs(G.extra_headers, sfp);
706
707                 if (option_mask32 & WGET_OPT_POST_DATA) {
708                         fprintf(sfp,
709                                 "Content-Type: application/x-www-form-urlencoded\r\n"
710                                 "Content-Length: %u\r\n"
711                                 "\r\n"
712                                 "%s",
713                                 (int) strlen(G.post_data), G.post_data
714                         );
715                 } else
716 #endif
717                 {
718                         fprintf(sfp, "\r\n");
719                 }
720
721                 fflush(sfp);
722
723                 /*
724                  * Retrieve HTTP response line and check for "200" status code.
725                  */
726  read_response:
727                 fgets_and_trim(sfp);
728
729                 str = G.wget_buf;
730                 str = skip_non_whitespace(str);
731                 str = skip_whitespace(str);
732                 // FIXME: no error check
733                 // xatou wouldn't work: "200 OK"
734                 status = atoi(str);
735                 switch (status) {
736                 case 0:
737                 case 100:
738                         while (gethdr(sfp) != NULL)
739                                 /* eat all remaining headers */;
740                         goto read_response;
741                 case 200:
742 /*
743 Response 204 doesn't say "null file", it says "metadata
744 has changed but data didn't":
745
746 "10.2.5 204 No Content
747 The server has fulfilled the request but does not need to return
748 an entity-body, and might want to return updated metainformation.
749 The response MAY include new or updated metainformation in the form
750 of entity-headers, which if present SHOULD be associated with
751 the requested variant.
752
753 If the client is a user agent, it SHOULD NOT change its document
754 view from that which caused the request to be sent. This response
755 is primarily intended to allow input for actions to take place
756 without causing a change to the user agent's active document view,
757 although any new or updated metainformation SHOULD be applied
758 to the document currently in the user agent's active view.
759
760 The 204 response MUST NOT include a message-body, and thus
761 is always terminated by the first empty line after the header fields."
762
763 However, in real world it was observed that some web servers
764 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
765 */
766                 case 204:
767                         if (G.beg_range != 0) {
768                                 /* "Range:..." was not honored by the server.
769                                  * Restart download from the beginning.
770                                  */
771                                 reset_beg_range_to_zero();
772                         }
773                         break;
774                 case 300:  /* redirection */
775                 case 301:
776                 case 302:
777                 case 303:
778                         break;
779                 case 206: /* Partial Content */
780                         if (G.beg_range != 0)
781                                 /* "Range:..." worked. Good. */
782                                 break;
783                         /* Partial Content even though we did not ask for it??? */
784                         /* fall through */
785                 default:
786                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
787                 }
788
789                 /*
790                  * Retrieve HTTP headers.
791                  */
792                 while ((str = gethdr(sfp)) != NULL) {
793                         static const char keywords[] ALIGN1 =
794                                 "content-length\0""transfer-encoding\0""location\0";
795                         enum {
796                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
797                         };
798                         smalluint key;
799
800                         /* gethdr converted "FOO:" string to lowercase */
801
802                         /* strip trailing whitespace */
803                         char *s = strchrnul(str, '\0') - 1;
804                         while (s >= str && (*s == ' ' || *s == '\t')) {
805                                 *s = '\0';
806                                 s--;
807                         }
808                         key = index_in_strings(keywords, G.wget_buf) + 1;
809                         if (key == KEY_content_length) {
810                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
811                                 if (G.content_len < 0 || errno) {
812                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
813                                 }
814                                 G.got_clen = 1;
815                                 continue;
816                         }
817                         if (key == KEY_transfer_encoding) {
818                                 if (strcmp(str_tolower(str), "chunked") != 0)
819                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
820                                 G.chunked = 1;
821                         }
822                         if (key == KEY_location && status >= 300) {
823                                 if (--redir_limit == 0)
824                                         bb_error_msg_and_die("too many redirections");
825                                 fclose(sfp);
826                                 if (str[0] == '/') {
827                                         free(redirected_path);
828                                         target.path = redirected_path = xstrdup(str+1);
829                                         /* lsa stays the same: it's on the same server */
830                                 } else {
831                                         parse_url(str, &target);
832                                         if (!use_proxy) {
833                                                 free(server.allocated);
834                                                 server.allocated = NULL;
835                                                 server.host = target.host;
836                                                 /* strip_ipv6_scope_id(target.host); - no! */
837                                                 /* we assume remote never gives us IPv6 addr with scope id */
838                                                 server.port = target.port;
839                                                 free(lsa);
840                                                 goto resolve_lsa;
841                                         } /* else: lsa stays the same: we use proxy */
842                                 }
843                                 goto establish_session;
844                         }
845                 }
846 //              if (status >= 300)
847 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
848
849                 /* For HTTP, data is pumped over the same connection */
850                 dfp = sfp;
851
852         } else {
853                 /*
854                  *  FTP session
855                  */
856                 sfp = prepare_ftp_session(&dfp, &target, lsa);
857         }
858
859         free(lsa);
860
861         if (!(option_mask32 & WGET_OPT_SPIDER)) {
862                 if (G.output_fd < 0)
863                         G.output_fd = xopen(G.fname_out, G.o_flags);
864                 retrieve_file_data(dfp);
865                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
866                         xclose(G.output_fd);
867                         G.output_fd = -1;
868                 }
869         }
870
871         if (dfp != sfp) {
872                 /* It's ftp. Close data connection properly */
873                 fclose(dfp);
874                 if (ftpcmd(NULL, NULL, sfp) != 226)
875                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
876                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
877         }
878         fclose(sfp);
879
880         free(server.allocated);
881         free(target.allocated);
882         free(fname_out_alloc);
883         free(redirected_path);
884 }
885
886 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
887 int wget_main(int argc UNUSED_PARAM, char **argv)
888 {
889 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
890         static const char wget_longopts[] ALIGN1 =
891                 /* name, has_arg, val */
892                 "continue\0"         No_argument       "c"
893 //FIXME: -s isn't --spider, it's --save-headers!
894                 "spider\0"           No_argument       "s"
895                 "quiet\0"            No_argument       "q"
896                 "output-document\0"  Required_argument "O"
897                 "directory-prefix\0" Required_argument "P"
898                 "proxy\0"            Required_argument "Y"
899                 "user-agent\0"       Required_argument "U"
900 #if ENABLE_FEATURE_WGET_TIMEOUT
901                 "timeout\0"          Required_argument "T"
902 #endif
903                 /* Ignored: */
904                 // "tries\0"            Required_argument "t"
905                 /* Ignored (we always use PASV): */
906                 "passive-ftp\0"      No_argument       "\xff"
907                 "header\0"           Required_argument "\xfe"
908                 "post-data\0"        Required_argument "\xfd"
909                 /* Ignored (we don't do ssl) */
910                 "no-check-certificate\0" No_argument   "\xfc"
911                 /* Ignored (we don't support caching) */
912                 "no-cache\0"         No_argument       "\xfb"
913                 ;
914 #endif
915
916 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
917         llist_t *headers_llist = NULL;
918 #endif
919
920         INIT_G();
921
922         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
923         G.proxy_flag = "on";   /* use proxies if env vars are set */
924         G.user_agent = "Wget"; /* "User-Agent" header field */
925
926 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
927         applet_long_options = wget_longopts;
928 #endif
929         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
930         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
931                 &G.fname_out, &G.dir_prefix,
932                 &G.proxy_flag, &G.user_agent,
933                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
934                 NULL /* -t RETRIES */
935                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
936                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
937         );
938         argv += optind;
939
940 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
941         if (headers_llist) {
942                 int size = 1;
943                 char *cp;
944                 llist_t *ll = headers_llist;
945                 while (ll) {
946                         size += strlen(ll->data) + 2;
947                         ll = ll->link;
948                 }
949                 G.extra_headers = cp = xmalloc(size);
950                 while (headers_llist) {
951                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
952                 }
953         }
954 #endif
955
956         G.output_fd = -1;
957         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
958         if (G.fname_out) { /* -O FILE ? */
959                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
960                         G.output_fd = 1;
961                         option_mask32 &= ~WGET_OPT_CONTINUE;
962                 }
963                 /* compat with wget: -O FILE can overwrite */
964                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
965         }
966
967         while (*argv)
968                 download_one_url(*argv++);
969
970         if (G.output_fd >= 0)
971                 xclose(G.output_fd);
972
973         return EXIT_SUCCESS;
974 }