nc: don't redirect stderr to network in -e PROG mode
[platform/upstream/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage:    "        [--no-check-certificate] [--no-cache]" */
18 //usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19 //usage:        )
20 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23 //usage:        )
24 //usage:#define wget_full_usage "\n\n"
25 //usage:       "Retrieve files via HTTP or FTP\n"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
/*
 * Protocol tracing hook. As shipped, log_io() expands to nothing.
 * Change the condition below to 1 to route every traced line
 * through bb_error_msg() instead.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
#else
# define log_io(...) ((void)0)
#endif
44
45
46 struct host_info {
47         char *allocated;
48         const char *path;
49         const char *user;
50         char       *host;
51         int         port;
52         smallint    is_ftp;
53 };
54
55
56 /* Globals */
57 struct globals {
58         off_t content_len;        /* Content-length of the file */
59         off_t beg_range;          /* Range at which continue begins */
60 #if ENABLE_FEATURE_WGET_STATUSBAR
61         off_t transferred;        /* Number of bytes transferred so far */
62         const char *curfile;      /* Name of current file being transferred */
63         bb_progress_t pmt;
64 #endif
65         char *dir_prefix;
66 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
67         char *post_data;
68         char *extra_headers;
69 #endif
70         char *fname_out;        /* where to direct output (-O) */
71         const char *proxy_flag; /* Use proxies if env vars are set */
72         const char *user_agent; /* "User-Agent" header field */
73 #if ENABLE_FEATURE_WGET_TIMEOUT
74         unsigned timeout_seconds;
75 #endif
76         int output_fd;
77         int o_flags;
78         smallint chunked;         /* chunked transfer encoding */
79         smallint got_clen;        /* got content-length: from server  */
80         /* Local downloads do benefit from big buffer.
81          * With 512 byte buffer, it was measured to be
82          * an order of magnitude slower than with big one.
83          */
84         uint64_t just_to_align_next_member;
85         char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
86 } FIX_ALIASING;
87 #define G (*ptr_to_globals)
88 #define INIT_G() do { \
89         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
90         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
91 } while (0)
92
93
94 /* Must match option string! */
95 enum {
96         WGET_OPT_CONTINUE   = (1 << 0),
97         WGET_OPT_SPIDER     = (1 << 1),
98         WGET_OPT_QUIET      = (1 << 2),
99         WGET_OPT_OUTNAME    = (1 << 3),
100         WGET_OPT_PREFIX     = (1 << 4),
101         WGET_OPT_PROXY      = (1 << 5),
102         WGET_OPT_USER_AGENT = (1 << 6),
103         WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
104         WGET_OPT_RETRIES    = (1 << 8),
105         WGET_OPT_PASSIVE    = (1 << 9),
106         WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
107         WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
108 };
109
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,	/* initialize the meter, then draw it */
	PROGRESS_END   = 0,	/* draw it one last time and release it */
	PROGRESS_BUMP  = 1,	/* just redraw */
};
115 #if ENABLE_FEATURE_WGET_STATUSBAR
116 static void progress_meter(int flag)
117 {
118         if (option_mask32 & WGET_OPT_QUIET)
119                 return;
120
121         if (flag == PROGRESS_START)
122                 bb_progress_init(&G.pmt, G.curfile);
123
124         bb_progress_update(&G.pmt,
125                         G.beg_range,
126                         G.transferred,
127                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
128         );
129
130         if (flag == PROGRESS_END) {
131                 bb_progress_free(&G.pmt);
132                 bb_putchar_stderr('\n');
133                 G.transferred = 0;
134         }
135 }
136 #else
137 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
138 #endif
139
140
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * This function edits host in place: "[fe80::1%eth0]:80" becomes
 * "[fe80::1]:80". Strings which are not of the form "[xx%zone]" or
 * "[xx%zone]:nn" are left unchanged.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the ']' is not a zone identifier.
	 * Copying "]..." forward onto it would also write past
	 * the end of the string, so leave such input alone. */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]..." */
	overlapping_strcpy(scope, cp);
}
176
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * Input longer than the limit computed below is silently cut short
 * before encoding. The result is overwritten by the next user of
 * the shared buffer.
 */
static char *base64enc(const char *str)
{
	const size_t max_input = sizeof(G.wget_buf)/4*3 - 10; /* paranoia */
	unsigned input_len = strlen(str);

	if (input_len > max_input)
		input_len = max_input;
	bb_uuencode(G.wget_buf, str, input_len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
188
/*
 * Cut s at its first byte whose unsigned value is below ' '
 * (control characters; the terminating NUL also stops the scan).
 * Bytes of 0x80 and above are kept. Modifies s in place, returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cursor;

	for (cursor = (unsigned char *) s; *cursor >= ' '; cursor++)
		continue;
	*cursor = '\0';
	return s;
}
197
198 static FILE *open_socket(len_and_sockaddr *lsa)
199 {
200         FILE *fp;
201
202         /* glibc 2.4 seems to try seeking on it - ??! */
203         /* hopefully it understands what ESPIPE means... */
204         fp = fdopen(xconnect_stream(lsa), "r+");
205         if (fp == NULL)
206                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
207
208         return fp;
209 }
210
211 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
212 static char fgets_and_trim(FILE *fp)
213 {
214         char c;
215         char *buf_ptr;
216
217         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
218                 bb_perror_msg_and_die("error getting response");
219
220         buf_ptr = strchrnul(G.wget_buf, '\n');
221         c = *buf_ptr;
222         *buf_ptr = '\0';
223         buf_ptr = strchrnul(G.wget_buf, '\r');
224         *buf_ptr = '\0';
225
226         log_io("< %s", G.wget_buf);
227
228         return c;
229 }
230
231 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
232 {
233         int result;
234         if (s1) {
235                 if (!s2)
236                         s2 = "";
237                 fprintf(fp, "%s%s\r\n", s1, s2);
238                 fflush(fp);
239                 log_io("> %s%s", s1, s2);
240         }
241
242         do {
243                 fgets_and_trim(fp);
244         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
245
246         G.wget_buf[3] = '\0';
247         result = xatoi_positive(G.wget_buf);
248         G.wget_buf[3] = ' ';
249         return result;
250 }
251
252 static void parse_url(const char *src_url, struct host_info *h)
253 {
254         char *url, *p, *sp;
255
256         free(h->allocated);
257         h->allocated = url = xstrdup(src_url);
258
259         if (strncmp(url, "http://", 7) == 0) {
260                 h->port = bb_lookup_port("http", "tcp", 80);
261                 h->host = url + 7;
262                 h->is_ftp = 0;
263         } else if (strncmp(url, "ftp://", 6) == 0) {
264                 h->port = bb_lookup_port("ftp", "tcp", 21);
265                 h->host = url + 6;
266                 h->is_ftp = 1;
267         } else
268                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
269
270         // FYI:
271         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
272         //   'GET /?var=a/b HTTP 1.0'
273         //   and saves 'index.html?var=a%2Fb' (we save 'b')
274         // wget 'http://busybox.net?login=john@doe':
275         //   request: 'GET /?login=john@doe HTTP/1.0'
276         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
277         // wget 'http://busybox.net#test/test':
278         //   request: 'GET / HTTP/1.0'
279         //   saves: 'index.html' (we save 'test')
280         //
281         // We also don't add unique .N suffix if file exists...
282         sp = strchr(h->host, '/');
283         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
284         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
285         if (!sp) {
286                 h->path = "";
287         } else if (*sp == '/') {
288                 *sp = '\0';
289                 h->path = sp + 1;
290         } else { // '#' or '?'
291                 // http://busybox.net?login=john@doe is a valid URL
292                 // memmove converts to:
293                 // http:/busybox.nett?login=john@doe...
294                 memmove(h->host - 1, h->host, sp - h->host);
295                 h->host--;
296                 sp[-1] = '\0';
297                 h->path = sp;
298         }
299
300         // We used to set h->user to NULL here, but this interferes
301         // with handling of code 302 ("object was moved")
302
303         sp = strrchr(h->host, '@');
304         if (sp != NULL) {
305                 // URL-decode "user:password" string before base64-encoding:
306                 // wget http://test:my%20pass@example.com should send
307                 // Authorization: Basic dGVzdDpteSBwYXNz
308                 // which decodes to "test:my pass".
309                 // Standard wget and curl do this too.
310                 *sp = '\0';
311                 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
312                 h->host = sp + 1;
313         }
314
315         sp = h->host;
316 }
317
318 static char *gethdr(FILE *fp)
319 {
320         char *s, *hdrval;
321         int c;
322
323         /* retrieve header line */
324         c = fgets_and_trim(fp);
325
326         /* end of the headers? */
327         if (G.wget_buf[0] == '\0')
328                 return NULL;
329
330         /* convert the header name to lower case */
331         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
332                 /* tolower for "A-Z", no-op for "0-9a-z-." */
333                 *s |= 0x20;
334         }
335
336         /* verify we are at the end of the header name */
337         if (*s != ':')
338                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
339
340         /* locate the start of the header value */
341         *s++ = '\0';
342         hdrval = skip_whitespace(s);
343
344         if (c != '\n') {
345                 /* Rats! The buffer isn't big enough to hold the entire header value */
346                 while (c = getc(fp), c != EOF && c != '\n')
347                         continue;
348         }
349
350         return hdrval;
351 }
352
353 static void reset_beg_range_to_zero(void)
354 {
355         bb_error_msg("restart failed");
356         G.beg_range = 0;
357         xlseek(G.output_fd, 0, SEEK_SET);
358         /* Done at the end instead: */
359         /* ftruncate(G.output_fd, 0); */
360 }
361
362 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
363 {
364         FILE *sfp;
365         char *str;
366         int port;
367
368         if (!target->user)
369                 target->user = xstrdup("anonymous:busybox@");
370
371         sfp = open_socket(lsa);
372         if (ftpcmd(NULL, NULL, sfp) != 220)
373                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
374
375         /*
376          * Splitting username:password pair,
377          * trying to log in
378          */
379         str = strchr(target->user, ':');
380         if (str)
381                 *str++ = '\0';
382         switch (ftpcmd("USER ", target->user, sfp)) {
383         case 230:
384                 break;
385         case 331:
386                 if (ftpcmd("PASS ", str, sfp) == 230)
387                         break;
388                 /* fall through (failed login) */
389         default:
390                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
391         }
392
393         ftpcmd("TYPE I", NULL, sfp);
394
395         /*
396          * Querying file size
397          */
398         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
399                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
400                 if (G.content_len < 0 || errno) {
401                         bb_error_msg_and_die("SIZE value is garbage");
402                 }
403                 G.got_clen = 1;
404         }
405
406         /*
407          * Entering passive mode
408          */
409         if (ftpcmd("PASV", NULL, sfp) != 227) {
410  pasv_error:
411                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
412         }
413         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
414         // Server's IP is N1.N2.N3.N4 (we ignore it)
415         // Server's port for data connection is P1*256+P2
416         str = strrchr(G.wget_buf, ')');
417         if (str) str[0] = '\0';
418         str = strrchr(G.wget_buf, ',');
419         if (!str) goto pasv_error;
420         port = xatou_range(str+1, 0, 255);
421         *str = '\0';
422         str = strrchr(G.wget_buf, ',');
423         if (!str) goto pasv_error;
424         port += xatou_range(str+1, 0, 255) * 256;
425         set_nport(&lsa->u.sa, htons(port));
426
427         *dfpp = open_socket(lsa);
428
429         if (G.beg_range != 0) {
430                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
431                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
432                         G.content_len -= G.beg_range;
433                 else
434                         reset_beg_range_to_zero();
435         }
436
437         if (ftpcmd("RETR ", target->path, sfp) > 150)
438                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
439
440         return sfp;
441 }
442
443 static void NOINLINE retrieve_file_data(FILE *dfp)
444 {
445 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
446 # if ENABLE_FEATURE_WGET_TIMEOUT
447         unsigned second_cnt = G.timeout_seconds;
448 # endif
449         struct pollfd polldata;
450
451         polldata.fd = fileno(dfp);
452         polldata.events = POLLIN | POLLPRI;
453 #endif
454         progress_meter(PROGRESS_START);
455
456         if (G.chunked)
457                 goto get_clen;
458
459         /* Loops only if chunked */
460         while (1) {
461
462 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
463                 /* Must use nonblocking I/O, otherwise fread will loop
464                  * and *block* until it reads full buffer,
465                  * which messes up progress bar and/or timeout logic.
466                  * Because of nonblocking I/O, we need to dance
467                  * very carefully around EAGAIN. See explanation at
468                  * clearerr() calls.
469                  */
470                 ndelay_on(polldata.fd);
471 #endif
472                 while (1) {
473                         int n;
474                         unsigned rdsz;
475
476 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
477                         /* fread internally uses read loop, which in our case
478                          * is usually exited when we get EAGAIN.
479                          * In this case, libc sets error marker on the stream.
480                          * Need to clear it before next fread to avoid possible
481                          * rare false positive ferror below. Rare because usually
482                          * fread gets more than zero bytes, and we don't fall
483                          * into if (n <= 0) ...
484                          */
485                         clearerr(dfp);
486 #endif
487                         errno = 0;
488                         rdsz = sizeof(G.wget_buf);
489                         if (G.got_clen) {
490                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
491                                         if ((int)G.content_len <= 0)
492                                                 break;
493                                         rdsz = (unsigned)G.content_len;
494                                 }
495                         }
496                         n = fread(G.wget_buf, 1, rdsz, dfp);
497
498                         if (n > 0) {
499                                 xwrite(G.output_fd, G.wget_buf, n);
500 #if ENABLE_FEATURE_WGET_STATUSBAR
501                                 G.transferred += n;
502 #endif
503                                 if (G.got_clen) {
504                                         G.content_len -= n;
505                                         if (G.content_len == 0)
506                                                 break;
507                                 }
508 #if ENABLE_FEATURE_WGET_TIMEOUT
509                                 second_cnt = G.timeout_seconds;
510 #endif
511                                 continue;
512                         }
513
514                         /* n <= 0.
515                          * man fread:
516                          * If error occurs, or EOF is reached, the return value
517                          * is a short item count (or zero).
518                          * fread does not distinguish between EOF and error.
519                          */
520                         if (errno != EAGAIN) {
521                                 if (ferror(dfp)) {
522                                         progress_meter(PROGRESS_END);
523                                         bb_perror_msg_and_die(bb_msg_read_error);
524                                 }
525                                 break; /* EOF, not error */
526                         }
527
528 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
529                         /* It was EAGAIN. There is no data. Wait up to one second
530                          * then abort if timed out, or update the bar and try reading again.
531                          */
532                         if (safe_poll(&polldata, 1, 1000) == 0) {
533 # if ENABLE_FEATURE_WGET_TIMEOUT
534                                 if (second_cnt != 0 && --second_cnt == 0) {
535                                         progress_meter(PROGRESS_END);
536                                         bb_error_msg_and_die("download timed out");
537                                 }
538 # endif
539                                 /* We used to loop back to poll here,
540                                  * but there is no great harm in letting fread
541                                  * to try reading anyway.
542                                  */
543                         }
544                         /* Need to do it _every_ second for "stalled" indicator
545                          * to be shown properly.
546                          */
547                         progress_meter(PROGRESS_BUMP);
548 #endif
549                 } /* while (reading data) */
550
551 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
552                 clearerr(dfp);
553                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
554 #endif
555                 if (!G.chunked)
556                         break;
557
558                 fgets_and_trim(dfp); /* Eat empty line */
559  get_clen:
560                 fgets_and_trim(dfp);
561                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
562                 /* FIXME: error check? */
563                 if (G.content_len == 0)
564                         break; /* all done! */
565                 G.got_clen = 1;
566                 /*
567                  * Note that fgets may result in some data being buffered in dfp.
568                  * We loop back to fread, which will retrieve this data.
569                  * Also note that code has to be arranged so that fread
570                  * is done _before_ one-second poll wait - poll doesn't know
571                  * about stdio buffering and can result in spurious one second waits!
572                  */
573         }
574
575         /* If -c failed, we restart from the beginning,
576          * but we do not truncate file then, we do it only now, at the end.
577          * This lets user to ^C if his 99% complete 10 GB file download
578          * failed to restart *without* losing the almost complete file.
579          */
580         {
581                 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
582                 if (pos != (off_t)-1)
583                         ftruncate(G.output_fd, pos);
584         }
585
586         /* Draw full bar and free its resources */
587         G.chunked = 0;  /* makes it show 100% even for chunked download */
588         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
589         progress_meter(PROGRESS_END);
590 }
591
592 static void download_one_url(const char *url)
593 {
594         bool use_proxy;                 /* Use proxies if env vars are set  */
595         int redir_limit;
596         len_and_sockaddr *lsa;
597         FILE *sfp;                      /* socket to web/ftp server         */
598         FILE *dfp;                      /* socket to ftp server (data)      */
599         char *proxy = NULL;
600         char *fname_out_alloc;
601         char *redirected_path = NULL;
602         struct host_info server;
603         struct host_info target;
604
605         server.allocated = NULL;
606         target.allocated = NULL;
607         server.user = NULL;
608         target.user = NULL;
609
610         parse_url(url, &target);
611
612         /* Use the proxy if necessary */
613         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
614         if (use_proxy) {
615                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
616                 use_proxy = (proxy && proxy[0]);
617                 if (use_proxy)
618                         parse_url(proxy, &server);
619         }
620         if (!use_proxy) {
621                 server.port = target.port;
622                 if (ENABLE_FEATURE_IPV6) {
623                         //free(server.allocated); - can't be non-NULL
624                         server.host = server.allocated = xstrdup(target.host);
625                 } else {
626                         server.host = target.host;
627                 }
628         }
629
630         if (ENABLE_FEATURE_IPV6)
631                 strip_ipv6_scope_id(target.host);
632
633         /* If there was no -O FILE, guess output filename */
634         fname_out_alloc = NULL;
635         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
636                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
637                 /* handle "wget http://kernel.org//" */
638                 if (G.fname_out[0] == '/' || !G.fname_out[0])
639                         G.fname_out = (char*)"index.html";
640                 /* -P DIR is considered only if there was no -O FILE */
641                 if (G.dir_prefix)
642                         G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
643                 else {
644                         /* redirects may free target.path later, need to make a copy */
645                         G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
646                 }
647         }
648 #if ENABLE_FEATURE_WGET_STATUSBAR
649         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
650 #endif
651
652         /* Determine where to start transfer */
653         G.beg_range = 0;
654         if (option_mask32 & WGET_OPT_CONTINUE) {
655                 G.output_fd = open(G.fname_out, O_WRONLY);
656                 if (G.output_fd >= 0) {
657                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
658                 }
659                 /* File doesn't exist. We do not create file here yet.
660                  * We are not sure it exists on remote side */
661         }
662
663         redir_limit = 5;
664  resolve_lsa:
665         lsa = xhost2sockaddr(server.host, server.port);
666         if (!(option_mask32 & WGET_OPT_QUIET)) {
667                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
668                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
669                 free(s);
670         }
671  establish_session:
672         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
673         G.got_clen = 0;
674         G.chunked = 0;
675         if (use_proxy || !target.is_ftp) {
676                 /*
677                  *  HTTP session
678                  */
679                 char *str;
680                 int status;
681
682
683                 /* Open socket to http server */
684                 sfp = open_socket(lsa);
685
686                 /* Send HTTP request */
687                 if (use_proxy) {
688                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
689                                 target.is_ftp ? "f" : "ht", target.host,
690                                 target.path);
691                 } else {
692                         if (option_mask32 & WGET_OPT_POST_DATA)
693                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
694                         else
695                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
696                 }
697
698                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
699                         target.host, G.user_agent);
700
701                 /* Ask server to close the connection as soon as we are done
702                  * (IOW: we do not intend to send more requests)
703                  */
704                 fprintf(sfp, "Connection: close\r\n");
705
706 #if ENABLE_FEATURE_WGET_AUTHENTICATION
707                 if (target.user) {
708                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
709                                 base64enc(target.user));
710                 }
711                 if (use_proxy && server.user) {
712                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
713                                 base64enc(server.user));
714                 }
715 #endif
716
717                 if (G.beg_range != 0)
718                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
719
720 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
721                 if (G.extra_headers)
722                         fputs(G.extra_headers, sfp);
723
724                 if (option_mask32 & WGET_OPT_POST_DATA) {
725                         fprintf(sfp,
726                                 "Content-Type: application/x-www-form-urlencoded\r\n"
727                                 "Content-Length: %u\r\n"
728                                 "\r\n"
729                                 "%s",
730                                 (int) strlen(G.post_data), G.post_data
731                         );
732                 } else
733 #endif
734                 {
735                         fprintf(sfp, "\r\n");
736                 }
737
738                 fflush(sfp);
739
740                 /*
741                  * Retrieve HTTP response line and check for "200" status code.
742                  */
743  read_response:
744                 fgets_and_trim(sfp);
745
746                 str = G.wget_buf;
747                 str = skip_non_whitespace(str);
748                 str = skip_whitespace(str);
749                 // FIXME: no error check
750                 // xatou wouldn't work: "200 OK"
751                 status = atoi(str);
752                 switch (status) {
753                 case 0:
754                 case 100:
755                         while (gethdr(sfp) != NULL)
756                                 /* eat all remaining headers */;
757                         goto read_response;
758                 case 200:
759 /*
760 Response 204 doesn't say "null file", it says "metadata
761 has changed but data didn't":
762
763 "10.2.5 204 No Content
764 The server has fulfilled the request but does not need to return
765 an entity-body, and might want to return updated metainformation.
766 The response MAY include new or updated metainformation in the form
767 of entity-headers, which if present SHOULD be associated with
768 the requested variant.
769
770 If the client is a user agent, it SHOULD NOT change its document
771 view from that which caused the request to be sent. This response
772 is primarily intended to allow input for actions to take place
773 without causing a change to the user agent's active document view,
774 although any new or updated metainformation SHOULD be applied
775 to the document currently in the user agent's active view.
776
777 The 204 response MUST NOT include a message-body, and thus
778 is always terminated by the first empty line after the header fields."
779
780 However, in real world it was observed that some web servers
781 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
782 */
783                 case 204:
784                         if (G.beg_range != 0) {
785                                 /* "Range:..." was not honored by the server.
786                                  * Restart download from the beginning.
787                                  */
788                                 reset_beg_range_to_zero();
789                         }
790                         break;
791                 case 300:  /* redirection */
792                 case 301:
793                 case 302:
794                 case 303:
795                         break;
796                 case 206: /* Partial Content */
797                         if (G.beg_range != 0)
798                                 /* "Range:..." worked. Good. */
799                                 break;
800                         /* Partial Content even though we did not ask for it??? */
801                         /* fall through */
802                 default:
803                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
804                 }
805
806                 /*
807                  * Retrieve HTTP headers.
808                  */
809                 while ((str = gethdr(sfp)) != NULL) {
810                         static const char keywords[] ALIGN1 =
811                                 "content-length\0""transfer-encoding\0""location\0";
812                         enum {
813                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
814                         };
815                         smalluint key;
816
817                         /* gethdr converted "FOO:" string to lowercase */
818
819                         /* strip trailing whitespace */
820                         char *s = strchrnul(str, '\0') - 1;
821                         while (s >= str && (*s == ' ' || *s == '\t')) {
822                                 *s = '\0';
823                                 s--;
824                         }
825                         key = index_in_strings(keywords, G.wget_buf) + 1;
826                         if (key == KEY_content_length) {
827                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
828                                 if (G.content_len < 0 || errno) {
829                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
830                                 }
831                                 G.got_clen = 1;
832                                 continue;
833                         }
834                         if (key == KEY_transfer_encoding) {
835                                 if (strcmp(str_tolower(str), "chunked") != 0)
836                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
837                                 G.chunked = 1;
838                         }
839                         if (key == KEY_location && status >= 300) {
840                                 if (--redir_limit == 0)
841                                         bb_error_msg_and_die("too many redirections");
842                                 fclose(sfp);
843                                 if (str[0] == '/') {
844                                         free(redirected_path);
845                                         target.path = redirected_path = xstrdup(str+1);
846                                         /* lsa stays the same: it's on the same server */
847                                 } else {
848                                         parse_url(str, &target);
849                                         if (!use_proxy) {
850                                                 free(server.allocated);
851                                                 server.allocated = NULL;
852                                                 server.host = target.host;
853                                                 /* strip_ipv6_scope_id(target.host); - no! */
854                                                 /* we assume remote never gives us IPv6 addr with scope id */
855                                                 server.port = target.port;
856                                                 free(lsa);
857                                                 goto resolve_lsa;
858                                         } /* else: lsa stays the same: we use proxy */
859                                 }
860                                 goto establish_session;
861                         }
862                 }
863 //              if (status >= 300)
864 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
865
866                 /* For HTTP, data is pumped over the same connection */
867                 dfp = sfp;
868
869         } else {
870                 /*
871                  *  FTP session
872                  */
873                 sfp = prepare_ftp_session(&dfp, &target, lsa);
874         }
875
876         free(lsa);
877
878         if (!(option_mask32 & WGET_OPT_SPIDER)) {
879                 if (G.output_fd < 0)
880                         G.output_fd = xopen(G.fname_out, G.o_flags);
881                 retrieve_file_data(dfp);
882                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
883                         xclose(G.output_fd);
884                         G.output_fd = -1;
885                 }
886         }
887
888         if (dfp != sfp) {
889                 /* It's ftp. Close data connection properly */
890                 fclose(dfp);
891                 if (ftpcmd(NULL, NULL, sfp) != 226)
892                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
893                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
894         }
895         fclose(sfp);
896
897         free(server.allocated);
898         free(target.allocated);
899         free(fname_out_alloc);
900         free(redirected_path);
901 }
902
903 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
904 int wget_main(int argc UNUSED_PARAM, char **argv)
905 {
906 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
907         static const char wget_longopts[] ALIGN1 =
908                 /* name, has_arg, val */
909                 "continue\0"         No_argument       "c"
910 //FIXME: -s isn't --spider, it's --save-headers!
911                 "spider\0"           No_argument       "s"
912                 "quiet\0"            No_argument       "q"
913                 "output-document\0"  Required_argument "O"
914                 "directory-prefix\0" Required_argument "P"
915                 "proxy\0"            Required_argument "Y"
916                 "user-agent\0"       Required_argument "U"
917 #if ENABLE_FEATURE_WGET_TIMEOUT
918                 "timeout\0"          Required_argument "T"
919 #endif
920                 /* Ignored: */
921                 // "tries\0"            Required_argument "t"
922                 /* Ignored (we always use PASV): */
923                 "passive-ftp\0"      No_argument       "\xff"
924                 "header\0"           Required_argument "\xfe"
925                 "post-data\0"        Required_argument "\xfd"
926                 /* Ignored (we don't do ssl) */
927                 "no-check-certificate\0" No_argument   "\xfc"
928                 /* Ignored (we don't support caching) */
929                 "no-cache\0"         No_argument       "\xfb"
930                 ;
931 #endif
932
933 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
934         llist_t *headers_llist = NULL;
935 #endif
936
937         INIT_G();
938
939         IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
940         G.proxy_flag = "on";   /* use proxies if env vars are set */
941         G.user_agent = "Wget"; /* "User-Agent" header field */
942
943 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
944         applet_long_options = wget_longopts;
945 #endif
946         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
947         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
948                 &G.fname_out, &G.dir_prefix,
949                 &G.proxy_flag, &G.user_agent,
950                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
951                 NULL /* -t RETRIES */
952                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
953                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
954         );
955         argv += optind;
956
957 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
958         if (headers_llist) {
959                 int size = 1;
960                 char *cp;
961                 llist_t *ll = headers_llist;
962                 while (ll) {
963                         size += strlen(ll->data) + 2;
964                         ll = ll->link;
965                 }
966                 G.extra_headers = cp = xmalloc(size);
967                 while (headers_llist) {
968                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
969                 }
970         }
971 #endif
972
973         G.output_fd = -1;
974         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
975         if (G.fname_out) { /* -O FILE ? */
976                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
977                         G.output_fd = 1;
978                         option_mask32 &= ~WGET_OPT_CONTINUE;
979                 }
980                 /* compat with wget: -O FILE can overwrite */
981                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
982         }
983
984         while (*argv)
985                 download_one_url(*argv++);
986
987         if (G.output_fd >= 0)
988                 xclose(G.output_fd);
989
990         return EXIT_SUCCESS;
991 }