Bump to version 1.22.1
[platform/upstream/busybox.git] / networking / wget.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * wget - retrieve a file using HTTP or FTP
4  *
5  * Chip Rosenthal Covad Communications <chip@laserlink.net>
6  * Licensed under GPLv2, see file LICENSE in this source tree.
7  *
8  * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9  * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10  */
11
12 //usage:#define wget_trivial_usage
13 //usage:        IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage:       "        [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage:    "        [--no-check-certificate] [--no-cache]" */
18 //usage:       "        [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19 //usage:        )
20 //usage:        IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage:                        IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23 //usage:        )
24 //usage:#define wget_full_usage "\n\n"
25 //usage:       "Retrieve files via HTTP or FTP\n"
26 //usage:     "\n        -s      Spider mode - only check file existence"
27 //usage:     "\n        -c      Continue retrieval of aborted transfer"
28 //usage:     "\n        -q      Quiet"
29 //usage:     "\n        -P DIR  Save to DIR (default .)"
30 //usage:        IF_FEATURE_WGET_TIMEOUT(
31 //usage:     "\n        -T SEC  Network read timeout is SEC seconds"
32 //usage:        )
33 //usage:     "\n        -O FILE Save to FILE ('-' for stdout)"
34 //usage:     "\n        -U STR  Use STR for User-Agent header"
35 //usage:     "\n        -Y      Use proxy ('on' or 'off')"
36
37 #include "libbb.h"
38
39 #if 0
40 # define log_io(...) bb_error_msg(__VA_ARGS__)
41 #else
42 # define log_io(...) ((void)0)
43 #endif
44
45
46 struct host_info {
47         char *allocated;
48         const char *path;
49         const char *user;
50         char       *host;
51         int         port;
52         smallint    is_ftp;
53 };
54
55
56 /* Globals */
57 struct globals {
58         off_t content_len;        /* Content-length of the file */
59         off_t beg_range;          /* Range at which continue begins */
60 #if ENABLE_FEATURE_WGET_STATUSBAR
61         off_t transferred;        /* Number of bytes transferred so far */
62         const char *curfile;      /* Name of current file being transferred */
63         bb_progress_t pmt;
64 #endif
65         char *dir_prefix;
66 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
67         char *post_data;
68         char *extra_headers;
69 #endif
70         char *fname_out;        /* where to direct output (-O) */
71         const char *proxy_flag; /* Use proxies if env vars are set */
72         const char *user_agent; /* "User-Agent" header field */
73 #if ENABLE_FEATURE_WGET_TIMEOUT
74         unsigned timeout_seconds;
75         bool connecting;
76 #endif
77         int output_fd;
78         int o_flags;
79         smallint chunked;         /* chunked transfer encoding */
80         smallint got_clen;        /* got content-length: from server  */
81         /* Local downloads do benefit from big buffer.
82          * With 512 byte buffer, it was measured to be
83          * an order of magnitude slower than with big one.
84          */
85         uint64_t just_to_align_next_member;
86         char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
87 } FIX_ALIASING;
/* All applet state is reached through the ptr_to_globals pointer */
#define G (*ptr_to_globals)
/* Hand a zero-filled block of sizeof(G) bytes to SET_PTR_TO_GLOBALS */
#define INIT_G() do { SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); } while (0)
/* Counterpart of INIT_G(): delegates to FREE_PTR_TO_GLOBALS */
#define FINI_G() do { FREE_PTR_TO_GLOBALS(); } while (0)
95
96
97 /* Must match option string! */
98 enum {
99         WGET_OPT_CONTINUE   = (1 << 0),
100         WGET_OPT_SPIDER     = (1 << 1),
101         WGET_OPT_QUIET      = (1 << 2),
102         WGET_OPT_OUTNAME    = (1 << 3),
103         WGET_OPT_PREFIX     = (1 << 4),
104         WGET_OPT_PROXY      = (1 << 5),
105         WGET_OPT_USER_AGENT = (1 << 6),
106         WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
107         WGET_OPT_RETRIES    = (1 << 8),
108         WGET_OPT_PASSIVE    = (1 << 9),
109         WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
110         WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
111 };
112
/* Values accepted by progress_meter() */
enum {
        PROGRESS_START = -1,  /* initialize the bar, then draw it */
        PROGRESS_END   = 0,   /* draw it one last time and release it */
        PROGRESS_BUMP  = 1,   /* plain redraw */
};
118 #if ENABLE_FEATURE_WGET_STATUSBAR
119 static void progress_meter(int flag)
120 {
121         if (option_mask32 & WGET_OPT_QUIET)
122                 return;
123
124         if (flag == PROGRESS_START)
125                 bb_progress_init(&G.pmt, G.curfile);
126
127         bb_progress_update(&G.pmt,
128                         G.beg_range,
129                         G.transferred,
130                         (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
131         );
132
133         if (flag == PROGRESS_END) {
134                 bb_progress_free(&G.pmt);
135                 bb_putchar_stderr('\n');
136                 G.transferred = 0;
137         }
138 }
139 #else
140 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
141 #endif
142
143
/* IPv6 knows scoped address types, i.e. link-local and site-local addresses.
 * A link-local address can carry a scope (zone) identifier naming the
 * interface/link it is valid on (e.g. fe80::1%eth0). This identifier is
 * meaningful only on the local node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on its semantics. Apache, for example, rejects
 * requests whose Host header contains a zone identifier, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Removes the "%zone" part from a bracketed IPv6 host in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80". Anything that is not of the
 * form "[...%...]" or "[...%...]:port" is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
        char *scope, *cp;

        /* bbox wget actually handles IPv6 addresses without [], like
         * wget "http://::1/xxx", but this is not standard.
         * To save code, _here_ we do not support it. */
        if (host[0] != '[')
                return; /* not IPv6 */

        scope = strchr(host, '%');
        if (!scope)
                return;

        cp = strchr(host, ']');
        if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
                /* malformed address (not "[xx]:nn" or "[xx]") */
                return;
        }

        /* The '%' must sit inside the brackets. If it follows the ']'
         * (e.g. "[::1]:80%"), copying "]..." onto it would move data
         * towards higher addresses and push the string past the end
         * of its buffer. */
        if (scope > cp)
                return;

        /* cp points to "]...", scope points to "%eth0]...":
         * slide the tail (terminating NUL included) down over the zone */
        memmove(scope, cp, strlen(cp) + 1);
}
179
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return that buffer.
 * The result is overwritten by the next use of G.wget_buf.
 * Over-long input is silently cut so that the output fits.
 */
static char *base64enc(const char *str)
{
        /* paranoia: longest input whose encoding still fits the buffer */
        const unsigned max_input = sizeof(G.wget_buf)/4*3 - 10;
        unsigned input_len = strlen(str);

        if (input_len > max_input)
                input_len = max_input;

        bb_uuencode(G.wget_buf, str, input_len, bb_uuenc_tbl_base64);
        return G.wget_buf;
}
#endif
191
/*
 * Cut s at its first byte below ' ' (control characters and the
 * terminating NUL both qualify). Bytes are compared as unsigned, so
 * values 0x80..0xff are kept. Modifies s in place and returns it.
 */
static char* sanitize_string(char *s)
{
        unsigned char *cur;

        for (cur = (unsigned char *) s; *cur >= ' '; cur++)
                continue;
        *cur = '\0';

        return s;
}
200
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * Alarm signal handler: aborts the program if the alarm goes off while
 * a connection attempt is in progress, and is a no-op otherwise.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
        if (!G.connecting)
                return;

        /* This is theoretically unsafe (uses stdio and malloc in signal handler) */
        bb_error_msg_and_die("download timed out");
}
#endif
209
210 static FILE *open_socket(len_and_sockaddr *lsa)
211 {
212         int fd;
213         FILE *fp;
214
215         IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
216         fd = xconnect_stream(lsa);
217         IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
218
219         /* glibc 2.4 seems to try seeking on it - ??! */
220         /* hopefully it understands what ESPIPE means... */
221         fp = fdopen(fd, "r+");
222         if (fp == NULL)
223                 bb_perror_msg_and_die(bb_msg_memory_exhausted);
224
225         return fp;
226 }
227
228 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
229 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
230 static char fgets_and_trim(FILE *fp)
231 {
232         char c;
233         char *buf_ptr;
234
235         if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
236                 bb_perror_msg_and_die("error getting response");
237
238         buf_ptr = strchrnul(G.wget_buf, '\n');
239         c = *buf_ptr;
240         *buf_ptr = '\0';
241         buf_ptr = strchrnul(G.wget_buf, '\r');
242         *buf_ptr = '\0';
243
244         log_io("< %s", G.wget_buf);
245
246         return c;
247 }
248
249 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
250 {
251         int result;
252         if (s1) {
253                 if (!s2)
254                         s2 = "";
255                 fprintf(fp, "%s%s\r\n", s1, s2);
256                 fflush(fp);
257                 log_io("> %s%s", s1, s2);
258         }
259
260         do {
261                 fgets_and_trim(fp);
262         } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
263
264         G.wget_buf[3] = '\0';
265         result = xatoi_positive(G.wget_buf);
266         G.wget_buf[3] = ' ';
267         return result;
268 }
269
270 static void parse_url(const char *src_url, struct host_info *h)
271 {
272         char *url, *p, *sp;
273
274         free(h->allocated);
275         h->allocated = url = xstrdup(src_url);
276
277         if (strncmp(url, "ftp://", 6) == 0) {
278                 h->port = bb_lookup_port("ftp", "tcp", 21);
279                 h->host = url + 6;
280                 h->is_ftp = 1;
281         } else
282         if (strncmp(url, "http://", 7) == 0) {
283                 h->host = url + 7;
284  http:
285                 h->port = bb_lookup_port("http", "tcp", 80);
286                 h->is_ftp = 0;
287         } else
288         if (!strstr(url, "//")) {
289                 // GNU wget is user-friendly and falls back to http://
290                 h->host = url;
291                 goto http;
292         } else
293                 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
294
295         // FYI:
296         // "Real" wget 'http://busybox.net?var=a/b' sends this request:
297         //   'GET /?var=a/b HTTP 1.0'
298         //   and saves 'index.html?var=a%2Fb' (we save 'b')
299         // wget 'http://busybox.net?login=john@doe':
300         //   request: 'GET /?login=john@doe HTTP/1.0'
301         //   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
302         // wget 'http://busybox.net#test/test':
303         //   request: 'GET / HTTP/1.0'
304         //   saves: 'index.html' (we save 'test')
305         //
306         // We also don't add unique .N suffix if file exists...
307         sp = strchr(h->host, '/');
308         p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
309         p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
310         if (!sp) {
311                 h->path = "";
312         } else if (*sp == '/') {
313                 *sp = '\0';
314                 h->path = sp + 1;
315         } else { // '#' or '?'
316                 // http://busybox.net?login=john@doe is a valid URL
317                 // memmove converts to:
318                 // http:/busybox.nett?login=john@doe...
319                 memmove(h->host - 1, h->host, sp - h->host);
320                 h->host--;
321                 sp[-1] = '\0';
322                 h->path = sp;
323         }
324
325         // We used to set h->user to NULL here, but this interferes
326         // with handling of code 302 ("object was moved")
327
328         sp = strrchr(h->host, '@');
329         if (sp != NULL) {
330                 // URL-decode "user:password" string before base64-encoding:
331                 // wget http://test:my%20pass@example.com should send
332                 // Authorization: Basic dGVzdDpteSBwYXNz
333                 // which decodes to "test:my pass".
334                 // Standard wget and curl do this too.
335                 *sp = '\0';
336                 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
337                 h->host = sp + 1;
338         }
339
340         sp = h->host;
341 }
342
343 static char *gethdr(FILE *fp)
344 {
345         char *s, *hdrval;
346         int c;
347
348         /* retrieve header line */
349         c = fgets_and_trim(fp);
350
351         /* end of the headers? */
352         if (G.wget_buf[0] == '\0')
353                 return NULL;
354
355         /* convert the header name to lower case */
356         for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
357                 /*
358                  * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
359                  * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
360                  * "A-Z" maps to "a-z".
361                  * "@[\]" can't occur in header names.
362                  * "^_" maps to "~,DEL" (which is wrong).
363                  * "^" was never seen yet, "_" was seen from web.archive.org
364                  * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
365                  */
366                 *s |= 0x20;
367         }
368
369         /* verify we are at the end of the header name */
370         if (*s != ':')
371                 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
372
373         /* locate the start of the header value */
374         *s++ = '\0';
375         hdrval = skip_whitespace(s);
376
377         if (c != '\n') {
378                 /* Rats! The buffer isn't big enough to hold the entire header value */
379                 while (c = getc(fp), c != EOF && c != '\n')
380                         continue;
381         }
382
383         return hdrval;
384 }
385
386 static void reset_beg_range_to_zero(void)
387 {
388         bb_error_msg("restart failed");
389         G.beg_range = 0;
390         xlseek(G.output_fd, 0, SEEK_SET);
391         /* Done at the end instead: */
392         /* ftruncate(G.output_fd, 0); */
393 }
394
395 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
396 {
397         FILE *sfp;
398         char *str;
399         int port;
400
401         if (!target->user)
402                 target->user = xstrdup("anonymous:busybox@");
403
404         sfp = open_socket(lsa);
405         if (ftpcmd(NULL, NULL, sfp) != 220)
406                 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
407
408         /*
409          * Splitting username:password pair,
410          * trying to log in
411          */
412         str = strchr(target->user, ':');
413         if (str)
414                 *str++ = '\0';
415         switch (ftpcmd("USER ", target->user, sfp)) {
416         case 230:
417                 break;
418         case 331:
419                 if (ftpcmd("PASS ", str, sfp) == 230)
420                         break;
421                 /* fall through (failed login) */
422         default:
423                 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
424         }
425
426         ftpcmd("TYPE I", NULL, sfp);
427
428         /*
429          * Querying file size
430          */
431         if (ftpcmd("SIZE ", target->path, sfp) == 213) {
432                 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
433                 if (G.content_len < 0 || errno) {
434                         bb_error_msg_and_die("SIZE value is garbage");
435                 }
436                 G.got_clen = 1;
437         }
438
439         /*
440          * Entering passive mode
441          */
442         if (ftpcmd("PASV", NULL, sfp) != 227) {
443  pasv_error:
444                 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
445         }
446         // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
447         // Server's IP is N1.N2.N3.N4 (we ignore it)
448         // Server's port for data connection is P1*256+P2
449         str = strrchr(G.wget_buf, ')');
450         if (str) str[0] = '\0';
451         str = strrchr(G.wget_buf, ',');
452         if (!str) goto pasv_error;
453         port = xatou_range(str+1, 0, 255);
454         *str = '\0';
455         str = strrchr(G.wget_buf, ',');
456         if (!str) goto pasv_error;
457         port += xatou_range(str+1, 0, 255) * 256;
458         set_nport(&lsa->u.sa, htons(port));
459
460         *dfpp = open_socket(lsa);
461
462         if (G.beg_range != 0) {
463                 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
464                 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
465                         G.content_len -= G.beg_range;
466                 else
467                         reset_beg_range_to_zero();
468         }
469
470         if (ftpcmd("RETR ", target->path, sfp) > 150)
471                 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
472
473         return sfp;
474 }
475
476 static void NOINLINE retrieve_file_data(FILE *dfp)
477 {
478 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
479 # if ENABLE_FEATURE_WGET_TIMEOUT
480         unsigned second_cnt = G.timeout_seconds;
481 # endif
482         struct pollfd polldata;
483
484         polldata.fd = fileno(dfp);
485         polldata.events = POLLIN | POLLPRI;
486 #endif
487         progress_meter(PROGRESS_START);
488
489         if (G.chunked)
490                 goto get_clen;
491
492         /* Loops only if chunked */
493         while (1) {
494
495 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
496                 /* Must use nonblocking I/O, otherwise fread will loop
497                  * and *block* until it reads full buffer,
498                  * which messes up progress bar and/or timeout logic.
499                  * Because of nonblocking I/O, we need to dance
500                  * very carefully around EAGAIN. See explanation at
501                  * clearerr() calls.
502                  */
503                 ndelay_on(polldata.fd);
504 #endif
505                 while (1) {
506                         int n;
507                         unsigned rdsz;
508
509 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
510                         /* fread internally uses read loop, which in our case
511                          * is usually exited when we get EAGAIN.
512                          * In this case, libc sets error marker on the stream.
513                          * Need to clear it before next fread to avoid possible
514                          * rare false positive ferror below. Rare because usually
515                          * fread gets more than zero bytes, and we don't fall
516                          * into if (n <= 0) ...
517                          */
518                         clearerr(dfp);
519 #endif
520                         errno = 0;
521                         rdsz = sizeof(G.wget_buf);
522                         if (G.got_clen) {
523                                 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
524                                         if ((int)G.content_len <= 0)
525                                                 break;
526                                         rdsz = (unsigned)G.content_len;
527                                 }
528                         }
529                         n = fread(G.wget_buf, 1, rdsz, dfp);
530
531                         if (n > 0) {
532                                 xwrite(G.output_fd, G.wget_buf, n);
533 #if ENABLE_FEATURE_WGET_STATUSBAR
534                                 G.transferred += n;
535 #endif
536                                 if (G.got_clen) {
537                                         G.content_len -= n;
538                                         if (G.content_len == 0)
539                                                 break;
540                                 }
541 #if ENABLE_FEATURE_WGET_TIMEOUT
542                                 second_cnt = G.timeout_seconds;
543 #endif
544                                 continue;
545                         }
546
547                         /* n <= 0.
548                          * man fread:
549                          * If error occurs, or EOF is reached, the return value
550                          * is a short item count (or zero).
551                          * fread does not distinguish between EOF and error.
552                          */
553                         if (errno != EAGAIN) {
554                                 if (ferror(dfp)) {
555                                         progress_meter(PROGRESS_END);
556                                         bb_perror_msg_and_die(bb_msg_read_error);
557                                 }
558                                 break; /* EOF, not error */
559                         }
560
561 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
562                         /* It was EAGAIN. There is no data. Wait up to one second
563                          * then abort if timed out, or update the bar and try reading again.
564                          */
565                         if (safe_poll(&polldata, 1, 1000) == 0) {
566 # if ENABLE_FEATURE_WGET_TIMEOUT
567                                 if (second_cnt != 0 && --second_cnt == 0) {
568                                         progress_meter(PROGRESS_END);
569                                         bb_error_msg_and_die("download timed out");
570                                 }
571 # endif
572                                 /* We used to loop back to poll here,
573                                  * but there is no great harm in letting fread
574                                  * to try reading anyway.
575                                  */
576                         }
577                         /* Need to do it _every_ second for "stalled" indicator
578                          * to be shown properly.
579                          */
580                         progress_meter(PROGRESS_BUMP);
581 #endif
582                 } /* while (reading data) */
583
584 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
585                 clearerr(dfp);
586                 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
587 #endif
588                 if (!G.chunked)
589                         break;
590
591                 fgets_and_trim(dfp); /* Eat empty line */
592  get_clen:
593                 fgets_and_trim(dfp);
594                 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
595                 /* FIXME: error check? */
596                 if (G.content_len == 0)
597                         break; /* all done! */
598                 G.got_clen = 1;
599                 /*
600                  * Note that fgets may result in some data being buffered in dfp.
601                  * We loop back to fread, which will retrieve this data.
602                  * Also note that code has to be arranged so that fread
603                  * is done _before_ one-second poll wait - poll doesn't know
604                  * about stdio buffering and can result in spurious one second waits!
605                  */
606         }
607
608         /* If -c failed, we restart from the beginning,
609          * but we do not truncate file then, we do it only now, at the end.
610          * This lets user to ^C if his 99% complete 10 GB file download
611          * failed to restart *without* losing the almost complete file.
612          */
613         {
614                 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
615                 if (pos != (off_t)-1)
616                         ftruncate(G.output_fd, pos);
617         }
618
619         /* Draw full bar and free its resources */
620         G.chunked = 0;  /* makes it show 100% even for chunked download */
621         G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
622         progress_meter(PROGRESS_END);
623 }
624
625 static void download_one_url(const char *url)
626 {
627         bool use_proxy;                 /* Use proxies if env vars are set  */
628         int redir_limit;
629         len_and_sockaddr *lsa;
630         FILE *sfp;                      /* socket to web/ftp server         */
631         FILE *dfp;                      /* socket to ftp server (data)      */
632         char *proxy = NULL;
633         char *fname_out_alloc;
634         char *redirected_path = NULL;
635         struct host_info server;
636         struct host_info target;
637
638         server.allocated = NULL;
639         target.allocated = NULL;
640         server.user = NULL;
641         target.user = NULL;
642
643         parse_url(url, &target);
644
645         /* Use the proxy if necessary */
646         use_proxy = (strcmp(G.proxy_flag, "off") != 0);
647         if (use_proxy) {
648                 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
649                 use_proxy = (proxy && proxy[0]);
650                 if (use_proxy)
651                         parse_url(proxy, &server);
652         }
653         if (!use_proxy) {
654                 server.port = target.port;
655                 if (ENABLE_FEATURE_IPV6) {
656                         //free(server.allocated); - can't be non-NULL
657                         server.host = server.allocated = xstrdup(target.host);
658                 } else {
659                         server.host = target.host;
660                 }
661         }
662
663         if (ENABLE_FEATURE_IPV6)
664                 strip_ipv6_scope_id(target.host);
665
666         /* If there was no -O FILE, guess output filename */
667         fname_out_alloc = NULL;
668         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
669                 G.fname_out = bb_get_last_path_component_nostrip(target.path);
670                 /* handle "wget http://kernel.org//" */
671                 if (G.fname_out[0] == '/' || !G.fname_out[0])
672                         G.fname_out = (char*)"index.html";
673                 /* -P DIR is considered only if there was no -O FILE */
674                 if (G.dir_prefix)
675                         G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
676                 else {
677                         /* redirects may free target.path later, need to make a copy */
678                         G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
679                 }
680         }
681 #if ENABLE_FEATURE_WGET_STATUSBAR
682         G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
683 #endif
684
685         /* Determine where to start transfer */
686         G.beg_range = 0;
687         if (option_mask32 & WGET_OPT_CONTINUE) {
688                 G.output_fd = open(G.fname_out, O_WRONLY);
689                 if (G.output_fd >= 0) {
690                         G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
691                 }
692                 /* File doesn't exist. We do not create file here yet.
693                  * We are not sure it exists on remote side */
694         }
695
696         redir_limit = 5;
697  resolve_lsa:
698         lsa = xhost2sockaddr(server.host, server.port);
699         if (!(option_mask32 & WGET_OPT_QUIET)) {
700                 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
701                 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
702                 free(s);
703         }
704  establish_session:
705         /*G.content_len = 0; - redundant, got_clen = 0 is enough */
706         G.got_clen = 0;
707         G.chunked = 0;
708         if (use_proxy || !target.is_ftp) {
709                 /*
710                  *  HTTP session
711                  */
712                 char *str;
713                 int status;
714
715
716                 /* Open socket to http server */
717                 sfp = open_socket(lsa);
718
719                 /* Send HTTP request */
720                 if (use_proxy) {
721                         fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
722                                 target.is_ftp ? "f" : "ht", target.host,
723                                 target.path);
724                 } else {
725                         if (option_mask32 & WGET_OPT_POST_DATA)
726                                 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
727                         else
728                                 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
729                 }
730
731                 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
732                         target.host, G.user_agent);
733
734                 /* Ask server to close the connection as soon as we are done
735                  * (IOW: we do not intend to send more requests)
736                  */
737                 fprintf(sfp, "Connection: close\r\n");
738
739 #if ENABLE_FEATURE_WGET_AUTHENTICATION
740                 if (target.user) {
741                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
742                                 base64enc(target.user));
743                 }
744                 if (use_proxy && server.user) {
745                         fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
746                                 base64enc(server.user));
747                 }
748 #endif
749
750                 if (G.beg_range != 0)
751                         fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
752
753 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
754                 if (G.extra_headers)
755                         fputs(G.extra_headers, sfp);
756
757                 if (option_mask32 & WGET_OPT_POST_DATA) {
758                         fprintf(sfp,
759                                 "Content-Type: application/x-www-form-urlencoded\r\n"
760                                 "Content-Length: %u\r\n"
761                                 "\r\n"
762                                 "%s",
763                                 (int) strlen(G.post_data), G.post_data
764                         );
765                 } else
766 #endif
767                 {
768                         fprintf(sfp, "\r\n");
769                 }
770
771                 fflush(sfp);
772
773                 /*
774                  * Retrieve HTTP response line and check for "200" status code.
775                  */
776  read_response:
777                 fgets_and_trim(sfp);
778
779                 str = G.wget_buf;
780                 str = skip_non_whitespace(str);
781                 str = skip_whitespace(str);
782                 // FIXME: no error check
783                 // xatou wouldn't work: "200 OK"
784                 status = atoi(str);
785                 switch (status) {
786                 case 0:
787                 case 100:
788                         while (gethdr(sfp) != NULL)
789                                 /* eat all remaining headers */;
790                         goto read_response;
791                 case 200:
792 /*
793 Response 204 doesn't say "null file", it says "metadata
794 has changed but data didn't":
795
796 "10.2.5 204 No Content
797 The server has fulfilled the request but does not need to return
798 an entity-body, and might want to return updated metainformation.
799 The response MAY include new or updated metainformation in the form
800 of entity-headers, which if present SHOULD be associated with
801 the requested variant.
802
803 If the client is a user agent, it SHOULD NOT change its document
804 view from that which caused the request to be sent. This response
805 is primarily intended to allow input for actions to take place
806 without causing a change to the user agent's active document view,
807 although any new or updated metainformation SHOULD be applied
808 to the document currently in the user agent's active view.
809
810 The 204 response MUST NOT include a message-body, and thus
811 is always terminated by the first empty line after the header fields."
812
813 However, in real world it was observed that some web servers
814 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
815 */
816                 case 204:
817                         if (G.beg_range != 0) {
818                                 /* "Range:..." was not honored by the server.
819                                  * Restart download from the beginning.
820                                  */
821                                 reset_beg_range_to_zero();
822                         }
823                         break;
824                 case 300:  /* redirection */
825                 case 301:
826                 case 302:
827                 case 303:
828                         break;
829                 case 206: /* Partial Content */
830                         if (G.beg_range != 0)
831                                 /* "Range:..." worked. Good. */
832                                 break;
833                         /* Partial Content even though we did not ask for it??? */
834                         /* fall through */
835                 default:
836                         bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
837                 }
838
839                 /*
840                  * Retrieve HTTP headers.
841                  */
842                 while ((str = gethdr(sfp)) != NULL) {
843                         static const char keywords[] ALIGN1 =
844                                 "content-length\0""transfer-encoding\0""location\0";
845                         enum {
846                                 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
847                         };
848                         smalluint key;
849
850                         /* gethdr converted "FOO:" string to lowercase */
851
852                         /* strip trailing whitespace */
853                         char *s = strchrnul(str, '\0') - 1;
854                         while (s >= str && (*s == ' ' || *s == '\t')) {
855                                 *s = '\0';
856                                 s--;
857                         }
858                         key = index_in_strings(keywords, G.wget_buf) + 1;
859                         if (key == KEY_content_length) {
860                                 G.content_len = BB_STRTOOFF(str, NULL, 10);
861                                 if (G.content_len < 0 || errno) {
862                                         bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
863                                 }
864                                 G.got_clen = 1;
865                                 continue;
866                         }
867                         if (key == KEY_transfer_encoding) {
868                                 if (strcmp(str_tolower(str), "chunked") != 0)
869                                         bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
870                                 G.chunked = 1;
871                         }
872                         if (key == KEY_location && status >= 300) {
873                                 if (--redir_limit == 0)
874                                         bb_error_msg_and_die("too many redirections");
875                                 fclose(sfp);
876                                 if (str[0] == '/') {
877                                         free(redirected_path);
878                                         target.path = redirected_path = xstrdup(str+1);
879                                         /* lsa stays the same: it's on the same server */
880                                 } else {
881                                         parse_url(str, &target);
882                                         if (!use_proxy) {
883                                                 free(server.allocated);
884                                                 server.allocated = NULL;
885                                                 server.host = target.host;
886                                                 /* strip_ipv6_scope_id(target.host); - no! */
887                                                 /* we assume remote never gives us IPv6 addr with scope id */
888                                                 server.port = target.port;
889                                                 free(lsa);
890                                                 goto resolve_lsa;
891                                         } /* else: lsa stays the same: we use proxy */
892                                 }
893                                 goto establish_session;
894                         }
895                 }
896 //              if (status >= 300)
897 //                      bb_error_msg_and_die("bad redirection (no Location: header from server)");
898
899                 /* For HTTP, data is pumped over the same connection */
900                 dfp = sfp;
901
902         } else {
903                 /*
904                  *  FTP session
905                  */
906                 sfp = prepare_ftp_session(&dfp, &target, lsa);
907         }
908
909         free(lsa);
910
911         if (!(option_mask32 & WGET_OPT_SPIDER)) {
912                 if (G.output_fd < 0)
913                         G.output_fd = xopen(G.fname_out, G.o_flags);
914                 retrieve_file_data(dfp);
915                 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
916                         xclose(G.output_fd);
917                         G.output_fd = -1;
918                 }
919         }
920
921         if (dfp != sfp) {
922                 /* It's ftp. Close data connection properly */
923                 fclose(dfp);
924                 if (ftpcmd(NULL, NULL, sfp) != 226)
925                         bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
926                 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
927         }
928         fclose(sfp);
929
930         free(server.allocated);
931         free(target.allocated);
932         free(fname_out_alloc);
933         free(redirected_path);
934 }
935
936 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
937 int wget_main(int argc UNUSED_PARAM, char **argv)
938 {
939 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
940         static const char wget_longopts[] ALIGN1 =
941                 /* name, has_arg, val */
942                 "continue\0"         No_argument       "c"
943 //FIXME: -s isn't --spider, it's --save-headers!
944                 "spider\0"           No_argument       "s"
945                 "quiet\0"            No_argument       "q"
946                 "output-document\0"  Required_argument "O"
947                 "directory-prefix\0" Required_argument "P"
948                 "proxy\0"            Required_argument "Y"
949                 "user-agent\0"       Required_argument "U"
950 #if ENABLE_FEATURE_WGET_TIMEOUT
951                 "timeout\0"          Required_argument "T"
952 #endif
953                 /* Ignored: */
954                 // "tries\0"            Required_argument "t"
955                 /* Ignored (we always use PASV): */
956                 "passive-ftp\0"      No_argument       "\xff"
957                 "header\0"           Required_argument "\xfe"
958                 "post-data\0"        Required_argument "\xfd"
959                 /* Ignored (we don't do ssl) */
960                 "no-check-certificate\0" No_argument   "\xfc"
961                 /* Ignored (we don't support caching) */
962                 "no-cache\0"         No_argument       "\xfb"
963                 ;
964 #endif
965
966 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
967         llist_t *headers_llist = NULL;
968 #endif
969
970         INIT_G();
971
972 #if ENABLE_FEATURE_WGET_TIMEOUT
973         G.timeout_seconds = 900;
974         signal(SIGALRM, alarm_handler);
975 #endif
976         G.proxy_flag = "on";   /* use proxies if env vars are set */
977         G.user_agent = "Wget"; /* "User-Agent" header field */
978
979 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
980         applet_long_options = wget_longopts;
981 #endif
982         opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
983         getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
984                 &G.fname_out, &G.dir_prefix,
985                 &G.proxy_flag, &G.user_agent,
986                 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
987                 NULL /* -t RETRIES */
988                 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
989                 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
990         );
991         argv += optind;
992
993 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
994         if (headers_llist) {
995                 int size = 1;
996                 char *cp;
997                 llist_t *ll = headers_llist;
998                 while (ll) {
999                         size += strlen(ll->data) + 2;
1000                         ll = ll->link;
1001                 }
1002                 G.extra_headers = cp = xmalloc(size);
1003                 while (headers_llist) {
1004                         cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
1005                 }
1006         }
1007 #endif
1008
1009         G.output_fd = -1;
1010         G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1011         if (G.fname_out) { /* -O FILE ? */
1012                 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1013                         G.output_fd = 1;
1014                         option_mask32 &= ~WGET_OPT_CONTINUE;
1015                 }
1016                 /* compat with wget: -O FILE can overwrite */
1017                 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1018         }
1019
1020         while (*argv)
1021                 download_one_url(*argv++);
1022
1023         if (G.output_fd >= 0)
1024                 xclose(G.output_fd);
1025
1026 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1027         free(G.extra_headers);
1028 #endif
1029         FINI_G();
1030
1031         return EXIT_SUCCESS;
1032 }