1 /*****************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * The contents of this file are subject to the Mozilla Public License
9 * Version 1.0 (the "License"); you may not use this file except in
10 * compliance with the License. You may obtain a copy of the License at
11 * http://www.mozilla.org/MPL/
13 * Software distributed under the License is distributed on an "AS IS"
14 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
15 * License for the specific language governing rights and limitations
18 * The Original Code is Curl.
20 * The Initial Developer of the Original Code is Daniel Stenberg.
22 * Portions created by the Initial Developer are Copyright (C) 1998.
23 * All Rights Reserved.
25 * ------------------------------------------------------------
27 * - Daniel Stenberg <daniel@haxx.se>
38 * ------------------------------------------------------------
39 ****************************************************************************/
43 /* -- WIN32 approved -- */
49 #include <sys/types.h>
56 #if defined(WIN32) && !defined(__GNUC__) || defined(__MINGW32__)
61 #ifdef HAVE_SYS_SOCKET_H
62 #include <sys/socket.h>
64 #include <netinet/in.h>
66 #include <sys/resource.h>
71 #ifdef HAVE_ARPA_INET_H
72 #include <arpa/inet.h>
77 #include <sys/ioctl.h>
80 #ifdef HAVE_SYS_PARAM_H
81 #include <sys/param.h>
84 #ifdef HAVE_SYS_SELECT_H
85 #include <sys/select.h>
89 #error "We can't compile without select() support!"
92 #error "We can't compile without socket() support!"
98 #include <curl/curl.h>
99 #include <curl/types.h>
104 #include "download.h"
106 #include "speedcheck.h"
108 #include "progress.h"
111 #define _MPRINTF_REPLACE /* use our functions only */
112 #include <curl/mprintf.h>
114 /* The last #include file should be: */
116 #include "memdebug.h"
120 #define min(a, b) ((a) < (b) ? (a) : (b))
/*
 * _Transfer() — the central transfer loop: select()s on the read and/or
 * write socket, parses incoming HTTP headers line-by-line into
 * data->headerbuff (growing it with realloc as needed), hands header and
 * body data to client_write(), pumps upload data through data->fread()
 * and curl_write(), and enforces speed/timeout limits.
 *
 * NOTE(review): this listing is line-sampled — many original lines are
 * elided between the visible ones (loop heads, else-arms, braces), so the
 * comments below describe only what the visible lines establish.
 *
 * Returns CURLE_OK on success or a specific CURLcode on failure
 * (read/write errors, HTTP error codes, range-resume failure, timeout,
 * partial file, abort-by-callback).
 */
124 _Transfer(struct connectdata *c_conn)
126 size_t nread; /* number of bytes read */
127 int bytecount = 0; /* total number of bytes read */
128 int writebytecount = 0; /* number of bytes written */
129 long contentlength=0; /* size of incoming data */
130 struct timeval start = tvnow();
131 struct timeval now = start; /* current time */
132 bool header = TRUE; /* incoming data has HTTP header */
133 int headerline = 0; /* counts header lines to better track the
135 char *hbufp; /* points at *end* of header line */
137 char *str; /* within buf */
138 char *str_start; /* within buf */
139 char *end_ptr; /* within buf */
140 char *p; /* within headerbuff */
141 bool content_range = FALSE; /* set TRUE if Content-Range: was found */
142 int offset = 0; /* possible resume offset read from the
143 Content-Range: header */
144 int code = 0; /* error code from the 'HTTP/1.? XXX' line */
146 /* for the low speed checks: */
151 char newurl[URL_MAX_LENGTH]; /* buffer for Location: URL */
154 /* the highest fd we use + 1 */
155 struct UrlData *data;
156 struct connectdata *conn = (struct connectdata *)c_conn;
/* Sanity check: refuse anything that isn't a proper connect struct. */
160 if(!conn || (conn->handle != STRUCT_CONNECT))
161 return CURLE_BAD_FUNCTION_ARGUMENT;
163 data = conn->data; /* there's the root struct */
/* NOTE(review): assumes sockfd/writesockfd are valid or -1; -1 sockets
   are skipped from the fd_sets below but still feed this max — confirm. */
165 maxfd = (conn->sockfd>conn->writesockfd?conn->sockfd:conn->writesockfd)+1;
167 hbufp = data->headerbuff;
169 myalarm (0); /* switch off the alarm-style timeout */
177 pgrsTime(data, TIMER_PRETRANSFER);
180 if (!conn->getheader) {
183 pgrsSetDownloadSize(data, conn->size);
185 /* we want header and/or body, if neither then don't do this! */
186 if(conn->getheader ||
187 !data->bits.no_body) {
192 struct timeval interval;
195 /* timeout every X second
196 - makes a better progressmeter (i.e even when no data is read, the
197 meter can be updated and reflect reality)
198 - allows removal of the alarm() crap
199 - variable timeout is easier
/* Build the fd_sets once; they are saved in keep-variables and restored
   on every lap since select() modifies them in place. */
202 FD_ZERO (&readfd); /* clear it */
203 if(conn->sockfd != -1) {
204 FD_SET (conn->sockfd, &readfd); /* read socket */
208 FD_ZERO (&writefd); /* clear it */
209 if(conn->writesockfd != -1) {
210 FD_SET (conn->writesockfd, &writefd); /* write socket */
211 keepon |= KEEP_WRITE;
214 /* get these in backup variables to be able to restore them on each lap in
220 readfd = rkeepfd; /* set those every lap in the loop */
223 interval.tv_usec = 0;
225 switch (select (maxfd, &readfd, &writefd, NULL, &interval)) {
226 case -1: /* select() error, stop reading */
228 /* The EINTR is not serious, and it seems you might get this more
229 often when using the lib in a multi-threaded environment! */
234 keepon = 0; /* no more read or write */
236 case 0: /* timeout */
/* --- download direction: socket is readable --- */
239 if((keepon & KEEP_READ) && FD_ISSET(conn->sockfd, &readfd)) {
241 urg = curl_read(conn, buf, BUFSIZE -1, &nread);
243 /* NULL terminate, allowing string ops to be used */
244 if (0 < (signed int) nread)
247 /* if we receive 0 or less here, the server closed the connection and
248 we bail out from this! */
249 else if (0 >= (signed int) nread) {
250 keepon &= ~KEEP_READ;
254 str = buf; /* Default buffer to use when we write the
255 buffer, it may be changed in the flow below
256 before the actual storing is done. */
258 /* Since this is a two-state thing, we check if we are parsing
259 headers at the moment or not. */
262 /* we are in parse-the-header-mode */
264 /* header line within buffer loop */
268 str_start = str; /* str_start is start of line within buf */
270 end_ptr = strchr (str_start, '\n');
273 /* no more complete header lines within buffer */
274 /* copy what is remaining into headerbuff */
275 int str_length = (int)strlen(str);
/* Grow the header buffer by ~1.5x when the partial line won't fit. */
277 if (hbuflen + (int)str_length >= data->headersize) {
279 long newsize=MAX((hbuflen+str_length)*3/2,
281 hbufp_index = hbufp - data->headerbuff;
282 newbuff = (char *)realloc(data->headerbuff, newsize);
284 failf (data, "Failed to alloc memory for big header!");
285 return CURLE_READ_ERROR;
287 data->headersize=newsize;
288 data->headerbuff = newbuff;
289 hbufp = data->headerbuff + hbufp_index;
292 hbufp += strlen (str);
293 hbuflen += strlen (str);
294 break; /* read more and try again */
297 str = end_ptr + 1; /* move just past new line */
/* Same grow-by-1.5x dance for a complete line that won't fit. */
299 if (hbuflen + (str - str_start) >= data->headersize) {
301 long newsize=MAX((hbuflen+(str-str_start))*3/2,
303 hbufp_index = hbufp - data->headerbuff;
304 newbuff = (char *)realloc(data->headerbuff, newsize);
306 failf (data, "Failed to alloc memory for big header!");
307 return CURLE_READ_ERROR;
309 data->headersize= newsize;
310 data->headerbuff = newbuff;
311 hbufp = data->headerbuff + hbufp_index;
314 /* copy to end of line */
315 strncpy (hbufp, str_start, str - str_start);
316 hbufp += str - str_start;
317 hbuflen += str - str_start;
320 p = data->headerbuff;
322 /* we now have a full line that p points to */
323 if (('\n' == *p) || ('\r' == *p)) {
324 /* Zero-length line means end of header! */
325 if (-1 != conn->size) /* if known */
326 conn->size += bytecount; /* we append the already read size */
330 p++; /* pass the \r byte */
332 p++; /* pass the \n byte */
334 pgrsSetDownloadSize(data, conn->size);
336 header = FALSE; /* no more header to parse! */
338 /* now, only output this if the header AND body are requested:
340 writetype = CLIENTWRITE_HEADER;
341 if (data->bits.http_include_header)
342 writetype |= CLIENTWRITE_BODY;
344 urg = client_write(data, writetype, data->headerbuff,
345 p - data->headerbuff);
349 data->header_size += p - data->headerbuff;
350 break; /* exit header line loop */
354 /* This is the first header, it MUST be the error code line
355 or else we consider this to be the body right away! */
356 if (sscanf (p, " HTTP/1.%*c %3d", &code)) {
357 /* 404 -> URL not found! */
/* NOTE(review): when following redirects only >=400 is fatal here,
   otherwise >=300 is — 3xx must survive so Location: can be acted on. */
359 ( ((data->bits.http_follow_location) && (code >= 400))
361 (!data->bits.http_follow_location && (code >= 300)))
362 && (data->bits.http_fail_on_error)) {
363 /* If we have been told to fail hard on HTTP-errors,
364 here is the check for that: */
365 /* serious error, go home! */
366 failf (data, "The requested file was not found");
367 return CURLE_HTTP_NOT_FOUND;
369 data->progress.httpcode = code;
372 header = FALSE; /* this is not a header line */
376 /* check for Content-Length: header lines to get size */
377 if (strnequal("Content-Length", p, 14) &&
378 sscanf (p+14, ": %ld", &contentlength))
379 conn->size = contentlength;
380 else if (strnequal("Content-Range", p, 13)) {
381 if (sscanf (p+13, ": bytes %d-", &offset) ||
382 sscanf (p+13, ": bytes: %d-", &offset)) {
383 /* This second format was added August 1st by Igor
384 Khristophorov since Sun's webserver JavaWebServer/1.1.1
385 obviously sends the header this way! :-( */
386 if (data->resume_from == offset) {
387 /* we asked for a resume and we got it */
388 content_range = TRUE;
/* NOTE(review): length 11 in strnequal only covers "Set-Cookie:" (no
   trailing space), while &p[12] skips past the space — the literal is
   12 chars; looks inconsistent, confirm against cookie_add() callers. */
392 else if(data->cookies &&
393 strnequal("Set-Cookie: ", p, 11)) {
394 cookie_add(data->cookies, TRUE, &p[12]);
396 else if(strnequal("Last-Modified:", p,
397 strlen("Last-Modified:")) &&
398 (data->timecondition || data->bits.get_filetime) ) {
399 time_t secs=time(NULL);
400 timeofdoc = curl_getdate(p+strlen("Last-Modified:"), &secs);
401 if(data->bits.get_filetime)
402 data->progress.filetime = timeofdoc;
404 else if ((code >= 300 && code < 400) &&
405 (data->bits.http_follow_location) &&
406 strnequal("Location", p, 8) &&
407 sscanf (p+8, ": %" URL_MAX_LENGTH_TXT "s",
409 /* this is the URL that the server advises us to get
411 data->newurl = strdup (newurl);
414 writetype = CLIENTWRITE_HEADER;
415 if (data->bits.http_include_header)
416 writetype |= CLIENTWRITE_BODY;
418 urg = client_write(data, writetype, p, hbuflen);
422 data->header_size += hbuflen;
424 /* reset hbufp pointer && hbuflen */
425 hbufp = data->headerbuff;
428 while (*str); /* header line within buffer */
430 /* We might have reached the end of the header part here, but
431 there might be a non-header part left in the end of the read
435 /* the next token and forward is not part of
438 /* we subtract the remaining header size from the buffer */
439 nread -= (str - buf);
442 } /* end if header mode */
444 /* This is not an 'else if' since it may be a rest from the header
445 parsing, where the beginning of the buffer is headers and the end
447 if (str && !header && ((signed int)nread > 0)) {
449 if(0 == bodywrites) {
450 /* These checks are only made the first time we are about to
451 write a chunk of the body */
452 if(conn->protocol&PROT_HTTP) {
453 /* HTTP-only checks */
454 if (data->resume_from && !content_range ) {
455 /* we wanted to resume a download, although the server
456 doesn't seem to support this */
457 failf (data, "HTTP server doesn't seem to support byte ranges. Cannot resume.");
458 return CURLE_HTTP_RANGE_ERROR;
460 else if (data->newurl) {
461 /* abort after the headers if "follow Location" is set */
462 infof (data, "Follow to new URL: %s\n", data->newurl);
465 else if(data->timecondition && !data->range) {
466 /* A time condition has been set AND no ranges have been
467 requested. This seems to be what chapter 13.3.4 of
468 RFC 2616 defines to be the correct action for a
470 if((timeofdoc > 0) && (data->timevalue > 0)) {
471 switch(data->timecondition) {
472 case TIMECOND_IFMODSINCE:
474 if(timeofdoc < data->timevalue) {
476 "The requested document is not new enough");
480 case TIMECOND_IFUNMODSINCE:
481 if(timeofdoc > data->timevalue) {
483 "The requested document is not old enough");
488 } /* two valid time strings */
489 } /* we have a time condition */
491 } /* this is the first time we write a body part */
/* Clamp the final chunk when a max download size is configured. */
494 if(data->maxdownload &&
495 (bytecount + nread > data->maxdownload)) {
496 nread = data->maxdownload - bytecount;
497 if((signed int)nread < 0 ) /* this should be unusual */
499 keepon &= ~KEEP_READ; /* we're done reading */
504 pgrsSetDownloadCounter(data, (double)bytecount);
506 urg = client_write(data, CLIENTWRITE_BODY, str, nread);
510 } /* if (! header and data to read ) */
511 } /* if( read from socket ) */
/* --- upload direction: socket is writable --- */
513 if((keepon & KEEP_WRITE) && FD_ISSET(conn->writesockfd, &writefd)) {
516 char scratch[BUFSIZE * 2];
518 size_t bytes_written;
521 buf = data->buffer; /* put it back on the buffer */
523 nread = data->fread(buf, 1, conn->upload_bufsize, data->in);
525 /* the signed int typecast of nread of for systems that has
527 if ((signed int)nread<=0) {
529 keepon &= ~KEEP_WRITE; /* we're done writing */
532 writebytecount += nread;
533 pgrsSetUploadCounter(data, (double)writebytecount);
535 /* convert LF to CRLF if so asked */
/* scratch is 2*BUFSIZE: worst case every byte is LF and doubles. */
537 for(i = 0, si = 0; i < (int)nread; i++, si++) {
538 if (buf[i] == 0x0a) {
539 scratch[si++] = 0x0d;
543 scratch[si] = buf[i];
547 buf = scratch; /* point to the new buffer */
550 /* write to socket */
551 urg = curl_write(conn, buf, nread, &bytes_written);
/* NOTE(review): a short write is treated as fatal rather than
   retried — confirm that is intended for non-blocking sockets. */
553 if(nread != bytes_written) {
554 failf(data, "Failed uploading data");
555 return CURLE_WRITE_ERROR;
565 urg = CURLE_ABORTED_BY_CALLBACK;
567 urg = speedcheck (data, now);
571 if(data->progress.ulspeed > conn->upload_bufsize) {
572 /* If we're transferring more data per second than fits in our buffer,
573 we increase the buffer size to adjust to the current
574 speed. However, we must not set it larger than BUFSIZE. We don't
575 adjust it downwards again since we don't see any point in that!
577 conn->upload_bufsize=(long)min(data->progress.ulspeed, BUFSIZE);
580 if (data->timeout && (tvdiff (now, start) > data->timeout)) {
581 failf (data, "Operation timed out with %d out of %d bytes received",
582 bytecount, conn->size);
583 return CURLE_OPERATION_TIMEOUTED;
/* Post-loop: detect a connection that closed before the advertised
   Content-Length was fully received. */
587 if(!(data->bits.no_body) && contentlength &&
588 (bytecount != contentlength)) {
589 failf(data, "transfer closed with %d bytes remaining to read",
590 contentlength-bytecount);
591 return CURLE_PARTIAL_FILE;
594 return CURLE_ABORTED_BY_CALLBACK;
/* Report final byte counts back through the connection struct. */
597 *conn->bytecountp = bytecount; /* read count */
598 if(conn->writebytecountp)
599 *conn->writebytecountp = writebytecount; /* write count */
/* NOTE(review): func_T is not referenced anywhere in the visible code —
   presumably leftover; confirm before removing. */
604 typedef int (*func_T)(void);
/*
 * curl_transfer() — public entry point for a single URL transfer.
 * Runs the connect / do / _Transfer / done sequence in a loop so that
 * "Location:" redirects (data->newurl, set by _Transfer) can be followed,
 * up to data->maxredirs hops. Relative redirect URLs (an RFC 2068
 * violation) are resolved against the previous URL by hand.
 *
 * NOTE(review): this listing is line-sampled — many original lines are
 * elided (the enclosing do{}, else-arms, frees), so comments describe
 * only what the visible lines establish.
 *
 * Returns CURLE_OK on success or the first failing step's CURLcode
 * (including CURLE_TOO_MANY_REDIRECTS and CURLE_OUT_OF_MEMORY).
 */
606 CURLcode curl_transfer(CURL *curl)
609 struct UrlData *data = curl;
610 struct connectdata *c_connect=NULL;
615 pgrsTime(data, TIMER_STARTSINGLE);
616 res = curl_connect(curl, (CURLconnect **)&c_connect);
617 if(res == CURLE_OK) {
618 res = curl_do(c_connect);
619 if(res == CURLE_OK) {
620 res = _Transfer(c_connect); /* now fetch that URL please */
622 res = curl_done(c_connect);
/* A redirect was received and the transfer succeeded: follow it. */
625 if((res == CURLE_OK) && data->newurl) {
626 /* Location: redirect
628 This is assumed to happen for HTTP(S) only!
631 char path[URL_MAX_LENGTH];
632 if (data->maxredirs && (data->followlocation >= data->maxredirs)) {
633 failf(data,"Maximum (%d) redirects followed", data->maxredirs);
634 curl_disconnect(c_connect);
635 res=CURLE_TOO_MANY_REDIRECTS;
639 /* mark the next request as a followed location: */
640 data->bits.this_is_a_follow = TRUE;
642 data->followlocation++; /* count location-followers */
644 if(data->bits.http_auto_referer) {
645 /* We are asked to automatically set the previous URL as the
646 referer when we get the next URL. We pick the ->url field,
647 which may or may not be 100% correct */
649 if(data->free_referer) {
650 /* If we already have an allocated referer, free this first */
654 data->referer = strdup(data->url);
655 data->free_referer = TRUE; /* yes, free this later */
656 data->bits.http_set_referer = TRUE; /* might have been false */
/* If newurl doesn't parse as scheme://..., it is a relative URL
   and must be grafted onto the old URL by hand. */
659 if(2 != sscanf(data->newurl, "%15[^:]://%" URL_MAX_LENGTH_TXT
662 *DANG* this is an RFC 2068 violation. The URL is supposed
663 to be absolute and this doesn't seem to be that!
665 Instead, we have to TRY to append this new path to the old URL
666 to the right of the host part. Oh crap, this is doomed to cause
667 problems in the future...
673 /* protsep points to the start of the host name */
674 protsep=strstr(data->url, "//");
678 /* TBD: set the port with curl_setopt() */
679 data->port=0; /* we got a full URL and then we should reset the
680 port number here to re-initiate it later */
681 protsep+=2; /* pass the slashes */
684 if('/' != data->newurl[0]) {
685 /* First we need to find out if there's a ?-letter in the URL,
686 and cut it and the right-side of that off */
687 pathsep = strrchr(protsep, '?');
691 /* we have a relative path to append to the last slash if
692 there's one available */
693 pathsep = strrchr(protsep, '/');
698 /* We got a new absolute path for this server, cut off from the
700 pathsep = strchr(protsep, '/');
705 newest=(char *)malloc( strlen(data->url) +
706 1 + /* possible slash */
707 strlen(data->newurl) + 1/* zero byte */);
710 return CURLE_OUT_OF_MEMORY;
711 sprintf(newest, "%s%s%s", data->url, ('/' == data->newurl[0])?"":"/",
714 data->newurl = newest;
717 /* This was an absolute URL, clear the port number! */
718 /* TBD: set the port with curl_setopt() */
/* Swap the followed URL in as the current one; ownership of the
   allocated string moves to data->url (urlstringalloc marks it). */
722 if(data->bits.urlstringalloc)
725 /* TBD: set the URL with curl_setopt() */
726 data->url = data->newurl;
727 data->newurl = NULL; /* don't show! */
728 data->bits.urlstringalloc = TRUE; /* the URL is allocated */
730 /* Disable both types of POSTs, since doing a second POST when
731 following isn't what anyone would want! */
732 data->bits.http_post = FALSE;
733 data->bits.http_formpost = FALSE;
735 infof(data, "Follows Location: to new URL: '%s'\n", data->url);
737 curl_disconnect(c_connect);
741 curl_disconnect(c_connect);
743 break; /* it only reaches here when this shouldn't loop */
745 } while(1); /* loop if Location: */