1 /*---------------------------------------------------------------------\
3 | |__ / \ / / . \ . \ |
8 \---------------------------------------------------------------------*/
9 /** \file zypp/media/MediaMultiCurl.cc
14 #include <sys/types.h>
18 #include <arpa/inet.h>
25 #include <zypp/ZConfig.h>
26 #include <zypp/base/Logger.h>
27 #include <zypp/media/MediaMultiCurl.h>
28 #include <zypp/media/MetaLinkParser.h>
29 #include <zypp/ManagedFile.h>
30 #include <zypp/media/CurlHelper.h>
33 using namespace zypp::base;
35 #undef CURLVERSION_AT_LEAST
36 #define CURLVERSION_AT_LEAST(M,N,O) LIBCURL_VERSION_NUM >= ((((M)<<8)+(N))<<8)+(O)
42 //////////////////////////////////////////////////////////////////////
45 class multifetchrequest;
47 // Hack: we derive from MediaCurl just to get the storage space for
48 // settings, url, curlerrors and the like
// One parallel-download worker: a MediaCurl-derived curl easy handle that
// fetches individual blocks of the target file on behalf of a
// multifetchrequest.  Derivation from MediaCurl is only for storage of
// settings/url/curl-error state (see note above).
50 class multifetchworker : MediaCurl {
51 friend class multifetchrequest;
54 multifetchworker(int no, multifetchrequest &request, const Url &url);
59 bool recheckChecksum();
60 void disableCompetition();
63 void adddnsfd(fd_set &rset, int &maxfd);
64 void dnsevent(fd_set &rset);
// curl write/header callbacks: the static versions are trampolines that
// recover the worker instance from the callback cookie.
88 size_t writefunction(void *ptr, size_t size);
89 static size_t _writefunction(void *ptr, size_t size, size_t nmemb, void *stream);
91 size_t headerfunction(char *ptr, size_t size);
92 static size_t _headerfunction(void *ptr, size_t size, size_t nmemb, void *stream);
94 multifetchrequest *_request;
// Worker life-cycle states (stored in multifetchworker::_state).
105 #define WORKER_STARTING 0
106 #define WORKER_LOOKUP 1
107 #define WORKER_FETCH 2
108 #define WORKER_DISCARD 3
109 #define WORKER_DONE 4
110 #define WORKER_SLEEP 5
111 #define WORKER_BROKEN 6
// Orchestrates one multi-connection (metalink) download: owns the curl
// multi handle, the pool of multifetchworker objects, and the global
// progress / rate-control bookkeeping.
115 class multifetchrequest {
117 multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize);
118 ~multifetchrequest();
120 void run(std::vector<Url> &urllist);
123 friend class multifetchworker;
125 const MediaMultiCurl *_context;
126 const Pathname _filename;
130 callback::SendReport<DownloadProgressReport> *_report;
131 MediaBlockList *_blklist;
// All workers spawned for this request; owned by the request (deleted in dtor).
136 std::list<multifetchworker *> _workers;
// Worker population counters, maintained as workers change state.
142 size_t _activeworkers;
143 size_t _lookupworkers;
144 size_t _sleepworkers;
145 double _minsleepuntil;
149 off_t _fetchedgoodsize;
// Progress/rate bookkeeping (timestamps are currentTime() doubles, seconds).
152 double _lastprogress;
154 double _lastperiodstart;
155 double _lastperiodfetched;
160 double _connect_timeout;
// Block size used when downloading without a metalink block list (128 KiB).
165 #define BLKSIZE 131072
169 //////////////////////////////////////////////////////////////////////
// Current wall-clock time as fractional seconds (gettimeofday based);
// used for all speed/sleep/progress computations in this file.
175 if (gettimeofday(&tv, NULL))
177 return tv.tv_sec + tv.tv_usec / 1000000.;
// curl write callback (instance side): receive a chunk of body data for the
// block this worker is fetching, verify/account it, and write it to the
// target file at the proper offset.
181 multifetchworker::writefunction(void *ptr, size_t size)
184 if (_state == WORKER_BROKEN)
187 double now = currentTime();
// Clamp the incoming data to the bytes still expected for this block.
189 len = size > _size ? _size : size;
// First byte of a ranged request: verify the server honored the Range header.
196 if (_blkstart && _off == _blkstart)
198 // make sure that the server replied with "partial content"
201 (void)curl_easy_getinfo(_curl, CURLINFO_EFFECTIVE_URL, &effurl);
202 if (effurl && !strncasecmp(effurl, "http", 4))
205 (void)curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &statuscode);
206 if (statuscode != 206)
// Any received data counts as progress for the stall-timeout check.
214 _request->_lastprogress = now;
216 if (_state == WORKER_DISCARD || !_request->_fp)
218 // block is no longer needed
219 // still calculate the checksum so that we can throw out bad servers
220 if (_request->_blklist)
221 _dig.update((const char *)ptr, len);
// Seek to the block offset and write; digest what was actually written.
226 if (fseeko(_request->_fp, _off, SEEK_SET))
228 cnt = fwrite(ptr, 1, len, _request->_fp);
231 _request->_fetchedsize += cnt;
232 if (_request->_blklist)
233 _dig.update((const char *)ptr, cnt);
243 multifetchworker::_writefunction(void *ptr, size_t size, size_t nmemb, void *stream)
245 multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
246 return me->writefunction(ptr, size * nmemb);
// curl header callback (instance side): log redirects and parse
// "Content-Range:" replies to learn (or validate) the total file size.
250 multifetchworker::headerfunction(char *p, size_t size)
253 if (l > 9 && !strncasecmp(p, "Location:", 9))
255 std::string line(p + 9, l - 9);
256 if (line[l - 10] == '\r')
257 line.erase(l - 10, 1);
258 XXX << "#" << _workerno << ": redirecting to" << line << endl;
// Only interested in a plausible-length "Content-Range:" header below.
261 if (l <= 14 || l >= 128 || strncasecmp(p, "Content-Range:", 14) != 0)
265 while (l && (*p == ' ' || *p == '\t'))
267 if (l < 6 || strncasecmp(p, "bytes", 5))
// Parse "start-end/total" from the range specification.
274 unsigned long long start, off, filesize;
275 if (sscanf(buf, "%llu-%llu/%llu", &start, &off, &filesize) != 3)
// First worker to see the total size records it on the shared request.
277 if (_request->_filesize == (off_t)-1)
279 WAR << "#" << _workerno << ": setting request filesize to " << filesize << endl;
280 _request->_filesize = filesize;
281 if (_request->_totalsize == 0 && !_request->_blklist)
282 _request->_totalsize = filesize;
// A mirror reporting a different size is serving different content: drop it.
284 if (_request->_filesize != (off_t)filesize)
286 XXX << "#" << _workerno << ": filesize mismatch" << endl;
287 _state = WORKER_BROKEN;
288 strncpy(_curlError, "filesize mismatch", CURL_ERROR_SIZE);
294 multifetchworker::_headerfunction(void *ptr, size_t size, size_t nmemb, void *stream)
296 multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
297 return me->headerfunction((char *)ptr, size * nmemb);
// Construct worker \a no for \a request fetching from \a url: reuse a pooled
// curl easy handle for the host if available, install the write/header
// callbacks, and copy credentials when the mirror host matches the request.
300 multifetchworker::multifetchworker(int no, multifetchrequest &request, const Url &url)
301 : MediaCurl(url, Pathname())
305 _state = WORKER_STARTING;
307 _off = _blkstart = 0;
308 _size = _blksize = 0;
318 _maxspeed = _request->_maxspeed;
321 Url curlUrl( clearQueryString(url) );
322 _urlbuf = curlUrl.asString();
// Per-host easy-handle pool avoids re-doing connection setup for mirrors.
323 _curl = _request->_context->fromEasyPool(_url.getHost());
325 XXX << "reused worker from pool" << endl;
326 if (!_curl && !(_curl = curl_easy_init()))
328 _state = WORKER_BROKEN;
329 strncpy(_curlError, "curl_easy_init failed", CURL_ERROR_SIZE);
336 catch (Exception &ex)
338 curl_easy_cleanup(_curl);
340 _state = WORKER_BROKEN;
341 strncpy(_curlError, "curl_easy_setopt failed", CURL_ERROR_SIZE);
// CURLOPT_PRIVATE lets run() map a finished easy handle back to its worker.
344 curl_easy_setopt(_curl, CURLOPT_PRIVATE, this);
345 curl_easy_setopt(_curl, CURLOPT_URL, _urlbuf.c_str());
346 curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, &_writefunction);
347 curl_easy_setopt(_curl, CURLOPT_WRITEDATA, this);
// Header parsing is only needed while the filesize is still unknown or the
// first block has no checksum to validate against.
348 if (_request->_filesize == off_t(-1) || !_request->_blklist || !_request->_blklist->haveChecksum(0))
350 curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, &_headerfunction);
351 curl_easy_setopt(_curl, CURLOPT_HEADERDATA, this);
353 // if this is the same host copy authorization
354 // (the host check is also what curl does when doing a redirect)
355 // (note also that unauthorized exceptions are thrown with the request host)
356 if (url.getHost() == _request->_context->_url.getHost())
358 _settings.setUsername(_request->_context->_settings.username());
359 _settings.setPassword(_request->_context->_settings.password());
360 _settings.setAuthType(_request->_context->_settings.authType());
361 if ( _settings.userPassword().size() )
363 curl_easy_setopt(_curl, CURLOPT_USERPWD, _settings.userPassword().c_str());
364 std::string use_auth = _settings.authType();
365 if (use_auth.empty())
366 use_auth = "digest,basic"; // our default
367 long auth = CurlAuthData::auth_type_str2long(use_auth);
368 if( auth != CURLAUTH_NONE)
370 XXX << "#" << _workerno << ": Enabling HTTP authentication methods: " << use_auth
371 << " (CURLOPT_HTTPAUTH=" << auth << ")" << std::endl;
372 curl_easy_setopt(_curl, CURLOPT_HTTPAUTH, auth);
// Tear down a worker: detach from the multi handle if still attached, return
// clean easy handles to the per-host pool (after clearing our callbacks so a
// stale this-pointer can never be invoked), and reap the DNS-checker child.
379 multifetchworker::~multifetchworker()
383 if (_state == WORKER_FETCH || _state == WORKER_DISCARD)
384 curl_multi_remove_handle(_request->_multi, _curl);
385 if (_state == WORKER_DONE || _state == WORKER_SLEEP)
387 #if CURLVERSION_AT_LEAST(7,15,5)
388 curl_easy_setopt(_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)0);
// Reset all callback/cookie options before pooling the handle for reuse.
390 curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
391 curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, (void *)0);
392 curl_easy_setopt(_curl, CURLOPT_WRITEDATA, (void *)0);
393 curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, (void *)0);
394 curl_easy_setopt(_curl, CURLOPT_HEADERDATA, (void *)0);
395 _request->_context->toEasyPool(_url.getHost(), _curl);
398 curl_easy_cleanup(_curl);
// Reap the forked DNS-lookup child (retry on EINTR).
405 while (waitpid(_pid, &status, 0) == -1)
415 // the destructor in MediaCurl doesn't call disconnect() if
416 // the media is not attached, so we do it here manually
/// Return true iff environment variable \a name is set to a non-empty value.
/// \param name  variable name (taken by const reference; the original copied
///              the string on every call for no reason)
static inline bool env_isset(const std::string & name)
{
  const char *s = getenv(name.c_str());
  return s && *s;   // set AND non-empty
}
// Verify that the worker's host resolves before handing it to curl: fork a
// child that runs getaddrinfo() under an alarm() timeout and signal the
// result through a pipe (consumed by adddnsfd()/dnsevent()).  The check is
// skipped for hosts already verified, numeric addresses, and proxied setups.
427 multifetchworker::checkdns()
429 std::string host = _url.getHost();
434 if (_request->_context->isDNSok(host))
437 // no need to do dns checking for numeric hosts
439 if (inet_pton(AF_INET, host.c_str(), addrbuf) == 1)
441 if (inet_pton(AF_INET6, host.c_str(), addrbuf) == 1)
444 // no need to do dns checking if we use a proxy
445 if (!_settings.proxy().empty())
447 if (env_isset("all_proxy") || env_isset("ALL_PROXY"))
449 std::string schemeproxy = _url.getScheme() + "_proxy";
450 if (env_isset(schemeproxy))
452 if (schemeproxy != "http_proxy")
454 std::transform(schemeproxy.begin(), schemeproxy.end(), schemeproxy.begin(), ::toupper);
455 if (env_isset(schemeproxy))
459 XXX << "checking DNS lookup of " << host << endl;
463 _state = WORKER_BROKEN;
464 strncpy(_curlError, "DNS pipe creation failed", CURL_ERROR_SIZE);
468 if (_pid == pid_t(-1))
473 _state = WORKER_BROKEN;
474 strncpy(_curlError, "DNS checker fork failed", CURL_ERROR_SIZE);
// ---- child process: resolve and exit; exit status carries the verdict ----
480 // XXX: close all other file descriptors
481 struct addrinfo *ai, aihints;
482 memset(&aihints, 0, sizeof(aihints));
483 aihints.ai_family = PF_UNSPEC;
// Probe for IPv6 support; fall back to IPv4-only resolution without it.
484 int tstsock = socket(PF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
486 aihints.ai_family = PF_INET;
489 aihints.ai_socktype = SOCK_STREAM;
490 aihints.ai_flags = AI_CANONNAME;
491 unsigned int connecttimeout = _request->_connect_timeout;
// Bound the child's getaddrinfo() with SIGALRM so a dead resolver can't hang us.
493 alarm(connecttimeout);
494 signal(SIGALRM, SIG_DFL);
495 if (getaddrinfo(host.c_str(), NULL, &aihints, &ai))
// ---- parent: remember the read end and wait for dnsevent() ----
500 _dnspipe = pipefds[0];
501 _state = WORKER_LOOKUP;
// Add this worker's DNS-notification pipe to the select() read set
// (only meaningful while a lookup child is in flight).
505 multifetchworker::adddnsfd(fd_set &rset, int &maxfd)
507 if (_state != WORKER_LOOKUP)
509 FD_SET(_dnspipe, &rset);
510 if (maxfd < _dnspipe)
// Handle completion of the forked DNS lookup: reap the child and either mark
// the host as resolvable (shared via the context) or break this worker.
515 multifetchworker::dnsevent(fd_set &rset)
518 if (_state != WORKER_LOOKUP || !FD_ISSET(_dnspipe, &rset))
// Reap the resolver child, retrying on EINTR.
521 while (waitpid(_pid, &status, 0) == -1)
// Killed by SIGALRM (timeout) or other signal → lookup failed.
532 if (!WIFEXITED(status))
534 _state = WORKER_BROKEN;
535 strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
536 _request->_activeworkers--;
539 int exitcode = WEXITSTATUS(status);
540 XXX << "#" << _workerno << ": DNS lookup returned " << exitcode << endl;
543 _state = WORKER_BROKEN;
544 strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
545 _request->_activeworkers--;
// Success: cache the verdict so other workers skip the check for this host.
548 _request->_context->setDNSok(_url.getHost());
// Verify the digest accumulated while streaming the current block against
// the metalink block list; trivially true without a block list.
553 multifetchworker::checkChecksum()
555 // XXX << "checkChecksum block " << _blkno << endl;
556 if (!_blksize || !_request->_blklist)
558 return _request->_blklist->verifyDigest(_blkno, _dig);
// Re-verify the block by re-reading the bytes actually present in the target
// file (used after competing workers may have overwritten our data).
562 multifetchworker::recheckChecksum()
564 // XXX << "recheckChecksum block " << _blkno << endl;
565 if (!_request->_fp || !_blksize || !_request->_blklist)
567 if (fseeko(_request->_fp, _blkstart, SEEK_SET))
571 _request->_blklist->createDigest(_dig); // resets digest
// Stream the on-disk block through the digest in buffer-sized chunks.
574 size_t cnt = l > sizeof(buf) ? sizeof(buf) : l;
575 if (fread(buf, cnt, 1, _request->_fp) != 1)
577 _dig.update(buf, cnt);
580 return _request->_blklist->verifyDigest(_blkno, _dig);
// No fresh blocks remain: pick another worker's in-flight block to compete
// on.  Chooses a victim by pass/speed heuristics; if competing would clearly
// lose against the current owner, this worker goes to sleep instead.
585 multifetchworker::stealjob()
587 if (!_request->_stealing)
589 XXX << "start stealing!" << endl;
590 _request->_stealing = true;
592 multifetchworker *best = 0;
593 std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
595 for (; workeriter != _request->_workers.end(); ++workeriter)
597 multifetchworker *worker = *workeriter;
600 if (worker->_pass == -1)
601 continue; // do not steal!
602 if (worker->_state == WORKER_DISCARD || worker->_state == WORKER_DONE || worker->_state == WORKER_SLEEP || !worker->_blksize)
603 continue; // do not steal finished jobs
// Lazily compute an average speed for workers that received data.
604 if (!worker->_avgspeed && worker->_blkreceived)
608 if (now > worker->_blkstarttime)
609 worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
611 if (!best || best->_pass > worker->_pass)
616 if (best->_pass < worker->_pass)
618 // if it is the same block, we want to know the best worker, otherwise the worst
619 if (worker->_blkstart == best->_blkstart)
621 if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed < (best->_blksize - best->_blkreceived) * worker->_avgspeed)
626 if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed > (best->_blksize - best->_blkreceived) * worker->_avgspeed)
// Nothing left to steal: this worker (and possibly the request) is done.
632 _state = WORKER_DONE;
633 _request->_activeworkers--;
634 _request->_finished = true;
637 // do not sleep twice
638 if (_state != WORKER_SLEEP)
640 if (!_avgspeed && _blkreceived)
644 if (now > _blkstarttime)
645 _avgspeed = _blkreceived / (now - _blkstarttime);
648 // lets see if we should sleep a bit
649 XXX << "me #" << _workerno << ": " << _avgspeed << ", size " << best->_blksize << endl;
650 XXX << "best #" << best->_workerno << ": " << best->_avgspeed << ", size " << (best->_blksize - best->_blkreceived) << endl;
// If the victim will likely finish its remainder before we could fetch the
// whole block, sleep until roughly twice its projected completion time.
651 if (_avgspeed && best->_avgspeed && best->_blksize - best->_blkreceived > 0 &&
652 (best->_blksize - best->_blkreceived) * _avgspeed < best->_blksize * best->_avgspeed)
656 double sl = (best->_blksize - best->_blkreceived) / best->_avgspeed * 2;
659 XXX << "#" << _workerno << ": going to sleep for " << sl * 1000 << " ms" << endl;
660 _sleepuntil = now + sl;
661 _state = WORKER_SLEEP;
662 _request->_sleepworkers++;
// Compete: duplicate the victim's block coordinates onto this worker.
668 best->_competing = true;
669 _blkstart = best->_blkstart;
670 _blksize = best->_blksize;
673 _blkno = best->_blkno;
// Our copy of the block verified good: demote every other worker still
// fetching the same block to DISCARD and shield it from job stealing.
678 multifetchworker::disableCompetition()
680 std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
681 for (; workeriter != _request->_workers.end(); ++workeriter)
683 multifetchworker *worker = *workeriter;
686 if (worker->_blkstart == _blkstart)
688 if (worker->_state == WORKER_FETCH)
689 worker->_state = WORKER_DISCARD;
690 worker->_pass = -1; /* do not steal this one, we already have it */
// Assign the next block to this worker: either the next metalink block from
// the shared block list, or the next sequential BLKSIZE-sized chunk when
// downloading without one.  Updates the request's _blkno/_blkoff cursor.
697 multifetchworker::nextjob()
700 if (_request->_stealing)
706 MediaBlockList *blklist = _request->_blklist;
// No block list: slice the file into sequential BLKSIZE chunks.
710 if (_request->_filesize != off_t(-1))
712 if (_request->_blkoff >= _request->_filesize)
717 _blksize = _request->_filesize - _request->_blkoff;
718 if (_blksize > BLKSIZE)
// Block list: advance the cursor past already-covered blocks.
724 MediaBlock blk = blklist->getBlock(_request->_blkno);
725 while (_request->_blkoff >= (off_t)(blk.off + blk.size))
727 if (++_request->_blkno == blklist->numBlocks())
732 blk = blklist->getBlock(_request->_blkno);
733 _request->_blkoff = blk.off;
735 _blksize = blk.off + blk.size - _request->_blkoff;
// Cap unverifiable (checksum-less) blocks at BLKSIZE as well.
736 if (_blksize > BLKSIZE && !blklist->haveChecksum(_request->_blkno))
739 _blkno = _request->_blkno;
740 _blkstart = _request->_blkoff;
741 _request->_blkoff += _blksize;
// Start (or restart) the transfer for the currently assigned block: set the
// HTTP Range, attach the easy handle to the multi handle, reset the digest,
// and move to WORKER_FETCH.
746 multifetchworker::run()
750 if (_state == WORKER_BROKEN || _state == WORKER_DONE)
751 return; // just in case...
// Open-ended range when the server choked on an end offset (_noendrange).
753 sprintf(rangebuf, "%llu-", (unsigned long long)_blkstart);
755 sprintf(rangebuf, "%llu-%llu", (unsigned long long)_blkstart, (unsigned long long)_blkstart + _blksize - 1);
756 XXX << "#" << _workerno << ": BLK " << _blkno << ":" << rangebuf << " " << _url << endl;
757 if (curl_easy_setopt(_curl, CURLOPT_RANGE, !_noendrange || _blkstart != 0 ? rangebuf : (char *)0) != CURLE_OK)
759 _request->_activeworkers--;
760 _state = WORKER_BROKEN;
761 strncpy(_curlError, "curl_easy_setopt range failed", CURL_ERROR_SIZE);
764 if (curl_multi_add_handle(_request->_multi, _curl) != CURLM_OK)
766 _request->_activeworkers--;
767 _state = WORKER_BROKEN;
768 strncpy(_curlError, "curl_multi_add_handle failed", CURL_ERROR_SIZE);
// Tell the main loop to run curl_multi_perform before sleeping in select().
771 _request->_havenewjob = true;
774 if (_request->_blklist)
775 _request->_blklist->createDigest(_dig); // resets digest
776 _state = WORKER_FETCH;
778 double now = currentTime();
784 //////////////////////////////////////////////////////////////////////
// Initialize all bookkeeping for one multi-mirror download.  The total size
// is taken from the metalink block list when present, otherwise from the
// caller-supplied \a filesize (which may be -1 for "unknown").
787 multifetchrequest::multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) : _context(context), _filename(filename), _baseurl(baseurl)
792 _filesize = filesize;
798 _blkoff = _blklist->getBlock(0).off;
807 _fetchedgoodsize = 0;
809 _lastperiodstart = _lastprogress = _starttime = currentTime();
810 _lastperiodfetched = 0;
813 _connect_timeout = 0;
// Sum the block sizes to get the payload total used for percent reporting.
818 for (size_t blkno = 0; blkno < blklist->numBlocks(); blkno++)
820 MediaBlock blk = blklist->getBlock(blkno);
821 _totalsize += blk.size;
824 else if (filesize != off_t(-1))
825 _totalsize = filesize;
// The request owns its workers: destroy them all on teardown.
828 multifetchrequest::~multifetchrequest()
830 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
832 multifetchworker *worker = *workeriter;
// Main download loop: spawn workers over the mirror list, multiplex their
// transfers with select() + curl_multi_perform(), harvest finished blocks
// (checksum, competition, rescheduling), apply speed-based sleep and rate
// control, report progress, and enforce the stall timeout.
840 multifetchrequest::run(std::vector<Url> &urllist)
843 std::vector<Url>::iterator urliter = urllist.begin();
846 fd_set rset, wset, xset;
851 XXX << "finished!" << endl;
// Spawn a new worker while we are below _maxworkers and mirrors remain.
855 if ((int)_activeworkers < _maxworkers && urliter != urllist.end() && _workers.size() < MAXURLS)
857 // spawn another worker!
858 multifetchworker *worker = new multifetchworker(workerno++, *this, *urliter);
859 _workers.push_back(worker);
860 if (worker->_state != WORKER_BROKEN)
863 if (worker->_state != WORKER_LOOKUP)
875 WAR << "No more active workers!" << endl;
876 // show the first worker error we find
877 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
879 if ((*workeriter)->_state != WORKER_BROKEN)
881 ZYPP_THROW(MediaCurlException(_baseurl, "Server error", (*workeriter)->_curlError));
// Build the fd sets: curl's transfer fds plus our DNS-checker pipes.
890 curl_multi_fdset(_multi, &rset, &wset, &xset, &maxfd);
893 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
894 (*workeriter)->adddnsfd(rset, maxfd);
897 // if we added a new job we have to call multi_perform once
898 // to make it show up in the fd set. do not sleep in this case.
900 tv.tv_usec = _havenewjob ? 0 : 200000;
// Shorten the select timeout so the earliest sleeping worker wakes on time.
901 if (_sleepworkers && !_havenewjob)
903 if (_minsleepuntil == 0)
905 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
907 multifetchworker *worker = *workeriter;
908 if (worker->_state != WORKER_SLEEP)
910 if (!_minsleepuntil || _minsleepuntil > worker->_sleepuntil)
911 _minsleepuntil = worker->_sleepuntil;
914 double sl = _minsleepuntil - currentTime();
921 tv.tv_usec = sl * 1000000;
923 int r = select(maxfd + 1, &rset, &wset, &xset, &tv);
924 if (r == -1 && errno != EINTR)
925 ZYPP_THROW(MediaCurlException(_baseurl, "select() failed", "unknown error"));
// Dispatch DNS pipe readiness to the workers still in LOOKUP state.
926 if (r != 0 && _lookupworkers)
927 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
929 multifetchworker *worker = *workeriter;
930 if (worker->_state != WORKER_LOOKUP)
932 (*workeriter)->dnsevent(rset);
933 if (worker->_state != WORKER_LOOKUP)
943 mcode = curl_multi_perform(_multi, &tasks);
944 if (mcode == CURLM_CALL_MULTI_PERFORM)
946 if (mcode != CURLM_OK)
947 ZYPP_THROW(MediaCurlException(_baseurl, "curl_multi_perform", "unknown error"));
951 double now = currentTime();
// Maintain a smoothed per-period bandwidth average for progress reporting.
954 if (now > _lastperiodstart + .5)
957 _periodavg = (_fetchedsize - _lastperiodfetched) / (now - _lastperiodstart);
959 _periodavg = (_periodavg + (_fetchedsize - _lastperiodfetched) / (now - _lastperiodstart)) / 2;
960 _lastperiodfetched = _fetchedsize;
961 _lastperiodstart = now;
// Wake sleeping workers whose deadline has passed.
967 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
969 multifetchworker *worker = *workeriter;
970 if (worker->_state != WORKER_SLEEP)
972 if (worker->_sleepuntil > now)
974 if (_minsleepuntil == worker->_sleepuntil)
976 XXX << "#" << worker->_workerno << ": sleep done, wake up" << endl;
978 // nextjob changes the state
983 // collect all curl results, reschedule new jobs
985 while ((msg = curl_multi_info_read(_multi, &nqueue)) != 0)
987 if (msg->msg != CURLMSG_DONE)
989 CURL *easy = msg->easy_handle;
990 CURLcode cc = msg->data.result;
991 multifetchworker *worker;
// CURLINFO_PRIVATE was set to the worker in its constructor.
992 if (curl_easy_getinfo(easy, CURLINFO_PRIVATE, &worker) != CURLE_OK)
993 ZYPP_THROW(MediaCurlException(_baseurl, "curl_easy_getinfo", "unknown error"));
994 if (worker->_blkreceived && now > worker->_blkstarttime)
996 if (worker->_avgspeed)
997 worker->_avgspeed = (worker->_avgspeed + worker->_blkreceived / (now - worker->_blkstarttime)) / 2;
999 worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
1001 XXX << "#" << worker->_workerno << ": BLK " << worker->_blkno << " done code " << cc << " speed " << worker->_avgspeed << endl;
1002 curl_multi_remove_handle(_multi, easy);
1003 if (cc == CURLE_HTTP_RETURNED_ERROR)
1005 long statuscode = 0;
1006 (void)curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &statuscode);
1007 XXX << "HTTP status " << statuscode << endl;
// 416 with no block list: retry open-ended, or accept EOF past filesize.
1008 if (statuscode == 416 && !_blklist) /* Range error */
1010 if (_filesize == off_t(-1))
1012 if (!worker->_noendrange)
1014 XXX << "#" << worker->_workerno << ": retrying with no end range" << endl;
1015 worker->_noendrange = true;
1019 worker->_noendrange = false;
1023 if (worker->_blkstart >= _filesize)
1032 if (!worker->checkChecksum())
1034 WAR << "#" << worker->_workerno << ": checksum error, disable worker" << endl;
1035 worker->_state = WORKER_BROKEN;
1036 strncpy(worker->_curlError, "checksum error", CURL_ERROR_SIZE);
1040 if (worker->_state == WORKER_FETCH)
1042 if (worker->_competing)
1044 worker->disableCompetition();
1045 // multiple workers wrote into this block. We already know that our
1046 // data was correct, but maybe some other worker overwrote our data
1047 // with something broken. Thus we have to re-check the block.
1048 if (!worker->recheckChecksum())
1050 XXX << "#" << worker->_workerno << ": recheck checksum error, refetch block" << endl;
1051 // re-fetch! No need to worry about the bad workers,
1052 // they will now be set to DISCARD. At the end of their block
1053 // they will notice that they wrote bad data and go into BROKEN.
1058 _fetchedgoodsize += worker->_blksize;
1061 // make bad workers sleep a little
1063 int maxworkerno = 0;
1065 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
1067 multifetchworker *oworker = *workeriter;
1068 if (oworker->_state == WORKER_BROKEN)
1070 if (oworker->_avgspeed > maxavg)
1072 maxavg = oworker->_avgspeed;
1073 maxworkerno = oworker->_workerno;
1075 if (oworker->_avgspeed > worker->_avgspeed)
// Penalize slow workers: sleep duration derived from speed ratio vs. best.
1078 if (maxavg && !_stealing)
1080 double ratio = worker->_avgspeed / maxavg;
1082 if (numbetter < 3) // don't sleep that much if we're in the top two
1083 ratio = ratio * ratio;
1086 XXX << "#" << worker->_workerno << ": too slow ("<< ratio << ", " << worker->_avgspeed << ", #" << maxworkerno << ": " << maxavg << "), going to sleep for " << ratio * 1000 << " ms" << endl;
1087 worker->_sleepuntil = now + ratio;
1088 worker->_state = WORKER_SLEEP;
1094 // do rate control (if requested)
1095 // should use periodavg, but that's not what libcurl does
1096 if (_maxspeed && now > _starttime)
1098 double avg = _fetchedsize / (now - _starttime);
1099 avg = worker->_maxspeed * _maxspeed / avg;
1100 if (avg < _maxspeed / _maxworkers)
1101 avg = _maxspeed / _maxworkers;
1102 if (avg > _maxspeed)
1106 worker->_maxspeed = avg;
1107 #if CURLVERSION_AT_LEAST(7,15,5)
1108 curl_easy_setopt(worker->_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)(avg));
1116 worker->_state = WORKER_BROKEN;
// Last worker failed with no mirrors left: surface the curl error to caller.
1118 if (!_activeworkers && !(urliter != urllist.end() && _workers.size() < MAXURLS))
1120 // end of workers reached! goodbye!
1121 worker->evaluateCurlCode(Pathname(), cc, false);
1125 if ( _filesize > 0 && _fetchedgoodsize > _filesize ) {
1126 ZYPP_THROW(MediaFileSizeExceededException(_baseurl, _filesize));
// Progress callback; a false return from the report aborts the download.
1133 int percent = _totalsize ? (100 * (_fetchedgoodsize + _fetchedsize)) / (_totalsize + _fetchedsize) : 0;
1136 if (now > _starttime)
1137 avg = _fetchedsize / (now - _starttime);
1138 if (!(*(_report))->progress(percent, _baseurl, avg, _lastperiodstart == _starttime ? avg : _periodavg))
1139 ZYPP_THROW(MediaCurlException(_baseurl, "User abort", "cancelled"));
1142 if (_timeout && now - _lastprogress > _timeout)
1147 ZYPP_THROW(MediaTimeoutException(_baseurl));
1149 // print some download stats
1150 WAR << "overall result" << endl;
1151 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
1153 multifetchworker *worker = *workeriter;
1154 WAR << "#" << worker->_workerno << ": state: " << worker->_state << " received: " << worker->_received << " url: " << worker->_url << endl;
1159 //////////////////////////////////////////////////////////////////////
// Construct the multi-curl media handler; the metalink Accept header list is
// built lazily in setupEasy().
1162 MediaMultiCurl::MediaMultiCurl(const Url &url_r, const Pathname & attach_point_hint_r)
1163 : MediaCurl(url_r, attach_point_hint_r)
1165 MIL << "MediaMultiCurl::MediaMultiCurl(" << url_r << ", " << attach_point_hint_r << ")" << endl;
1167 _customHeadersMetalink = 0;
// Release the metalink header list, the curl multi handle, and every pooled
// easy handle.
1170 MediaMultiCurl::~MediaMultiCurl()
1172 if (_customHeadersMetalink)
1174 curl_slist_free_all(_customHeadersMetalink);
1175 _customHeadersMetalink = 0;
1179 curl_multi_cleanup(_multi);
1182 std::map<std::string, CURL *>::iterator it;
1183 for (it = _easypool.begin(); it != _easypool.end(); it++)
1185 CURL *easy = it->second;
1188 curl_easy_cleanup(easy);
// Extend MediaCurl's easy-handle setup: rebuild the custom header list with
// an Accept header advertising metalink support, so servers may answer with
// a metalink document instead of the plain file.
1194 void MediaMultiCurl::setupEasy()
1196 MediaCurl::setupEasy();
1198 if (_customHeadersMetalink)
1200 curl_slist_free_all(_customHeadersMetalink);
1201 _customHeadersMetalink = 0;
// Copy the existing custom headers, then append the metalink Accept header.
1203 struct curl_slist *sl = _customHeaders;
1204 for (; sl; sl = sl->next)
1205 _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, sl->data);
1206 _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, "Accept: */*, application/metalink+xml, application/metalink4+xml");
// Sniff the beginning of an open file (without moving its offset — pread)
// for an XML prolog followed by a "<metalink" root element.
1209 static bool looks_like_metalink_fd(int fd)
1213 while ((l = pread(fd, buf, sizeof(buf) - 1, (off_t)0)) == -1 && errno == EINTR)
// Skip leading whitespace, an optional "<?xml ...?>" declaration, then
// whitespace again before testing the root element name.
1219 while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
1221 if (!strncasecmp(p, "<?xml", 5))
1223 while (*p && *p != '>')
1227 while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
1230 bool ret = !strncasecmp(p, "<metalink", 9) ? true : false;
// Convenience wrapper: open \a file read-only and run the fd-based metalink
// sniffer on it.
1234 static bool looks_like_metalink(const Pathname & file)
1237 if ((fd = open(file.asString().c_str(), O_RDONLY|O_CLOEXEC)) == -1)
1239 bool ret = looks_like_metalink_fd(fd);
1241 DBG << "looks_like_metalink(" << file << "): " << ret << endl;
1245 // here we try to suppress all progress coming from a metalink download
1246 // bsc#1021291: Nevertheless send alive trigger (without stats), so UIs
1247 // are able to abort a hanging metalink download via callback response.
// Curl progress callback used while a download might still turn out to be a
// metalink document: suppress real progress until we can tell, and once a
// metalink is detected, permanently downgrade this transfer to alive-ticks.
1248 int MediaMultiCurl::progressCallback( void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
1250 CURL *_curl = MediaCurl::progressCallback_getcurl(clientp);
1252 return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1254 // bsc#408814: Don't report any sizes before we don't have data on disk. Data reported
1255 // due to redirection etc. are not interesting, but may disturb filesize checks.
1257 if ( curl_easy_getinfo( _curl, CURLINFO_PRIVATE, &fp ) != CURLE_OK || !fp )
1258 return MediaCurl::aliveCallback( clientp, dltotal, dlnow, ultotal, ulnow );
1259 if ( ftell( fp ) == 0 )
1260 return MediaCurl::aliveCallback( clientp, dltotal, 0.0, ultotal, ulnow );
1262 // (no longer needed due to the filesize check above?)
1263 // work around curl bug that gives us old data
1264 long httpReturnCode = 0;
1265 if (curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode ) != CURLE_OK || httpReturnCode == 0)
1266 return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
// First check the announced Content-Type for a metalink media type.
1269 bool ismetalink = false;
1270 if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
1272 std::string ct = std::string(ptr);
1273 if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
// Too little data to sniff the file content yet: keep ticking.
1276 if (!ismetalink && dlnow < 256)
1278 // can't tell yet, ...
1279 return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1284 ismetalink = looks_like_metalink_fd(fileno(fp));
1285 DBG << "looks_like_metalink_fd: " << ismetalink << endl;
1289 // this is a metalink file change the expected filesize
1290 MediaCurl::resetExpectedFileSize( clientp, ByteCount( 2, ByteCount::MB) );
1291 // we're downloading the metalink file. Just trigger aliveCallbacks
1292 curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &MediaCurl::aliveCallback);
1293 return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
// Definitely not a metalink: switch back to the normal progress callback.
1295 curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &MediaCurl::progressCallback);
1296 return MediaCurl::progressCallback(clientp, dltotal, dlnow, ultotal, ulnow);
// Download \a filename to \a target: fetch into a temp file advertising
// metalink acceptance; if the server answered with a metalink document,
// parse it and run the multi-mirror block download (reusing blocks from the
// old target, a previous failed attempt, and the delta file); otherwise (or
// on multifetch failure) fall back to a plain single-connection download.
1299 void MediaMultiCurl::doGetFileCopy( const Pathname & filename , const Pathname & target, callback::SendReport<DownloadProgressReport> & report, const ByteCount &expectedFileSize_r, RequestOptions options ) const
1301 Pathname dest = target.absolutename();
1302 if( assert_dir( dest.dirname() ) )
1304 DBG << "assert_dir " << dest.dirname() << " failed" << endl;
1305 ZYPP_THROW( MediaSystemException(getFileUrl(filename), "System error on " + dest.dirname().asString()) );
// Create a mkostemp-based temp file next to the target; destNew unlinks it
// on scope exit unless the download completes.
1308 ManagedFile destNew { target.extend( ".new.zypp.XXXXXX" ) };
1311 AutoFREE<char> buf { ::strdup( (*destNew).c_str() ) };
1314 ERR << "out of memory for temp file name" << endl;
1315 ZYPP_THROW(MediaSystemException(getFileUrl(filename), "out of memory for temp file name"));
1318 AutoFD tmp_fd { ::mkostemp( buf, O_CLOEXEC ) };
1321 ERR << "mkstemp failed for file '" << destNew << "'" << endl;
1322 ZYPP_THROW(MediaWriteException(destNew));
1324 destNew = ManagedFile( (*buf), filesystem::unlink );
1326 file = ::fdopen( tmp_fd, "we" );
1329 ERR << "fopen failed for file '" << destNew << "'" << endl;
1330 ZYPP_THROW(MediaWriteException(destNew));
1332 tmp_fd.resetDispose(); // don't close it here! ::fdopen moved ownership to file
1335 DBG << "dest: " << dest << endl;
1336 DBG << "temp: " << destNew << endl;
1338 // set IFMODSINCE time condition (no download if not modified)
1339 if( PathInfo(target).isExist() && !(options & OPTION_NO_IFMODSINCE) )
1341 curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
1342 curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, (long)PathInfo(target).mtime());
1346 curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1347 curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1349 // change header to include Accept: metalink
1350 curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeadersMetalink);
1351 // change to our own progress function
1352 curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &progressCallback);
1353 curl_easy_setopt(_curl, CURLOPT_PRIVATE, (*file) ); // important to pass the FILE* explicitly (passing through varargs)
1356 MediaCurl::doGetFileCopyFile(filename, dest, file, report, expectedFileSize_r, options);
// On both success and failure, restore the curl options we overrode above.
1358 catch (Exception &ex)
1360 curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1361 curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1362 curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
1363 curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
1366 curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1367 curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1368 curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
1369 curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
1370 long httpReturnCode = 0;
1371 CURLcode infoRet = curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode);
1372 if (infoRet == CURLE_OK)
1374 DBG << "HTTP response: " + str::numstring(httpReturnCode) << endl;
1375 if ( httpReturnCode == 304
1376 || ( httpReturnCode == 213 && _url.getScheme() == "ftp" ) ) // not modified
1378 DBG << "not modified: " << PathInfo(dest) << endl;
// NOTE(review): "reponse" typo lives in the log string below — runtime text,
// fix separately if desired.
1384 WAR << "Could not get the reponse code." << endl;
1387 bool ismetalink = false;
1390 if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
1392 std::string ct = std::string(ptr);
1393 if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
1399 // some proxies do not store the content type, so also look at the file to find
1400 // out if we received a metalink (bnc#649925)
1402 if (looks_like_metalink(destNew))
// ---- metalink path: parse the document and run the block download ----
1408 bool userabort = false;
1409 Pathname failedFile = ZConfig::instance().repoCachePath() / "MultiCurl.failed";
1410 file = nullptr; // explicitly close destNew before the parser reads it.
1415 MediaBlockList bl = mlp.getBlockList();
1416 std::vector<Url> urls = mlp.getUrls();
1418 file = fopen((*destNew).c_str(), "w+e");
1420 ZYPP_THROW(MediaWriteException(destNew));
1421 if (PathInfo(target).isExist())
1423 XXX << "reusing blocks from file " << target << endl;
1424 bl.reuseBlocks(file, target.asString());
1427 if (bl.haveChecksum(1) && PathInfo(failedFile).isExist())
1429 XXX << "reusing blocks from file " << failedFile << endl;
1430 bl.reuseBlocks(file, failedFile.asString());
1432 filesystem::unlink(failedFile);
1434 Pathname df = deltafile();
1437 XXX << "reusing blocks from file " << df << endl;
1438 bl.reuseBlocks(file, df.asString());
1443 multifetch(filename, file, &urls, &report, &bl, expectedFileSize_r);
1445 catch (MediaCurlException &ex)
1447 userabort = ex.errstr() == "User abort";
1451 catch (MediaFileSizeExceededException &ex) {
1454 catch (Exception &ex)
1456 // something went wrong. fall back to normal download
1457 file = nullptr; // explicitly close destNew before moving it
// Preserve sizable partial downloads so their blocks can be reused next time.
// NOTE(review): 63336 looks like a typo for 65536 — present upstream; confirm
// before changing.
1458 if (PathInfo(destNew).size() >= 63336)
1460 ::unlink(failedFile.asString().c_str());
1461 filesystem::hardlinkCopy(destNew, failedFile);
1467 file = fopen((*destNew).c_str(), "w+e");
1469 ZYPP_THROW(MediaWriteException(destNew));
1470 MediaCurl::doGetFileCopyFile(filename, dest, file, report, expectedFileSize_r, options | OPTION_NO_REPORT_START);
1474 if (::fchmod( ::fileno(file), filesystem::applyUmaskTo( 0644 )))
1476 ERR << "Failed to chmod file " << destNew << endl;
1479 file.resetDispose(); // we're going to close it manually here
1482 filesystem::unlink(destNew);
1483 ERR << "Fclose failed for file '" << destNew << "'" << endl;
1484 ZYPP_THROW(MediaWriteException(destNew));
1487 if ( rename( destNew, dest ) != 0 )
1489 ERR << "Rename failed" << endl;
1490 ZYPP_THROW(MediaWriteException(dest));
1492 destNew.resetDispose(); // no more need to unlink it
1494 DBG << "done: " << PathInfo(dest) << endl;
// Download \a filename in parallel from a list of mirrors into the already
// open \a fp, driven by the metalink block list \a blklist.
// \param filename  path of the file on the medium
// \param fp        open output FILE* the blocks are assembled into
// \param urllist   candidate mirror urls; filtered below to the schemes the
//                  multi-download code handles
// \param report    progress report sink
// \param blklist   metalink block/checksum info, may be NULL
// \param filesize  expected size, or off_t(-1) when unknown
// Throws MediaCurlInitException when the curl multi handle cannot be created,
// and whatever checkFileDigest() throws on verification failure.
// NOTE(review): this chunk is line-sampled — interior braces, else branches
// and some statements (presumably including the req.run(myurllist) call) sit
// on elided lines; comments only, code left untouched.
1497 void MediaMultiCurl::multifetch(const Pathname & filename, FILE *fp, std::vector<Url> *urllist, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) const
1499 Url baseurl(getFileUrl(filename));
// Take the file size from the metalink data when the caller did not pass one.
1500 if (blklist && filesize == off_t(-1) && blklist->haveFilesize())
1501 filesize = blklist->getFilesize();
1502 if (blklist && !blklist->haveBlocks() && filesize != 0)
// Degenerate case: no blockwise download possible/needed — just verify the
// whole-file digest and bail out (return on an elided line).
1504 if (blklist && (filesize == 0 || !blklist->numBlocks()))
1506 checkFileDigest(baseurl, fp, blklist);
// Lazily create the curl multi handle shared by all fetch workers.
1513 _multi = curl_multi_init();
1515 ZYPP_THROW(MediaCurlInitException(baseurl));
1518 multifetchrequest req(this, filename, baseurl, _multi, fp, report, blklist, filesize);
// Propagate the transfer settings of this media handler onto the request.
1519 req._timeout = _settings.timeout();
1520 req._connect_timeout = _settings.connectTimeout();
1521 req._maxspeed = _settings.maxDownloadSpeed();
// Clamp the number of parallel workers to the range [1, MAXURLS].
1522 req._maxworkers = _settings.maxConcurrentConnections();
1523 if (req._maxworkers > MAXURLS)
1524 req._maxworkers = MAXURLS;
1525 if (req._maxworkers <= 0)
1526 req._maxworkers = 1;
// Build the effective mirror list: keep only http/https/ftp/tftp mirrors
// (after a protocol sanity check) and copy query parameters (e.g. auth
// tokens) from the base url onto each mirror.
1527 std::vector<Url> myurllist;
1528 for (std::vector<Url>::iterator urliter = urllist->begin(); urliter != urllist->end(); ++urliter)
1532 std::string scheme = urliter->getScheme();
1533 if (scheme == "http" || scheme == "https" || scheme == "ftp" || scheme == "tftp")
1535 checkProtocol(*urliter);
1536 myurllist.push_back(internal::propagateQueryParams(*urliter, _url));
// No usable mirror survived the filter — fall back to the base url itself.
1543 if (!myurllist.size())
1544 myurllist.push_back(baseurl);
// After the (elided) parallel fetch ran, re-verify the assembled file
// against the metalink whole-file checksum.
1546 checkFileDigest(baseurl, fp, blklist);
// Verify the fully downloaded file in \a fp against the whole-file checksum
// carried by \a blklist.  A no-op when there is no checksum to check.
// \throws MediaCurlException on seek failure or on checksum mismatch.
// NOTE(review): chunk is line-sampled — the declarations of dig/buf/l and
// the digest-update loop body are on elided lines; comments only here.
1549 void MediaMultiCurl::checkFileDigest(Url &url, FILE *fp, MediaBlockList *blklist) const
// Nothing to verify without a file checksum (early return on elided line).
1551 if (!blklist || !blklist->haveFileChecksum())
// Rewind so the digest covers the file from the first byte.
1553 if (fseeko(fp, off_t(0), SEEK_SET))
1554 ZYPP_THROW(MediaCurlException(url, "fseeko", "seek error"));
1556 blklist->createFileDigest(dig);
// Stream the entire file content through the digest, buffer by buffer.
1559 while ((l = fread(buf, 1, sizeof(buf), fp)) > 0)
1561 if (!blklist->verifyFileDigest(dig))
1562 ZYPP_THROW(MediaCurlException(url, "file verification failed", "checksum error"));
1565 bool MediaMultiCurl::isDNSok(const std::string &host) const
1567 return _dnsok.find(host) == _dnsok.end() ? false : true;
1570 void MediaMultiCurl::setDNSok(const std::string &host) const
1572 _dnsok.insert(host);
1575 CURL *MediaMultiCurl::fromEasyPool(const std::string &host) const
1577 if (_easypool.find(host) == _easypool.end())
1579 CURL *ret = _easypool[host];
1580 _easypool.erase(host);
1584 void MediaMultiCurl::toEasyPool(const std::string &host, CURL *easy) const
1586 CURL *oldeasy = _easypool[host];
1587 _easypool[host] = easy;
1589 curl_easy_cleanup(oldeasy);
1592 } // namespace media