1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.haxx.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ***************************************************************************/
23 #include "curl_setup.h"
26 #include "urlapi-int.h"
31 #include "curl_ctype.h"
32 #include "inet_pton.h"
34 /* The last 3 #include files should be in this order */
35 #include "curl_printf.h"
36 #include "curl_memory.h"
39 /* MSDOS/Windows style drive prefix, eg c: in c:foo */
40 #define STARTS_WITH_DRIVE_PREFIX(str) \
41 ((('a' <= str[0] && str[0] <= 'z') || \
42 ('A' <= str[0] && str[0] <= 'Z')) && \
45 /* MSDOS/Windows style drive prefix as it may appear in a URL: an ASCII
46 * letter, then ':' or '|' (accepted instead of ':'), then a slash, a
 * backslash or the terminating NUL. Evaluates to 1 on a match, 0 otherwise.
 * 'str' is expanded several times, so it must be free of side effects. */
47 #define STARTS_WITH_URL_DRIVE_PREFIX(str) \
48 ((('a' <= (str)[0] && (str)[0] <= 'z') || \
49 ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
50 ((str)[1] == ':' || (str)[1] == '|') && \
51 ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
53 /* Internal representation of CURLU. Point to URL-encoded strings. */
58 char *options; /* IMAP only? */
60 char *zoneid; /* for numerical IPv6 addresses */
66 char *scratch; /* temporary scratch area */
67 char *temppath; /* temporary path pointer */
68 long portnum; /* the numerical version */
/* Scheme assumed for a URL given without one when the caller passes
   CURLU_DEFAULT_SCHEME. */
71 #define DEFAULT_SCHEME "https"
73 static void free_urlhandle(struct Curl_URL *u)
89 /* move the full contents of one handle onto another and
91 static void mv_urlhandle(struct Curl_URL *from,
100 * Find the separator at the end of the host name, or the '?' in cases like
101 * http://www.url.com?id=2380
103 static const char *find_host_sep(const char *url)
108 /* Find the start of the hostname */
109 sep = strstr(url, "//");
115 query = strchr(sep, '?');
116 sep = strchr(sep, '/');
119 sep = url + strlen(url);
122 query = url + strlen(url);
124 return sep < query ? sep : query;
128 * Decide in an encoding-independent manner whether a character in an
129 * URL must be escaped. The same criterion must be used in strlen_url()
132 static bool urlchar_needs_escaping(int c)
134 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
138 * strlen_url() returns the length of the given URL if the spaces within the
139 * URL were properly URL encoded.
140 * URL encoding should be skipped for host names, otherwise IDN resolution
143 static size_t strlen_url(const char *url, bool relative)
145 const unsigned char *ptr;
147 bool left = TRUE; /* left side of the ? */
148 const unsigned char *host_sep = (const unsigned char *) url;
151 host_sep = (const unsigned char *) find_host_sep(url);
153 for(ptr = (unsigned char *)url; *ptr; ptr++) {
165 if(urlchar_needs_escaping(*ptr))
180 /* strcpy_url() copies a URL to an output buffer and URL-encodes the spaces in
181 * the source URL accordingly.
182 * URL encoding should be skipped for host names, otherwise IDN resolution
185 static void strcpy_url(char *output, const char *url, bool relative)
187 /* we must add this with whitespace-replacing */
189 const unsigned char *iptr;
191 const unsigned char *host_sep = (const unsigned char *) url;
194 host_sep = (const unsigned char *) find_host_sep(url);
196 for(iptr = (unsigned char *)url; /* read from here */
197 *iptr; /* until zero byte */
200 if(iptr < host_sep) {
210 if(urlchar_needs_escaping(*iptr)) {
211 msnprintf(optr, 4, "%%%02x", *iptr);
219 *optr++='%'; /* add a '%' */
220 *optr++='2'; /* add a '2' */
221 *optr++='0'; /* add a '0' */
224 *optr++='+'; /* add a '+' here */
228 *optr = 0; /* null-terminate output buffer */
233 * Returns true if the given URL is absolute (as opposed to relative) within
234 * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is
237 bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
241 if(STARTS_WITH_DRIVE_PREFIX(url))
244 for(i = 0; i < buflen && url[i]; ++i) {
246 if((s == ':') && (url[i + 1] == '/')) {
251 /* RFC 3986 3.1 explains:
252 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
254 else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) {
256 buf[i] = (char)TOLOWER(s);
265 * Concatenate a relative URL to a base URL making it absolute.
266 * URL-encodes any spaces.
267 * The returned pointer must be freed by the caller unless NULL
268 * (returns NULL on out of memory).
270 static char *concat_url(const char *base, const char *relurl)
273 TRY to append this new path to the old URL
274 to the right of the host part. Oh crap, this is doomed to cause
275 problems in the future...
281 bool host_changed = FALSE;
283 const char *useurl = relurl;
286 /* we must make our own copy of the URL to play with, as it may
287 point to read-only data */
288 char *url_clone = strdup(base);
291 return NULL; /* skip out of this NOW */
293 /* protsep points to the start of the host name */
294 protsep = strstr(url_clone, "//");
298 protsep += 2; /* pass the slashes */
300 if('/' != relurl[0]) {
303 /* First we need to find out if there's a ?-letter in the URL,
304 and cut it and the right-side of that off */
305 pathsep = strchr(protsep, '?');
309 /* we have a relative path to append to the last slash if there's one
310 available, or if the new URL is just a query string (starts with a
311 '?') we append the new one at the end of the entire currently worked
313 if(useurl[0] != '?') {
314 pathsep = strrchr(protsep, '/');
319 /* Check if there's any slash after the host name, and if so, remember
320 that position instead */
321 pathsep = strchr(protsep, '/');
323 protsep = pathsep + 1;
327 /* now deal with one "./" or any amount of "../" in the newurl
328 and act accordingly */
330 if((useurl[0] == '.') && (useurl[1] == '/'))
331 useurl += 2; /* just skip the "./" */
333 while((useurl[0] == '.') &&
334 (useurl[1] == '.') &&
335 (useurl[2] == '/')) {
337 useurl += 3; /* pass the "../" */
342 /* cut off one more level from the right of the original URL */
343 pathsep = strrchr(protsep, '/');
354 /* We got a new absolute path for this server */
356 if(relurl[1] == '/') {
357 /* the new URL starts with //, just keep the protocol part from the
360 useurl = &relurl[2]; /* we keep the slashes from the original, so we
365 /* cut off the original URL from the first slash, or deal with URLs
367 pathsep = strchr(protsep, '/');
369 /* When people use badly formatted URLs, such as
370 "http://www.url.com?dir=/home/daniel" we must not use the first
371 slash, if there's a ?-letter before it! */
372 char *sep = strchr(protsep, '?');
373 if(sep && (sep < pathsep))
378 /* There was no slash. Now, since we might be operating on a badly
379 formatted URL, such as "http://www.url.com?id=2380" which doesn't
380 use a slash separator as it is supposed to, we need to check for a
382 pathsep = strchr(protsep, '?');
389 /* If the new part contains a space, this is a mighty stupid redirect
390 but we still make an effort to do "right". To the left of a '?'
391 letter we replace each space with %20 while it is replaced with '+'
392 on the right side of the '?' letter.
394 newlen = strlen_url(useurl, !host_changed);
396 urllen = strlen(url_clone);
398 newest = malloc(urllen + 1 + /* possible slash */
399 newlen + 1 /* zero byte */);
402 free(url_clone); /* don't leak this */
406 /* copy over the root url part */
407 memcpy(newest, url_clone, urllen);
409 /* check if we need to append a slash */
410 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
413 newest[urllen++]='/';
415 /* then append the new piece on the right side */
416 strcpy_url(&newest[urllen], useurl, !host_changed);
424 * parse_hostname_login()
426 * Parse the login details (user name, password and options) from the URL and
427 * strip them out of the host name
430 static CURLUcode parse_hostname_login(struct Curl_URL *u,
434 CURLUcode result = CURLUE_OK;
437 char *passwdp = NULL;
438 char *optionsp = NULL;
439 const struct Curl_handler *h = NULL;
441 /* At this point, we're hoping all the other special cases have
442 * been taken care of, so conn->host.name is at most
443 * [user[:password][;options]@]hostname
445 * We need somewhere to put the embedded details, so do that first.
448 char *ptr = strchr(*hostname, '@');
449 char *login = *hostname;
454 /* We will now try to extract the
455 * possible login information in a string like:
456 * ftp://user:password@ftp.my.site:8021/README */
459 /* if this is a known scheme, get some details */
461 h = Curl_builtin_scheme(u->scheme);
463 /* We could use the login information in the URL so extract it. Only parse
464 options if the handler says we should. Note that 'h' might be NULL! */
465 ccode = Curl_parse_login_details(login, ptr - login - 1,
467 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
470 result = CURLUE_MALFORMED_INPUT;
475 if(flags & CURLU_DISALLOW_USER) {
476 /* Option DISALLOW_USER is set and url contains username. */
477 result = CURLUE_USER_NOT_ALLOWED;
485 u->password = passwdp;
488 u->options = optionsp;
500 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname)
502 char *portptr = NULL;
507 * Find the end of an IPv6 address, either on the ']' ending bracket or
508 * a percent-encoded zone index.
510 if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
511 &endbracket, &len)) {
512 if(']' == endbracket)
513 portptr = &hostname[len];
514 else if('%' == endbracket) {
516 if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
517 if(']' != endbracket)
518 return CURLUE_MALFORMED_INPUT;
519 portptr = &hostname[--zonelen + len + 1];
522 return CURLUE_MALFORMED_INPUT;
525 return CURLUE_MALFORMED_INPUT;
527 /* this is a RFC2732-style specified IP-address */
528 if(portptr && *portptr) {
530 return CURLUE_MALFORMED_INPUT;
536 portptr = strchr(hostname, ':');
543 /* Browser behavior adaptation. If there's a colon with no digits after,
544 just cut off the name there which makes us ignore the colon and just
545 use the default port. Firefox, Chrome and Safari all do that. */
551 if(!ISDIGIT(portptr[1]))
552 return CURLUE_BAD_PORT_NUMBER;
554 port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
556 if((port <= 0) || (port > 0xffff))
557 /* Single unix standard says port numbers are 16 bits long, but we don't
558 treat port zero as OK. */
559 return CURLUE_BAD_PORT_NUMBER;
562 return CURLUE_BAD_PORT_NUMBER;
564 *portptr++ = '\0'; /* cut off the name there */
566 /* generate a new port number string to get rid of leading zeroes etc */
567 msnprintf(portbuf, sizeof(portbuf), "%ld", port);
569 u->port = strdup(portbuf);
571 return CURLUE_OUT_OF_MEMORY;
577 /* scan for byte values < 32 or 127 */
578 static CURLUcode junkscan(const char *part)
581 static const char badbytes[]={
582 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
583 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
584 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
585 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
587 0x00 /* null-terminate */
589 size_t n = strlen(part);
590 size_t nfine = strcspn(part, badbytes);
592 /* since we don't know which part is scanned, return a generic error
594 return CURLUE_MALFORMED_INPUT;
599 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
602 size_t hlen = strlen(hostname);
604 if(hostname[0] == '[') {
606 char dest[16]; /* fits a binary IPv6 address */
608 const char *l = "0123456789abcdefABCDEF:.";
609 if(hlen < 4) /* '[::]' is the shortest possible valid string */
610 return CURLUE_MALFORMED_INPUT;
614 if(hostname[hlen] != ']')
615 return CURLUE_MALFORMED_INPUT;
617 /* only valid letters are ok */
618 len = strspn(hostname, l);
621 if(hostname[len] == '%') {
622 /* this could now be '%[zone id]' */
625 char *h = &hostname[len + 1];
626 /* pass '25' if present and is a url encoded percent sign */
627 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
629 while(*h && (*h != ']') && (i < 15))
631 if(!i || (']' != *h))
632 return CURLUE_MALFORMED_INPUT;
634 u->zoneid = strdup(zoneid);
636 return CURLUE_OUT_OF_MEMORY;
637 hostname[len] = ']'; /* insert end bracket */
638 hostname[len + 1] = 0; /* terminate the hostname */
641 return CURLUE_MALFORMED_INPUT;
642 /* hostname is fine */
645 hostname[hlen] = 0; /* end the address there */
646 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
647 return CURLUE_MALFORMED_INPUT;
648 hostname[hlen] = ']'; /* restore ending bracket */
652 /* letters from the second string are not ok */
653 len = strcspn(hostname, " ");
655 /* hostname with bad content */
656 return CURLUE_MALFORMED_INPUT;
659 return CURLUE_NO_HOST;
/* True when the byte 'x' is one of '/', '?' or '#', the characters at which
   the scan for the end of the host name stops. */
663 #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
665 static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
668 bool path_alloced = FALSE;
671 char *fragment = NULL;
673 bool url_has_scheme = FALSE;
674 char schemebuf[MAX_SCHEME_LEN + 1];
675 const char *schemep = NULL;
676 size_t schemelen = 0;
680 return CURLUE_MALFORMED_INPUT;
682 /*************************************************************
684 ************************************************************/
685 /* allocate scratch area */
686 urllen = strlen(url);
687 if(urllen > CURL_MAX_INPUT_LENGTH)
688 /* excessive input length */
689 return CURLUE_MALFORMED_INPUT;
691 path = u->scratch = malloc(urllen * 2 + 2);
693 return CURLUE_OUT_OF_MEMORY;
695 hostname = &path[urllen + 1];
698 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
699 url_has_scheme = TRUE;
700 schemelen = strlen(schemebuf);
703 /* handle the file: scheme */
704 if(url_has_scheme && strcasecompare(schemebuf, "file")) {
705 /* path has been allocated large enough to hold this */
706 strcpy(path, &url[5]);
708 hostname = NULL; /* no host for file: URLs */
709 u->scheme = strdup("file");
711 return CURLUE_OUT_OF_MEMORY;
713 /* Extra handling URLs with an authority component (i.e. that start with
716 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
717 * RFC 8089, but not the (current) WHAT-WG URL spec.
719 if(path[0] == '/' && path[1] == '/') {
720 /* swallow the two slashes */
721 char *ptr = &path[2];
724 * According to RFC 8089, a file: URL can be reliably dereferenced if:
726 * o it has no/blank hostname, or
728 * o the hostname matches "localhost" (case-insensitively), or
730 * o the hostname is a FQDN that resolves to this machine.
732 * For brevity, we only consider URLs with empty, "localhost", or
733 * "127.0.0.1" hostnames as local.
735 * Additionally, there is an exception for URLs with a Windows drive
736 * letter in the authority (which was accidentally omitted from RFC 8089
737 * Appendix E, but believe me, it was meant to be there. --MK)
739 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
740 /* the URL includes a host name, it must match "localhost" or
741 "127.0.0.1" to be valid */
742 if(!checkprefix("localhost/", ptr) &&
743 !checkprefix("127.0.0.1/", ptr)) {
744 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
746 return CURLUE_MALFORMED_INPUT;
748 ptr += 9; /* now points to the slash after the host */
754 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
755 /* Don't allow Windows drive letters when not in Windows.
756 * This catches both "file:/c:" and "file:c:" */
757 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
758 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
759 /* File drive letters are only accepted in MSDOS/Windows */
760 return CURLUE_MALFORMED_INPUT;
763 /* If the path starts with a slash and a drive letter, ditch the slash */
764 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
765 /* This cannot be done with strcpy, as the memory chunks overlap! */
766 memmove(path, &path[1], strlen(&path[1]) + 1);
780 p = &url[schemelen + 1];
781 while(p && (*p == '/') && (i < 4)) {
786 /* less than one or more than three slashes */
787 return CURLUE_MALFORMED_INPUT;
790 if(!Curl_builtin_scheme(schemep) &&
791 !(flags & CURLU_NON_SUPPORT_SCHEME))
792 return CURLUE_UNSUPPORTED_SCHEME;
794 if(junkscan(schemep))
795 return CURLUE_MALFORMED_INPUT;
801 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
802 return CURLUE_MALFORMED_INPUT;
803 if(flags & CURLU_DEFAULT_SCHEME)
804 schemep = DEFAULT_SCHEME;
807 * The URL was badly formatted, let's try without scheme specified.
811 hostp = p; /* host name starts here */
813 while(*p && !HOSTNAME_END(*p)) /* find end of host name */
818 memcpy(hostname, hostp, len);
822 if(!(flags & CURLU_NO_AUTHORITY))
823 return CURLUE_MALFORMED_INPUT;
827 memcpy(path, p, len);
831 u->scheme = strdup(schemep);
833 return CURLUE_OUT_OF_MEMORY;
838 return CURLUE_MALFORMED_INPUT;
840 if((flags & CURLU_URLENCODE) && path[0]) {
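841 /* worst case output length is 3x the original, not counting the
   terminating NUL byte that strcpy_url() also writes. NOTE(review): the
   allocation below leaves no room for that byte -- confirm and add 1. */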
842 char *newp = malloc(strlen(path) * 3);
844 return CURLUE_OUT_OF_MEMORY;
846 strcpy_url(newp, path, TRUE); /* consider it relative */
847 u->temppath = path = newp;
850 fragment = strchr(path, '#');
854 u->fragment = strdup(fragment);
856 return CURLUE_OUT_OF_MEMORY;
860 query = strchr(path, '?');
863 /* done even if the query part is a blank string */
864 u->query = strdup(query);
866 return CURLUE_OUT_OF_MEMORY;
870 /* if there's no path left set, unset */
873 if(!(flags & CURLU_PATH_AS_IS)) {
874 /* remove ../ and ./ sequences according to RFC3986 */
875 char *newp = Curl_dedotdotify(path);
877 return CURLUE_OUT_OF_MEMORY;
879 if(strcmp(newp, path)) {
880 /* if we got a new version */
882 Curl_safefree(u->temppath);
883 u->temppath = path = newp;
890 u->path = path_alloced?path:strdup(path);
892 return CURLUE_OUT_OF_MEMORY;
893 u->temppath = NULL; /* used now */
898 * Parse the login details and strip them out of the host name.
900 if(junkscan(hostname))
901 return CURLUE_MALFORMED_INPUT;
903 result = parse_hostname_login(u, &hostname, flags);
907 result = Curl_parse_port(u, hostname);
911 if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
912 /* Skip hostname check, it's allowed to be empty. */
915 result = hostname_check(u, hostname);
920 u->host = strdup(hostname);
922 return CURLUE_OUT_OF_MEMORY;
924 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
925 /* legacy curl-style guess based on host name */
926 if(checkprefix("ftp.", hostname))
928 else if(checkprefix("dict.", hostname))
930 else if(checkprefix("ldap.", hostname))
932 else if(checkprefix("imap.", hostname))
934 else if(checkprefix("smtp.", hostname))
936 else if(checkprefix("pop3.", hostname))
941 u->scheme = strdup(schemep);
943 return CURLUE_OUT_OF_MEMORY;
947 Curl_safefree(u->scratch);
948 Curl_safefree(u->temppath);
954 * Parse the URL and set the relevant members of the Curl_URL struct.
956 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
958 CURLUcode result = seturl(url, u, flags);
961 memset(u, 0, sizeof(struct Curl_URL));
968 CURLU *curl_url(void)
970 return calloc(sizeof(struct Curl_URL), 1);
973 void curl_url_cleanup(CURLU *u)
981 #define DUP(dest, src, name) \
983 dest->name = strdup(src->name); \
988 CURLU *curl_url_dup(CURLU *in)
990 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
994 DUP(u, in, password);
1000 DUP(u, in, fragment);
1001 u->portnum = in->portnum;
1005 curl_url_cleanup(u);
1009 CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1010 char **part, unsigned int flags)
1013 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1015 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1016 bool plusdecode = FALSE;
1019 return CURLUE_BAD_HANDLE;
1021 return CURLUE_BAD_PARTPOINTER;
1025 case CURLUPART_SCHEME:
1027 ifmissing = CURLUE_NO_SCHEME;
1028 urldecode = FALSE; /* never for schemes */
1030 case CURLUPART_USER:
1032 ifmissing = CURLUE_NO_USER;
1034 case CURLUPART_PASSWORD:
1036 ifmissing = CURLUE_NO_PASSWORD;
1038 case CURLUPART_OPTIONS:
1040 ifmissing = CURLUE_NO_OPTIONS;
1042 case CURLUPART_HOST:
1044 ifmissing = CURLUE_NO_HOST;
1046 case CURLUPART_ZONEID:
1049 case CURLUPART_PORT:
1051 ifmissing = CURLUE_NO_PORT;
1052 urldecode = FALSE; /* never for port */
1053 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1054 /* there's no stored port number, but asked to deliver
1055 a default one for the scheme */
1056 const struct Curl_handler *h =
1057 Curl_builtin_scheme(u->scheme);
1059 msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
1063 else if(ptr && u->scheme) {
1064 /* there is a stored port number, but asked to inhibit if
1065 it matches the default one for the scheme */
1066 const struct Curl_handler *h =
1067 Curl_builtin_scheme(u->scheme);
1068 if(h && (h->defport == u->portnum) &&
1069 (flags & CURLU_NO_DEFAULT_PORT))
1073 case CURLUPART_PATH:
1076 ptr = u->path = strdup("/");
1078 return CURLUE_OUT_OF_MEMORY;
1081 case CURLUPART_QUERY:
1083 ifmissing = CURLUE_NO_QUERY;
1084 plusdecode = urldecode;
1086 case CURLUPART_FRAGMENT:
1088 ifmissing = CURLUE_NO_FRAGMENT;
1090 case CURLUPART_URL: {
1093 char *options = u->options;
1094 char *port = u->port;
1095 char *allochost = NULL;
1096 if(u->scheme && strcasecompare("file", u->scheme)) {
1097 url = aprintf("file://%s%s%s",
1099 u->fragment? "#": "",
1100 u->fragment? u->fragment : "");
1103 return CURLUE_NO_HOST;
1105 const struct Curl_handler *h = NULL;
1108 else if(flags & CURLU_DEFAULT_SCHEME)
1109 scheme = (char *) DEFAULT_SCHEME;
1111 return CURLUE_NO_SCHEME;
1113 h = Curl_builtin_scheme(scheme);
1114 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1115 /* there's no stored port number, but asked to deliver
1116 a default one for the scheme */
1118 msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
1123 /* there is a stored port number, but asked to inhibit if it matches
1124 the default one for the scheme */
1125 if(h && (h->defport == u->portnum) &&
1126 (flags & CURLU_NO_DEFAULT_PORT))
1130 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1133 if((u->host[0] == '[') && u->zoneid) {
1134 /* make it '[ host %25 zoneid ]' */
1135 size_t hostlen = strlen(u->host);
1136 size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
1137 allochost = malloc(alen);
1139 return CURLUE_OUT_OF_MEMORY;
1140 memcpy(allochost, u->host, hostlen - 1);
1141 msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
1142 "%%25%s]", u->zoneid);
1145 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1147 u->user ? u->user : "",
1148 u->password ? ":": "",
1149 u->password ? u->password : "",
1151 options ? options : "",
1152 (u->user || u->password || options) ? "@": "",
1153 allochost ? allochost : u->host,
1156 (u->path && (u->path[0] != '/')) ? "/": "",
1157 u->path ? u->path : "/",
1158 (u->query && u->query[0]) ? "?": "",
1159 (u->query && u->query[0]) ? u->query : "",
1160 u->fragment? "#": "",
1161 u->fragment? u->fragment : "");
1165 return CURLUE_OUT_OF_MEMORY;
1174 *part = strdup(ptr);
1176 return CURLUE_OUT_OF_MEMORY;
1178 /* convert + to space */
1180 for(plus = *part; *plus; ++plus) {
1188 /* this unconditional rejection of control bytes is documented
1190 CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen,
1195 return CURLUE_URLDECODE;
1205 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1206 const char *part, unsigned int flags)
1208 char **storep = NULL;
1210 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1211 bool plusencode = FALSE;
1212 bool urlskipslash = FALSE;
1213 bool appendquery = FALSE;
1214 bool equalsencode = FALSE;
1217 return CURLUE_BAD_HANDLE;
1219 /* setting a part to NULL clears it */
1223 case CURLUPART_SCHEME:
1224 storep = &u->scheme;
1226 case CURLUPART_USER:
1229 case CURLUPART_PASSWORD:
1230 storep = &u->password;
1232 case CURLUPART_OPTIONS:
1233 storep = &u->options;
1235 case CURLUPART_HOST:
1238 case CURLUPART_ZONEID:
1239 storep = &u->zoneid;
1241 case CURLUPART_PORT:
1245 case CURLUPART_PATH:
1248 case CURLUPART_QUERY:
1251 case CURLUPART_FRAGMENT:
1252 storep = &u->fragment;
1255 return CURLUE_UNKNOWN_PART;
1257 if(storep && *storep) {
1258 Curl_safefree(*storep);
1264 case CURLUPART_SCHEME:
1265 if(strlen(part) > MAX_SCHEME_LEN)
1267 return CURLUE_MALFORMED_INPUT;
1268 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1269 /* verify that it is a fine scheme */
1270 !Curl_builtin_scheme(part))
1271 return CURLUE_UNSUPPORTED_SCHEME;
1272 storep = &u->scheme;
1273 urlencode = FALSE; /* never */
1275 case CURLUPART_USER:
1278 case CURLUPART_PASSWORD:
1279 storep = &u->password;
1281 case CURLUPART_OPTIONS:
1282 storep = &u->options;
1284 case CURLUPART_HOST:
1286 Curl_safefree(u->zoneid);
1288 case CURLUPART_ZONEID:
1289 storep = &u->zoneid;
1291 case CURLUPART_PORT:
1294 urlencode = FALSE; /* never */
1295 port = strtol(part, &endp, 10); /* Port number must be decimal */
1296 if((port <= 0) || (port > 0xffff))
1297 return CURLUE_BAD_PORT_NUMBER;
1299 /* weirdly provided number, not good! */
1300 return CURLUE_MALFORMED_INPUT;
1304 case CURLUPART_PATH:
1305 urlskipslash = TRUE;
1308 case CURLUPART_QUERY:
1309 plusencode = urlencode;
1310 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1311 equalsencode = appendquery;
1314 case CURLUPART_FRAGMENT:
1315 storep = &u->fragment;
1317 case CURLUPART_URL: {
1319 * Allow a new URL to replace the existing (if any) contents.
1321 * If the existing contents is enough for a URL, allow a relative URL to
1329 if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN + 1)) {
1330 handle2 = curl_url();
1332 return CURLUE_OUT_OF_MEMORY;
1333 result = parseurl(part, handle2, flags);
1335 mv_urlhandle(handle2, u);
1337 curl_url_cleanup(handle2);
1340 /* extract the full "old" URL to do the redirect on */
1341 result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
1343 /* couldn't get the old URL, just use the new! */
1344 handle2 = curl_url();
1346 return CURLUE_OUT_OF_MEMORY;
1347 result = parseurl(part, handle2, flags);
1349 mv_urlhandle(handle2, u);
1351 curl_url_cleanup(handle2);
1355 /* apply the relative part to create a new URL */
1356 redired_url = concat_url(oldurl, part);
1359 return CURLUE_OUT_OF_MEMORY;
1361 /* now parse the new URL */
1362 handle2 = curl_url();
1365 return CURLUE_OUT_OF_MEMORY;
1367 result = parseurl(redired_url, handle2, flags);
1370 mv_urlhandle(handle2, u);
1372 curl_url_cleanup(handle2);
1376 return CURLUE_UNKNOWN_PART;
1378 DEBUGASSERT(storep);
1380 const char *newp = part;
1381 size_t nalloc = strlen(part);
1383 if(nalloc > CURL_MAX_INPUT_LENGTH)
1384 /* excessive input length */
1385 return CURLUE_MALFORMED_INPUT;
1388 const unsigned char *i;
1390 bool free_part = FALSE;
1391 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
1393 return CURLUE_OUT_OF_MEMORY;
1396 i = (const unsigned char *)part;
1397 for(o = enc; *i; ++o, ++i)
1398 *o = (*i == ' ') ? '+' : *i;
1399 *o = 0; /* null-terminate */
1403 return CURLUE_OUT_OF_MEMORY;
1407 for(i = (const unsigned char *)part, o = enc; *i; i++) {
1408 if(Curl_isunreserved(*i) ||
1409 ((*i == '/') && urlskipslash) ||
1410 ((*i == '=') && equalsencode) ||
1411 ((*i == '+') && plusencode)) {
1412 if((*i == '=') && equalsencode)
1413 /* only skip the first equals sign */
1414 equalsencode = FALSE;
1419 msnprintf(o, 4, "%%%02x", *i);
1423 *o = 0; /* null-terminate */
1430 newp = strdup(part);
1432 return CURLUE_OUT_OF_MEMORY;
1435 /* make sure percent encoded are lower case */
1436 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1437 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1438 p[1] = (char)TOLOWER(p[1]);
1439 p[2] = (char)TOLOWER(p[2]);
1448 /* Append the string onto the old query. Add a '&' separator if none is
1449 present at the end of the existing query already */
1450 size_t querylen = u->query ? strlen(u->query) : 0;
1451 bool addamperand = querylen && (u->query[querylen -1] != '&');
1453 size_t newplen = strlen(newp);
1454 char *p = malloc(querylen + addamperand + newplen + 1);
1457 return CURLUE_OUT_OF_MEMORY;
1459 strcpy(p, u->query); /* original query */
1461 p[querylen] = '&'; /* ampersand */
1462 strcpy(&p[querylen + addamperand], newp); /* new suffix */
1470 if(what == CURLUPART_HOST) {
1471 if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1472 /* Skip hostname check, it's allowed to be empty. */
1475 if(hostname_check(u, (char *)newp)) {
1477 return CURLUE_MALFORMED_INPUT;
1483 *storep = (char *)newp;
1485 /* set after the string, to make it not assigned if the allocation above