1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ***************************************************************************/
23 #include "curl_setup.h"
26 #include "urlapi-int.h"
31 #include "curl_ctype.h"
32 #include "inet_pton.h"
33 #include "inet_ntop.h"
35 /* The last 3 #include files should be in this order */
36 #include "curl_printf.h"
37 #include "curl_memory.h"
40 /* MSDOS/Windows style drive prefix, eg c: in c:foo */
41 #define STARTS_WITH_DRIVE_PREFIX(str) \
42 ((('a' <= str[0] && str[0] <= 'z') || \
43 ('A' <= str[0] && str[0] <= 'Z')) && \
46 /* MSDOS/Windows style drive prefix, optionally with
47 * a '|' instead of ':', followed by a slash, a backslash or NUL */
48 #define STARTS_WITH_URL_DRIVE_PREFIX(str) \
49 ((('a' <= (str)[0] && (str)[0] <= 'z') || \
50 ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
51 ((str)[1] == ':' || (str)[1] == '|') && \
52 ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
54 /* scheme is not URL encoded, the longest libcurl supported ones are... */
55 #define MAX_SCHEME_LEN 40
57 /* Internal representation of CURLU. Point to URL-encoded strings. */
62 char *options; /* IMAP only? */
64 char *zoneid; /* for numerical IPv6 addresses */
70 char *scratch; /* temporary scratch area */
71 char *temppath; /* temporary path pointer */
72 long portnum; /* the numerical version */
75 #define DEFAULT_SCHEME "https"
77 static void free_urlhandle(struct Curl_URL *u)
94 * Find the separator at the end of the host name, or the '?' in cases like
95 * http://www.url.com?id=2380
97 static const char *find_host_sep(const char *url)
102 /* Find the start of the hostname */
103 sep = strstr(url, "//");
109 query = strchr(sep, '?');
110 sep = strchr(sep, '/');
113 sep = url + strlen(url);
116 query = url + strlen(url);
118 return sep < query ? sep : query;
122 * Decide in an encoding-independent manner whether a character in a
123 * URL must be escaped. The same criterion must be used in strlen_url()
126 static bool urlchar_needs_escaping(int c)
128 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
132 * strlen_url() returns the length of the given URL if the spaces within the
133 * URL were properly URL encoded.
134 * URL encoding should be skipped for host names, otherwise IDN resolution
137 static size_t strlen_url(const char *url, bool relative)
139 const unsigned char *ptr;
141 bool left = TRUE; /* left side of the ? */
142 const unsigned char *host_sep = (const unsigned char *) url;
145 host_sep = (const unsigned char *) find_host_sep(url);
147 for(ptr = (unsigned char *)url; *ptr; ptr++) {
165 if(urlchar_needs_escaping(*ptr))
174 /* strcpy_url() copies a URL to an output buffer and URL-encodes the spaces in
175 * the source URL accordingly.
176 * URL encoding should be skipped for host names, otherwise IDN resolution
179 static void strcpy_url(char *output, const char *url, bool relative)
181 /* we must add this with whitespace-replacing */
183 const unsigned char *iptr;
185 const unsigned char *host_sep = (const unsigned char *) url;
188 host_sep = (const unsigned char *) find_host_sep(url);
190 for(iptr = (unsigned char *)url; /* read from here */
191 *iptr; /* until zero byte */
194 if(iptr < host_sep) {
201 *optr++='%'; /* add a '%' */
202 *optr++='2'; /* add a '2' */
203 *optr++='0'; /* add a '0' */
206 *optr++='+'; /* add a '+' here */
213 if(urlchar_needs_escaping(*iptr)) {
214 msnprintf(optr, 4, "%%%02x", *iptr);
220 *optr = 0; /* null-terminate output buffer */
225 * Returns true if the given URL is absolute (as opposed to relative). Returns
226 * the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must
227 * be larger than MAX_SCHEME_LEN if buf is set.
229 bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
232 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
233 (void)buflen; /* only used in debug-builds */
235 buf[0] = 0; /* always leave a defined value in buf */
237 if(STARTS_WITH_DRIVE_PREFIX(url))
240 for(i = 0; i < MAX_SCHEME_LEN; ++i) {
242 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
243 /* RFC 3986 3.1 explains:
244 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
251 if(i && (url[i] == ':') && (url[i + 1] == '/')) {
255 buf[i] = (char)TOLOWER(url[i]);
264 * Concatenate a relative URL to a base URL making it absolute.
265 * URL-encodes any spaces.
266 * The returned pointer must be freed by the caller unless NULL
267 * (returns NULL on out of memory).
269 static char *concat_url(const char *base, const char *relurl)
272 TRY to append this new path to the old URL
273 to the right of the host part. Oh crap, this is doomed to cause
274 problems in the future...
280 bool host_changed = FALSE;
282 const char *useurl = relurl;
285 /* we must make our own copy of the URL to play with, as it may
286 point to read-only data */
287 char *url_clone = strdup(base);
290 return NULL; /* skip out of this NOW */
292 /* protsep points to the start of the host name */
293 protsep = strstr(url_clone, "//");
297 protsep += 2; /* pass the slashes */
299 if('/' != relurl[0]) {
302 /* First we need to find out if there's a ?-letter in the URL,
303 and cut it and the right-side of that off */
304 pathsep = strchr(protsep, '?');
308 /* we have a relative path to append to the last slash if there's one
309 available, or if the new URL is just a query string (starts with a
310 '?') we append the new one at the end of the entire currently worked
312 if(useurl[0] != '?') {
313 pathsep = strrchr(protsep, '/');
318 /* Check if there's any slash after the host name, and if so, remember
319 that position instead */
320 pathsep = strchr(protsep, '/');
322 protsep = pathsep + 1;
326 /* now deal with one "./" or any amount of "../" in the newurl
327 and act accordingly */
329 if((useurl[0] == '.') && (useurl[1] == '/'))
330 useurl += 2; /* just skip the "./" */
332 while((useurl[0] == '.') &&
333 (useurl[1] == '.') &&
334 (useurl[2] == '/')) {
336 useurl += 3; /* pass the "../" */
341 /* cut off one more level from the right of the original URL */
342 pathsep = strrchr(protsep, '/');
353 /* We got a new absolute path for this server */
355 if(relurl[1] == '/') {
356 /* the new URL starts with //, just keep the protocol part from the
359 useurl = &relurl[2]; /* we keep the slashes from the original, so we
364 /* cut off the original URL from the first slash, or deal with URLs
366 pathsep = strchr(protsep, '/');
368 /* When people use badly formatted URLs, such as
369 "http://www.url.com?dir=/home/daniel" we must not use the first
370 slash, if there's a ?-letter before it! */
371 char *sep = strchr(protsep, '?');
372 if(sep && (sep < pathsep))
377 /* There was no slash. Now, since we might be operating on a badly
378 formatted URL, such as "http://www.url.com?id=2380" which doesn't
379 use a slash separator as it is supposed to, we need to check for a
381 pathsep = strchr(protsep, '?');
388 /* If the new part contains a space, this is a mighty stupid redirect
389 but we still make an effort to do "right". To the left of a '?'
390 letter we replace each space with %20 while it is replaced with '+'
391 on the right side of the '?' letter.
393 newlen = strlen_url(useurl, !host_changed);
395 urllen = strlen(url_clone);
397 newest = malloc(urllen + 1 + /* possible slash */
398 newlen + 1 /* zero byte */);
401 free(url_clone); /* don't leak this */
405 /* copy over the root url part */
406 memcpy(newest, url_clone, urllen);
408 /* check if we need to append a slash */
409 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
412 newest[urllen++]='/';
414 /* then append the new piece on the right side */
415 strcpy_url(&newest[urllen], useurl, !host_changed);
422 /* scan for byte values <= 31 or 127 */
423 static bool junkscan(const char *part, unsigned int flags)
426 static const char badbytes[]={
427 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
428 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
429 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
430 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
431 0x7f, 0x00 /* null-terminate */
433 size_t n = strlen(part);
434 size_t nfine = strcspn(part, badbytes);
436 /* since we don't know which part is scanned, return a generic error
439 if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
446 * parse_hostname_login()
448 * Parse the login details (user name, password and options) from the URL and
449 * strip them out of the host name
452 static CURLUcode parse_hostname_login(struct Curl_URL *u,
456 CURLUcode result = CURLUE_OK;
459 char *passwdp = NULL;
460 char *optionsp = NULL;
461 const struct Curl_handler *h = NULL;
463 /* At this point, we're hoping all the other special cases have
464 * been taken care of, so conn->host.name is at most
465 * [user[:password][;options]]@]hostname
467 * We need somewhere to put the embedded details, so do that first.
470 char *ptr = strchr(*hostname, '@');
471 char *login = *hostname;
476 /* We will now try to extract the
477 * possible login information in a string like:
478 * ftp://user:password@ftp.my.site:8021/README */
481 /* if this is a known scheme, get some details */
483 h = Curl_builtin_scheme(u->scheme);
485 /* We could use the login information in the URL so extract it. Only parse
486 options if the handler says we should. Note that 'h' might be NULL! */
487 ccode = Curl_parse_login_details(login, ptr - login - 1,
489 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
492 result = CURLUE_BAD_LOGIN;
497 if(flags & CURLU_DISALLOW_USER) {
498 /* Option DISALLOW_USER is set and url contains username. */
499 result = CURLUE_USER_NOT_ALLOWED;
502 if(junkscan(userp, flags)) {
503 result = CURLUE_BAD_USER;
510 if(junkscan(passwdp, flags)) {
511 result = CURLUE_BAD_PASSWORD;
514 u->password = passwdp;
518 if(junkscan(optionsp, flags)) {
519 result = CURLUE_BAD_LOGIN;
522 u->options = optionsp;
538 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
541 char *portptr = NULL;
546 * Find the end of an IPv6 address, either on the ']' ending bracket or
547 * a percent-encoded zone index.
549 if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
550 &endbracket, &len)) {
551 if(']' == endbracket)
552 portptr = &hostname[len];
553 else if('%' == endbracket) {
555 if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
556 if(']' != endbracket)
557 return CURLUE_BAD_IPV6;
558 portptr = &hostname[--zonelen + len + 1];
561 return CURLUE_BAD_IPV6;
564 return CURLUE_BAD_IPV6;
566 /* this is a RFC2732-style specified IP-address */
567 if(portptr && *portptr) {
569 return CURLUE_BAD_IPV6;
575 portptr = strchr(hostname, ':');
582 /* Browser behavior adaptation. If there's a colon with no digits after,
583 just cut off the name there which makes us ignore the colon and just
584 use the default port. Firefox, Chrome and Safari all do that.
586 Don't do it if the URL has no scheme, to make something that looks like
591 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
594 if(!ISDIGIT(portptr[1]))
595 return CURLUE_BAD_PORT_NUMBER;
597 port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
600 return CURLUE_BAD_PORT_NUMBER;
603 return CURLUE_BAD_PORT_NUMBER;
605 *portptr++ = '\0'; /* cut off the name there */
607 /* generate a new port number string to get rid of leading zeroes etc */
608 msnprintf(portbuf, sizeof(portbuf), "%ld", port);
610 u->port = strdup(portbuf);
612 return CURLUE_OUT_OF_MEMORY;
618 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
621 size_t hlen = strlen(hostname);
623 if(hostname[0] == '[') {
624 const char *l = "0123456789abcdefABCDEF:.";
625 if(hlen < 4) /* '[::]' is the shortest possible valid string */
626 return CURLUE_BAD_IPV6;
630 if(hostname[hlen] != ']')
631 return CURLUE_BAD_IPV6;
633 /* only valid letters are ok */
634 len = strspn(hostname, l);
637 if(hostname[len] == '%') {
638 /* this could now be '%[zone id]' */
641 char *h = &hostname[len + 1];
642 /* pass '25' if present and is a url encoded percent sign */
643 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
645 while(*h && (*h != ']') && (i < 15))
647 if(!i || (']' != *h))
648 /* impossible to reach? */
649 return CURLUE_MALFORMED_INPUT;
651 u->zoneid = strdup(zoneid);
653 return CURLUE_OUT_OF_MEMORY;
654 hostname[len] = ']'; /* insert end bracket */
655 hostname[len + 1] = 0; /* terminate the hostname */
658 return CURLUE_BAD_IPV6;
659 /* hostname is fine */
663 char dest[16]; /* fits a binary IPv6 address */
664 char norm[MAX_IPADR_LEN];
665 hostname[hlen] = 0; /* end the address there */
666 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
667 return CURLUE_BAD_IPV6;
669 /* check if it can be done shorter */
670 if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
671 (strlen(norm) < hlen)) {
672 strcpy(hostname, norm);
674 hostname[hlen + 1] = 0;
676 hostname[hlen] = ']'; /* restore ending bracket */
681 /* letters from the second string are not ok */
682 len = strcspn(hostname, " \r\n\t/:#?!@");
684 /* hostname with bad content */
685 return CURLUE_BAD_HOSTNAME;
688 return CURLUE_NO_HOST;
692 #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
695 * Handle partial IPv4 numerical addresses and different bases, like
696 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
698 * If the given input string is syntactically wrong or any part for example is
699 * too big, this function returns FALSE and doesn't create any output.
701 * Output the "normalized" version of that input string in plain quad decimal
702 * integers and return TRUE.
704 static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
708 const char *c = hostname;
709 unsigned long parts[4] = {0, 0, 0, 0};
714 if((*c < '0') || (*c > '9'))
715 /* most importantly this doesn't allow a leading plus or minus */
717 l = strtoul(c, &endp, 0);
719 /* overflow or nothing parsed at all */
720 if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
724 /* a value larger than 32 bits */
749 /* this is deemed a valid IPv4 numerical address */
752 case 0: /* a -- 32 bits */
753 msnprintf(outp, olen, "%u.%u.%u.%u",
754 parts[0] >> 24, (parts[0] >> 16) & 0xff,
755 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
757 case 1: /* a.b -- 8.24 bits */
758 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
760 msnprintf(outp, olen, "%u.%u.%u.%u",
761 parts[0], (parts[1] >> 16) & 0xff,
762 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
764 case 2: /* a.b.c -- 8.8.16 bits */
765 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
767 msnprintf(outp, olen, "%u.%u.%u.%u",
768 parts[0], parts[1], (parts[2] >> 8) & 0xff,
771 case 3: /* a.b.c.d -- 8.8.8.8 bits */
772 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
775 msnprintf(outp, olen, "%u.%u.%u.%u",
776 parts[0], parts[1], parts[2], parts[3]);
782 /* return strdup'ed version in 'outp', possibly percent decoded */
783 static CURLUcode decode_host(char *hostname, char **outp)
786 if(hostname[0] != '[')
787 /* only decode if not an ipv6 numerical */
788 per = strchr(hostname, '%');
790 *outp = strdup(hostname);
792 return CURLUE_OUT_OF_MEMORY;
795 /* might be encoded */
797 CURLcode result = Curl_urldecode(hostname, 0, outp, &dlen, REJECT_CTRL);
799 return CURLUE_BAD_HOSTNAME;
805 static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
808 bool path_alloced = FALSE;
809 bool uncpath = FALSE;
812 char *fragment = NULL;
814 bool url_has_scheme = FALSE;
815 char schemebuf[MAX_SCHEME_LEN + 1];
816 const char *schemep = NULL;
817 size_t schemelen = 0;
822 /*************************************************************
824 ************************************************************/
825 /* allocate scratch area */
826 urllen = strlen(url);
827 if(urllen > CURL_MAX_INPUT_LENGTH)
828 /* excessive input length */
829 return CURLUE_MALFORMED_INPUT;
831 path = u->scratch = malloc(urllen * 2 + 2);
833 return CURLUE_OUT_OF_MEMORY;
835 hostname = &path[urllen + 1];
838 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
839 url_has_scheme = TRUE;
840 schemelen = strlen(schemebuf);
843 /* handle the file: scheme */
844 if(url_has_scheme && !strcmp(schemebuf, "file")) {
846 /* file:/ is not enough to actually be a complete file: URL */
847 return CURLUE_BAD_FILE_URL;
849 /* path has been allocated large enough to hold this */
850 strcpy(path, &url[5]);
852 u->scheme = strdup("file");
854 return CURLUE_OUT_OF_MEMORY;
856 /* Extra handling URLs with an authority component (i.e. that start with
859 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
860 * RFC 8089, but not the (current) WHAT-WG URL spec.
862 if(path[0] == '/' && path[1] == '/') {
863 /* swallow the two slashes */
864 char *ptr = &path[2];
867 * According to RFC 8089, a file: URL can be reliably dereferenced if:
869 * o it has no/blank hostname, or
871 * o the hostname matches "localhost" (case-insensitively), or
873 * o the hostname is a FQDN that resolves to this machine, or
875 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
878 * For brevity, we only consider URLs with empty, "localhost", or
879 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
881 * Additionally, there is an exception for URLs with a Windows drive
882 * letter in the authority (which was accidentally omitted from RFC 8089
883 * Appendix E, but believe me, it was meant to be there. --MK)
885 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
886 /* the URL includes a host name, it must match "localhost" or
887 "127.0.0.1" to be valid */
888 if(checkprefix("localhost/", ptr) ||
889 checkprefix("127.0.0.1/", ptr)) {
890 ptr += 9; /* now points to the slash after the host */
896 /* the host name, NetBIOS computer name, can not contain disallowed
897 chars, and the delimiting slash character must be appended to the
899 path = strpbrk(ptr, "/\\:*?\"<>|");
900 if(!path || *path != '/')
901 return CURLUE_BAD_FILE_URL;
905 memcpy(hostname, ptr, len);
910 ptr -= 2; /* now points to the // before the host in UNC */
912 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
914 return CURLUE_BAD_FILE_URL;
923 hostname = NULL; /* no host for file: URLs by default */
925 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
926 /* Don't allow Windows drive letters when not in Windows.
927 * This catches both "file:/c:" and "file:c:" */
928 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
929 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
930 /* File drive letters are only accepted in MSDOS/Windows */
931 return CURLUE_BAD_FILE_URL;
934 /* If the path starts with a slash and a drive letter, ditch the slash */
935 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
936 /* This cannot be done with strcpy, as the memory chunks overlap! */
937 memmove(path, &path[1], strlen(&path[1]) + 1);
951 p = &url[schemelen + 1];
952 while(p && (*p == '/') && (i < 4)) {
957 /* less than one or more than three slashes */
958 return CURLUE_BAD_SLASHES;
961 if(!Curl_builtin_scheme(schemep) &&
962 !(flags & CURLU_NON_SUPPORT_SCHEME))
963 return CURLUE_UNSUPPORTED_SCHEME;
965 if(junkscan(schemep, flags))
966 return CURLUE_BAD_SCHEME;
971 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
972 return CURLUE_BAD_SCHEME;
973 if(flags & CURLU_DEFAULT_SCHEME)
974 schemep = DEFAULT_SCHEME;
977 * The URL was badly formatted, let's try without scheme specified.
981 hostp = p; /* host name starts here */
983 /* find the end of the host name + port number */
984 while(*p && !HOSTNAME_END(*p))
989 memcpy(hostname, hostp, len);
993 if(!(flags & CURLU_NO_AUTHORITY))
994 return CURLUE_NO_HOST;
1000 u->scheme = strdup(schemep);
1002 return CURLUE_OUT_OF_MEMORY;
1006 if((flags & CURLU_URLENCODE) && path[0]) {
1007 /* worst case output length is 3x the original! */
1008 char *newp = malloc(strlen(path) * 3);
1010 return CURLUE_OUT_OF_MEMORY;
1011 path_alloced = TRUE;
1012 strcpy_url(newp, path, TRUE); /* consider it relative */
1013 u->temppath = path = newp;
1016 fragment = strchr(path, '#');
1019 if(junkscan(fragment, flags))
1020 return CURLUE_BAD_FRAGMENT;
1022 u->fragment = strdup(fragment);
1024 return CURLUE_OUT_OF_MEMORY;
1028 query = strchr(path, '?');
1031 if(junkscan(query, flags))
1032 return CURLUE_BAD_QUERY;
1033 /* done even if the query part is a blank string */
1034 u->query = strdup(query);
1036 return CURLUE_OUT_OF_MEMORY;
1039 if(junkscan(path, flags))
1040 return CURLUE_BAD_PATH;
1043 /* if there's no path left set, unset */
1046 if(!(flags & CURLU_PATH_AS_IS)) {
1047 /* remove ../ and ./ sequences according to RFC3986 */
1048 char *newp = Curl_dedotdotify(path);
1050 return CURLUE_OUT_OF_MEMORY;
1052 if(strcmp(newp, path)) {
1053 /* if we got a new version */
1055 Curl_safefree(u->temppath);
1056 u->temppath = path = newp;
1057 path_alloced = TRUE;
1063 u->path = path_alloced?path:strdup(path);
1065 return CURLUE_OUT_OF_MEMORY;
1066 u->temppath = NULL; /* used now */
1070 char normalized_ipv4[sizeof("255.255.255.255") + 1];
1073 * Parse the login details and strip them out of the host name.
1075 result = parse_hostname_login(u, &hostname, flags);
1079 result = Curl_parse_port(u, hostname, url_has_scheme);
1083 if(junkscan(hostname, flags))
1084 return CURLUE_BAD_HOSTNAME;
1086 if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
1087 /* Skip hostname check, it's allowed to be empty. */
1088 u->host = strdup("");
1091 if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
1092 u->host = strdup(normalized_ipv4);
1094 result = decode_host(hostname, &u->host);
1097 result = hostname_check(u, u->host);
1103 return CURLUE_OUT_OF_MEMORY;
1104 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1105 /* legacy curl-style guess based on host name */
1106 if(checkprefix("ftp.", hostname))
1108 else if(checkprefix("dict.", hostname))
1110 else if(checkprefix("ldap.", hostname))
1112 else if(checkprefix("imap.", hostname))
1114 else if(checkprefix("smtp.", hostname))
1116 else if(checkprefix("pop3.", hostname))
1121 u->scheme = strdup(schemep);
1123 return CURLUE_OUT_OF_MEMORY;
1127 Curl_safefree(u->scratch);
1128 Curl_safefree(u->temppath);
1134 * Parse the URL and set the relevant members of the Curl_URL struct.
1136 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1138 CURLUcode result = seturl(url, u, flags);
1141 memset(u, 0, sizeof(struct Curl_URL));
1147 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1149 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1154 memset(&tmpurl, 0, sizeof(tmpurl));
1155 result = parseurl(url, &tmpurl, flags);
1161 free_urlhandle(&tmpurl);
1167 CURLU *curl_url(void)
1169 return calloc(sizeof(struct Curl_URL), 1);
1172 void curl_url_cleanup(CURLU *u)
1180 #define DUP(dest, src, name) \
1183 dest->name = strdup(src->name); \
1189 CURLU *curl_url_dup(CURLU *in)
1191 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1195 DUP(u, in, password);
1196 DUP(u, in, options);
1201 DUP(u, in, fragment);
1202 u->portnum = in->portnum;
1206 curl_url_cleanup(u);
1210 CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1211 char **part, unsigned int flags)
1214 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1216 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1217 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1218 bool plusdecode = FALSE;
1221 return CURLUE_BAD_HANDLE;
1223 return CURLUE_BAD_PARTPOINTER;
1227 case CURLUPART_SCHEME:
1229 ifmissing = CURLUE_NO_SCHEME;
1230 urldecode = FALSE; /* never for schemes */
1232 case CURLUPART_USER:
1234 ifmissing = CURLUE_NO_USER;
1236 case CURLUPART_PASSWORD:
1238 ifmissing = CURLUE_NO_PASSWORD;
1240 case CURLUPART_OPTIONS:
1242 ifmissing = CURLUE_NO_OPTIONS;
1244 case CURLUPART_HOST:
1246 ifmissing = CURLUE_NO_HOST;
1248 case CURLUPART_ZONEID:
1250 ifmissing = CURLUE_NO_ZONEID;
1252 case CURLUPART_PORT:
1254 ifmissing = CURLUE_NO_PORT;
1255 urldecode = FALSE; /* never for port */
1256 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1257 /* there's no stored port number, but asked to deliver
1258 a default one for the scheme */
1259 const struct Curl_handler *h =
1260 Curl_builtin_scheme(u->scheme);
1262 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1266 else if(ptr && u->scheme) {
1267 /* there is a stored port number, but ask to inhibit if
1268 it matches the default one for the scheme */
1269 const struct Curl_handler *h =
1270 Curl_builtin_scheme(u->scheme);
1271 if(h && (h->defport == u->portnum) &&
1272 (flags & CURLU_NO_DEFAULT_PORT))
1276 case CURLUPART_PATH:
1279 ptr = u->path = strdup("/");
1281 return CURLUE_OUT_OF_MEMORY;
1284 case CURLUPART_QUERY:
1286 ifmissing = CURLUE_NO_QUERY;
1287 plusdecode = urldecode;
1289 case CURLUPART_FRAGMENT:
1291 ifmissing = CURLUE_NO_FRAGMENT;
1293 case CURLUPART_URL: {
1296 char *options = u->options;
1297 char *port = u->port;
1298 char *allochost = NULL;
1299 if(u->scheme && strcasecompare("file", u->scheme)) {
1300 url = aprintf("file://%s%s%s",
1302 u->fragment? "#": "",
1303 u->fragment? u->fragment : "");
1306 return CURLUE_NO_HOST;
1308 const struct Curl_handler *h = NULL;
1311 else if(flags & CURLU_DEFAULT_SCHEME)
1312 scheme = (char *) DEFAULT_SCHEME;
1314 return CURLUE_NO_SCHEME;
1316 h = Curl_builtin_scheme(scheme);
1317 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1318 /* there's no stored port number, but asked to deliver
1319 a default one for the scheme */
1321 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1326 /* there is a stored port number, but asked to inhibit if it matches
1327 the default one for the scheme */
1328 if(h && (h->defport == u->portnum) &&
1329 (flags & CURLU_NO_DEFAULT_PORT))
1333 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1336 if(u->host[0] == '[') {
1338 /* make it '[ host %25 zoneid ]' */
1339 size_t hostlen = strlen(u->host);
1340 size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
1341 allochost = malloc(alen);
1343 return CURLUE_OUT_OF_MEMORY;
1344 memcpy(allochost, u->host, hostlen - 1);
1345 msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
1346 "%%25%s]", u->zoneid);
1349 else if(urlencode) {
1350 allochost = curl_easy_escape(NULL, u->host, 0);
1352 return CURLUE_OUT_OF_MEMORY;
1355 /* only encode '%' in output host name */
1356 char *host = u->host;
1358 /* first, count number of percents present in the name */
1364 /* if there were percents, encode the host name */
1366 size_t hostlen = strlen(u->host);
1367 size_t alen = hostlen + 2 * pcount + 1;
1368 char *o = allochost = malloc(alen);
1370 return CURLUE_OUT_OF_MEMORY;
1375 memcpy(o, "%25", 3);
1386 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1388 u->user ? u->user : "",
1389 u->password ? ":": "",
1390 u->password ? u->password : "",
1392 options ? options : "",
1393 (u->user || u->password || options) ? "@": "",
1394 allochost ? allochost : u->host,
1397 (u->path && (u->path[0] != '/')) ? "/": "",
1398 u->path ? u->path : "/",
1399 (u->query && u->query[0]) ? "?": "",
1400 (u->query && u->query[0]) ? u->query : "",
1401 u->fragment? "#": "",
1402 u->fragment? u->fragment : "");
1406 return CURLUE_OUT_OF_MEMORY;
1415 *part = strdup(ptr);
1417 return CURLUE_OUT_OF_MEMORY;
1419 /* convert + to space */
1421 for(plus = *part; *plus; ++plus) {
1429 /* this unconditional rejection of control bytes is documented
1431 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1435 return CURLUE_URLDECODE;
1445 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1446 const char *part, unsigned int flags)
1448 char **storep = NULL;
1450 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1451 bool plusencode = FALSE;
1452 bool urlskipslash = FALSE;
1453 bool appendquery = FALSE;
1454 bool equalsencode = FALSE;
1457 return CURLUE_BAD_HANDLE;
1459 /* setting a part to NULL clears it */
1463 case CURLUPART_SCHEME:
1464 storep = &u->scheme;
1466 case CURLUPART_USER:
1469 case CURLUPART_PASSWORD:
1470 storep = &u->password;
1472 case CURLUPART_OPTIONS:
1473 storep = &u->options;
1475 case CURLUPART_HOST:
1478 case CURLUPART_ZONEID:
1479 storep = &u->zoneid;
1481 case CURLUPART_PORT:
1485 case CURLUPART_PATH:
1488 case CURLUPART_QUERY:
1491 case CURLUPART_FRAGMENT:
1492 storep = &u->fragment;
1495 return CURLUE_UNKNOWN_PART;
1497 if(storep && *storep) {
1498 Curl_safefree(*storep);
1504 case CURLUPART_SCHEME:
1505 if(strlen(part) > MAX_SCHEME_LEN)
1507 return CURLUE_BAD_SCHEME;
1508 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1509 /* verify that it is a fine scheme */
1510 !Curl_builtin_scheme(part))
1511 return CURLUE_UNSUPPORTED_SCHEME;
1512 storep = &u->scheme;
1513 urlencode = FALSE; /* never */
1515 case CURLUPART_USER:
1518 case CURLUPART_PASSWORD:
1519 storep = &u->password;
1521 case CURLUPART_OPTIONS:
1522 storep = &u->options;
1524 case CURLUPART_HOST: {
1525 size_t len = strcspn(part, " \r\n");
1526 if(strlen(part) != len)
1527 /* hostname with bad content */
1528 return CURLUE_BAD_HOSTNAME;
1530 Curl_safefree(u->zoneid);
1533 case CURLUPART_ZONEID:
1534 storep = &u->zoneid;
1536 case CURLUPART_PORT:
1539 urlencode = FALSE; /* never */
1540 port = strtol(part, &endp, 10); /* Port number must be decimal */
1541 if((port <= 0) || (port > 0xffff))
1542 return CURLUE_BAD_PORT_NUMBER;
1544 /* weirdly provided number, not good! */
1545 return CURLUE_BAD_PORT_NUMBER;
1549 case CURLUPART_PATH:
1550 urlskipslash = TRUE;
1553 case CURLUPART_QUERY:
1554 plusencode = urlencode;
1555 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1556 equalsencode = appendquery;
1559 case CURLUPART_FRAGMENT:
1560 storep = &u->fragment;
1562 case CURLUPART_URL: {
1564 * Allow a new URL to replace the existing (if any) contents.
1566 * If the existing contents is enough for a URL, allow a relative URL to
1573 /* if the new thing is absolute or the old one is not
1574 * (we could not get an absolute url in 'oldurl'),
1575 * then replace the existing with the new. */
1576 if(Curl_is_absolute_url(part, NULL, 0)
1577 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1578 return parseurl_and_replace(part, u, flags);
1581 /* apply the relative part to create a new URL
1582 * and replace the existing one with it. */
1583 redired_url = concat_url(oldurl, part);
1586 return CURLUE_OUT_OF_MEMORY;
1588 result = parseurl_and_replace(redired_url, u, flags);
1593 return CURLUE_UNKNOWN_PART;
1595 DEBUGASSERT(storep);
1597 const char *newp = part;
1598 size_t nalloc = strlen(part);
1600 if(nalloc > CURL_MAX_INPUT_LENGTH)
1601 /* excessive input length */
1602 return CURLUE_MALFORMED_INPUT;
1605 const unsigned char *i;
1607 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
1609 return CURLUE_OUT_OF_MEMORY;
1610 for(i = (const unsigned char *)part, o = enc; *i; i++) {
1611 if((*i == ' ') && plusencode) {
1615 else if(Curl_isunreserved(*i) ||
1616 ((*i == '/') && urlskipslash) ||
1617 ((*i == '=') && equalsencode)) {
1618 if((*i == '=') && equalsencode)
1619 /* only skip the first equals sign */
1620 equalsencode = FALSE;
1625 msnprintf(o, 4, "%%%02x", *i);
1629 *o = 0; /* null-terminate */
1634 newp = strdup(part);
1636 return CURLUE_OUT_OF_MEMORY;
1639 /* make sure percent encoded are lower case */
1640 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1641 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1642 p[1] = (char)TOLOWER(p[1]);
1643 p[2] = (char)TOLOWER(p[2]);
1652 /* Append the string onto the old query. Add a '&' separator if none is
1653 present at the end of the existing query already */
1654 size_t querylen = u->query ? strlen(u->query) : 0;
1655 bool addamperand = querylen && (u->query[querylen -1] != '&');
1657 size_t newplen = strlen(newp);
1658 char *p = malloc(querylen + addamperand + newplen + 1);
1661 return CURLUE_OUT_OF_MEMORY;
1663 strcpy(p, u->query); /* original query */
1665 p[querylen] = '&'; /* ampersand */
1666 strcpy(&p[querylen + addamperand], newp); /* new suffix */
1674 if(what == CURLUPART_HOST) {
1675 if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1676 /* Skip hostname check, it's allowed to be empty. */
1679 if(hostname_check(u, (char *)newp)) {
1681 return CURLUE_BAD_HOSTNAME;
1687 *storep = (char *)newp;
1689 /* set after the string, to make it not assigned if the allocation above