1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 * SPDX-License-Identifier: curl
23 ***************************************************************************/
25 #include "curl_setup.h"
28 #include "urlapi-int.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
37 /* The last 3 #include files should be in this order */
38 #include "curl_printf.h"
39 #include "curl_memory.h"
42 /* MSDOS/Windows style drive prefix, e.g. c: in c:foo */
43 #define STARTS_WITH_DRIVE_PREFIX(str) \
44 ((('a' <= str[0] && str[0] <= 'z') || \
45 ('A' <= str[0] && str[0] <= 'Z')) && \
48 /* MSDOS/Windows style drive prefix, optionally with
49 * a '|' instead of ':', followed by a slash or NUL */
50 #define STARTS_WITH_URL_DRIVE_PREFIX(str) \
51 ((('a' <= (str)[0] && (str)[0] <= 'z') || \
52 ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
53 ((str)[1] == ':' || (str)[1] == '|') && \
54 ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
56 /* scheme is not URL encoded, the longest libcurl supported ones are... */
57 #define MAX_SCHEME_LEN 40
59 /* Internal representation of CURLU. Point to URL-encoded strings. */
64 char *options; /* IMAP only? */
66 char *zoneid; /* for numerical IPv6 addresses */
71 long portnum; /* the numerical version */
74 #define DEFAULT_SCHEME "https"
76 static void free_urlhandle(struct Curl_URL *u)
91 * Find the separator at the end of the host name, or the '?' in cases like
92 * http://www.url.com?id=2380
94 static const char *find_host_sep(const char *url)
99 /* Find the start of the hostname */
100 sep = strstr(url, "//");
106 query = strchr(sep, '?');
107 sep = strchr(sep, '/');
110 sep = url + strlen(url);
113 query = url + strlen(url);
115 return sep < query ? sep : query;
119 * Decide in an encoding-independent manner whether a character in a URL must
120 * be escaped. This is used in urlencode_str().
122 static bool urlchar_needs_escaping(int c)
124 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
127 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
128 * spaces in the source URL accordingly.
130 * URL encoding should be skipped for host names, otherwise IDN resolution
133 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
134 size_t len, bool relative,
137 /* we must add this with whitespace-replacing */
139 const unsigned char *iptr;
140 const unsigned char *host_sep = (const unsigned char *) url;
143 host_sep = (const unsigned char *) find_host_sep(url);
145 for(iptr = (unsigned char *)url; /* read from here */
146 len; iptr++, len--) {
148 if(iptr < host_sep) {
149 if(Curl_dyn_addn(o, iptr, 1))
150 return CURLUE_OUT_OF_MEMORY;
156 if(Curl_dyn_addn(o, "%20", 3))
157 return CURLUE_OUT_OF_MEMORY;
160 if(Curl_dyn_addn(o, "+", 1))
161 return CURLUE_OUT_OF_MEMORY;
169 if(urlchar_needs_escaping(*iptr)) {
170 if(Curl_dyn_addf(o, "%%%02x", *iptr))
171 return CURLUE_OUT_OF_MEMORY;
174 if(Curl_dyn_addn(o, iptr, 1))
175 return CURLUE_OUT_OF_MEMORY;
183 * Returns the length of the scheme if the given URL is absolute (as opposed
184 * to relative). Stores the scheme in the buffer if absolute and 'buf' is
185 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
187 * If 'guess_scheme' is TRUE, it means the URL might be provided without
190 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
194 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
195 (void)buflen; /* only used in debug-builds */
197 buf[0] = 0; /* always leave a defined value in buf */
199 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
202 for(i = 0; i < MAX_SCHEME_LEN; ++i) {
204 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
205 /* RFC 3986 3.1 explains:
206 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
213 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
214 /* If this does not guess scheme, the scheme always ends with the colon so
215 that this also detects data: URLs etc. In guessing mode, data: could
216 be the host name "data" with a specified port number. */
218 /* the length of the scheme is the name part only */
223 buf[i] = Curl_raw_tolower(url[i]);
232 * Concatenate a relative URL to a base URL making it absolute.
233 * URL-encodes any spaces.
234 * The returned pointer must be freed by the caller unless NULL
235 * (returns NULL on out of memory).
237 * Note that this function destroys the 'base' string.
239 static char *concat_url(char *base, const char *relurl)
242 Try to append this new path to the old URL
243 to the right of the host part. Beware: this approach is fragile and is
244 likely to cause problems in the future...
246 struct dynbuf newest;
249 bool host_changed = FALSE;
250 const char *useurl = relurl;
252 /* protsep points to the start of the host name */
253 protsep = strstr(base, "//");
257 protsep += 2; /* pass the slashes */
259 if('/' != relurl[0]) {
262 /* First we need to find out if there's a ?-letter in the URL,
263 and cut it and the right-side of that off */
264 pathsep = strchr(protsep, '?');
268 /* we have a relative path to append to the last slash if there's one
269 available, or if the new URL is just a query string (starts with a
270 '?') we append the new one at the end of the entire currently worked
272 if(useurl[0] != '?') {
273 pathsep = strrchr(protsep, '/');
278 /* Check if there's any slash after the host name, and if so, remember
279 that position instead */
280 pathsep = strchr(protsep, '/');
282 protsep = pathsep + 1;
286 /* now deal with one "./" or any amount of "../" in the newurl
287 and act accordingly */
289 if((useurl[0] == '.') && (useurl[1] == '/'))
290 useurl += 2; /* just skip the "./" */
292 while((useurl[0] == '.') &&
293 (useurl[1] == '.') &&
294 (useurl[2] == '/')) {
296 useurl += 3; /* pass the "../" */
301 /* cut off one more level from the right of the original URL */
302 pathsep = strrchr(protsep, '/');
313 /* We got a new absolute path for this server */
315 if(relurl[1] == '/') {
316 /* the new URL starts with //, just keep the protocol part from the
319 useurl = &relurl[2]; /* we keep the slashes from the original, so we
324 /* cut off the original URL from the first slash, or deal with URLs
326 pathsep = strchr(protsep, '/');
328 /* When people use badly formatted URLs, such as
329 "http://www.url.com?dir=/home/daniel" we must not use the first
330 slash, if there's a ?-letter before it! */
331 char *sep = strchr(protsep, '?');
332 if(sep && (sep < pathsep))
337 /* There was no slash. Now, since we might be operating on a badly
338 formatted URL, such as "http://www.url.com?id=2380" which doesn't
339 use a slash separator as it is supposed to, we need to check for a
341 pathsep = strchr(protsep, '?');
348 Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
350 /* copy over the root url part */
351 if(Curl_dyn_add(&newest, base))
354 /* check if we need to append a slash */
355 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
358 if(Curl_dyn_addn(&newest, "/", 1))
362 /* then append the new piece on the right side */
363 urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
365 return Curl_dyn_ptr(&newest);
368 /* scan for byte values <= 31 or 127 */
369 static bool junkscan(const char *part, unsigned int flags)
372 static const char badbytes[]={
373 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
374 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
375 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
376 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
377 0x7f, 0x00 /* null-terminate */
379 size_t n = strlen(part);
380 size_t nfine = strcspn(part, badbytes);
382 /* since we don't know which part is scanned, return a generic error
385 if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
392 * parse_hostname_login()
394 * Parse the login details (user name, password and options) from the URL and
395 * strip them out of the host name
398 static CURLUcode parse_hostname_login(struct Curl_URL *u,
402 CURLUcode result = CURLUE_OK;
405 char *passwdp = NULL;
406 char *optionsp = NULL;
407 const struct Curl_handler *h = NULL;
409 /* At this point, we assume all the other special cases have been taken
410 * care of, so the host is at most
412 * [user[:password][;options]]@]hostname
414 * We need somewhere to put the embedded details, so do that first.
417 char *login = Curl_dyn_ptr(host);
422 ptr = strchr(login, '@');
426 /* We will now try to extract the
427 * possible login information in a string like:
428 * ftp://user:password@ftp.my.site:8021/README */
431 /* if this is a known scheme, get some details */
433 h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
435 /* We could use the login information in the URL so extract it. Only parse
436 options if the handler says we should. Note that 'h' might be NULL! */
437 ccode = Curl_parse_login_details(login, ptr - login - 1,
439 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
442 result = CURLUE_BAD_LOGIN;
447 if(flags & CURLU_DISALLOW_USER) {
448 /* Option DISALLOW_USER is set and url contains username. */
449 result = CURLUE_USER_NOT_ALLOWED;
452 if(junkscan(userp, flags)) {
453 result = CURLUE_BAD_USER;
460 if(junkscan(passwdp, flags)) {
461 result = CURLUE_BAD_PASSWORD;
464 u->password = passwdp;
468 if(junkscan(optionsp, flags)) {
469 result = CURLUE_BAD_LOGIN;
472 u->options = optionsp;
475 /* move the name to the start of the host buffer */
476 if(Curl_dyn_tail(host, strlen(ptr)))
477 return CURLUE_OUT_OF_MEMORY;
492 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
495 char *portptr = NULL;
498 char *hostname = Curl_dyn_ptr(host);
500 * Find the end of an IPv6 address, either on the ']' ending bracket or
501 * a percent-encoded zone index.
503 if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
504 &endbracket, &len)) {
505 if(']' == endbracket)
506 portptr = &hostname[len];
507 else if('%' == endbracket) {
509 if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
510 if(']' != endbracket)
511 return CURLUE_BAD_IPV6;
512 portptr = &hostname[--zonelen + len + 1];
515 return CURLUE_BAD_IPV6;
518 return CURLUE_BAD_IPV6;
520 /* this is a RFC2732-style specified IP-address */
521 if(portptr && *portptr) {
523 return CURLUE_BAD_IPV6;
529 portptr = strchr(hostname, ':');
535 size_t keep = portptr - hostname;
537 /* Browser behavior adaptation. If there's a colon with no digits after,
538 just cut off the name there which makes us ignore the colon and just
539 use the default port. Firefox, Chrome and Safari all do that.
541 Don't do it if the URL has no scheme, to make something that looks like
544 Curl_dyn_setlen(host, keep);
547 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
549 if(!ISDIGIT(*portptr))
550 return CURLUE_BAD_PORT_NUMBER;
552 port = strtol(portptr, &rest, 10); /* Port number must be decimal */
555 return CURLUE_BAD_PORT_NUMBER;
558 return CURLUE_BAD_PORT_NUMBER;
561 /* generate a new port number string to get rid of leading zeroes etc */
562 msnprintf(portbuf, sizeof(portbuf), "%ld", port);
564 u->port = strdup(portbuf);
566 return CURLUE_OUT_OF_MEMORY;
572 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
573 size_t hlen) /* length of hostname */
576 DEBUGASSERT(hostname);
579 return CURLUE_NO_HOST;
580 else if(hostname[0] == '[') {
581 const char *l = "0123456789abcdefABCDEF:.";
582 if(hlen < 4) /* '[::]' is the shortest possible valid string */
583 return CURLUE_BAD_IPV6;
587 if(hostname[hlen] != ']')
588 return CURLUE_BAD_IPV6;
590 /* only valid letters are ok */
591 len = strspn(hostname, l);
594 if(hostname[len] == '%') {
595 /* this could now be '%[zone id]' */
598 char *h = &hostname[len + 1];
599 /* pass '25' if present and is a url encoded percent sign */
600 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
602 while(*h && (*h != ']') && (i < 15))
604 if(!i || (']' != *h))
605 /* impossible to reach? */
606 return CURLUE_MALFORMED_INPUT;
608 u->zoneid = strdup(zoneid);
610 return CURLUE_OUT_OF_MEMORY;
611 hostname[len] = ']'; /* insert end bracket */
612 hostname[len + 1] = 0; /* terminate the hostname */
615 return CURLUE_BAD_IPV6;
616 /* hostname is fine */
620 char dest[16]; /* fits a binary IPv6 address */
621 char norm[MAX_IPADR_LEN];
622 hostname[hlen] = 0; /* end the address there */
623 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
624 return CURLUE_BAD_IPV6;
626 /* check if it can be done shorter */
627 if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
628 (strlen(norm) < hlen)) {
629 strcpy(hostname, norm);
631 hostname[hlen + 1] = 0;
633 hostname[hlen] = ']'; /* restore ending bracket */
638 /* letters from the second string are not ok */
639 len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,");
641 /* hostname with bad content */
642 return CURLUE_BAD_HOSTNAME;
647 #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
650 * Handle partial IPv4 numerical addresses and different bases, like
651 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
653 * If the given input string is syntactically wrong or any part for example is
654 * too big, this function returns FALSE and doesn't create any output.
656 * Output the "normalized" version of that input string in plain quad decimal
657 * integers and return TRUE.
659 static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
663 const char *c = hostname;
664 unsigned long parts[4] = {0, 0, 0, 0};
669 if((*c < '0') || (*c > '9'))
670 /* most importantly this doesn't allow a leading plus or minus */
672 l = strtoul(c, &endp, 0);
674 /* overflow or nothing parsed at all */
675 if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
679 /* a value larger than 32 bits */
704 /* this is deemed a valid IPv4 numerical address */
707 case 0: /* a -- 32 bits */
708 msnprintf(outp, olen, "%u.%u.%u.%u",
709 parts[0] >> 24, (parts[0] >> 16) & 0xff,
710 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
712 case 1: /* a.b -- 8.24 bits */
713 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
715 msnprintf(outp, olen, "%u.%u.%u.%u",
716 parts[0], (parts[1] >> 16) & 0xff,
717 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
719 case 2: /* a.b.c -- 8.8.16 bits */
720 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
722 msnprintf(outp, olen, "%u.%u.%u.%u",
723 parts[0], parts[1], (parts[2] >> 8) & 0xff,
726 case 3: /* a.b.c.d -- 8.8.8.8 bits */
727 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
730 msnprintf(outp, olen, "%u.%u.%u.%u",
731 parts[0], parts[1], parts[2], parts[3]);
737 /* if necessary, replace the host content with a URL decoded version */
738 static CURLUcode decode_host(struct dynbuf *host)
741 const char *hostname = Curl_dyn_ptr(host);
742 if(hostname[0] == '[')
743 /* only decode if not an ipv6 numerical */
745 per = strchr(hostname, '%');
747 /* nothing to decode */
753 CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
756 return CURLUE_BAD_HOSTNAME;
757 Curl_dyn_reset(host);
758 result = Curl_dyn_addn(host, decoded, dlen);
761 return CURLUE_OUT_OF_MEMORY;
768 * "Remove Dot Segments"
769 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
776 * This function gets a null-terminated path with dot and dotdot sequences
777 * passed in and strips them off according to the rules in RFC 3986 section
780 * The function handles a query part ('?' + stuff) appended but it expects
781 * that fragments ('#' + stuff) have already been cut off.
785 * an allocated dedotdotified output string
787 UNITTEST char *dedotdotify(const char *input, size_t clen);
788 UNITTEST char *dedotdotify(const char *input, size_t clen)
790 char *out = malloc(clen + 1);
792 const char *orginput = input;
795 return NULL; /* out of memory */
797 *out = 0; /* null-terminates, for inputs like "./" */
801 /* zero length input string, return that */
805 * To handle query-parts properly, we must find it and remove it during the
806 * dotdot-operation and then append it again at the end to the output
809 queryp = strchr(input, '?');
814 /* A. If the input buffer begins with a prefix of "../" or "./", then
815 remove that prefix from the input buffer; otherwise, */
817 if(!strncmp("./", input, 2)) {
821 else if(!strncmp("../", input, 3)) {
825 /* D. if the input buffer consists only of "." or "..", then remove
826 that from the input buffer; otherwise, */
828 else if(!strcmp(".", input) || !strcmp("..", input) ||
829 !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
836 else if(*input == '/') {
837 /* B. if the input buffer begins with a prefix of "/./" or "/.", where
838 "." is a complete path segment, then replace that prefix with "/" in
839 the input buffer; otherwise, */
840 if(!strncmp("/./", input, 3)) {
844 else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
850 /* C. if the input buffer begins with a prefix of "/../" or "/..",
851 where ".." is a complete path segment, then replace that prefix with
852 "/" in the input buffer and remove the last segment and its
853 preceding "/" (if any) from the output buffer; otherwise, */
855 else if(!strncmp("/../", input, 4)) {
858 /* remove the last segment from the output buffer */
859 while(outptr > out) {
864 *outptr = 0; /* null-terminate where it stops */
866 else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
867 /* remove the last segment from the output buffer */
868 while(outptr > out) {
874 *outptr = 0; /* null-terminate where it stops */
884 /* E. move the first path segment in the input buffer to the end of
885 the output buffer, including the initial "/" character (if any) and
886 any subsequent characters up to, but not including, the next "/"
887 character or the end of the input buffer. */
890 *outptr++ = *input++;
892 } while(*input && (*input != '/') && (*input != '?'));
896 /* continue until end of input string OR, if there is a terminating
897 query part, stop there */
898 } while(*input && (!queryp || (input < queryp)));
902 /* There was a query part, append that to the output. */
903 size_t oindex = queryp - orginput;
904 qlen = strlen(&orginput[oindex]);
905 memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
911 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
915 bool uncpath = FALSE;
917 char *fragment = NULL;
918 char schemebuf[MAX_SCHEME_LEN + 1];
919 const char *schemep = NULL;
920 size_t schemelen = 0;
922 CURLUcode result = CURLUE_OK;
928 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
930 /*************************************************************
932 ************************************************************/
933 /* allocate scratch area */
934 urllen = strlen(url);
935 if(urllen > CURL_MAX_INPUT_LENGTH) {
936 /* excessive input length */
937 result = CURLUE_MALFORMED_INPUT;
941 schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
942 flags & (CURLU_GUESS_SCHEME|
943 CURLU_DEFAULT_SCHEME));
945 /* handle the file: scheme */
946 if(schemelen && !strcmp(schemebuf, "file")) {
948 /* file:/ is not enough to actually be a complete file: URL */
949 result = CURLUE_BAD_FILE_URL;
953 /* the path starts right after the "file:" prefix of the input URL */
954 path = (char *)&url[5];
956 schemep = u->scheme = strdup("file");
958 result = CURLUE_OUT_OF_MEMORY;
962 /* Extra handling URLs with an authority component (i.e. that start with
965 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
966 * RFC 8089, but not the (current) WHAT-WG URL spec.
968 if(path[0] == '/' && path[1] == '/') {
969 /* swallow the two slashes */
970 const char *ptr = &path[2];
973 * According to RFC 8089, a file: URL can be reliably dereferenced if:
975 * o it has no/blank hostname, or
977 * o the hostname matches "localhost" (case-insensitively), or
979 * o the hostname is a FQDN that resolves to this machine, or
981 * o it is a UNC String transformed to a URI (Windows only, RFC 8089
984 * For brevity, we only consider URLs with empty, "localhost", or
985 * "127.0.0.1" hostnames as local, otherwise as a UNC String.
987 * Additionally, there is an exception for URLs with a Windows drive
988 * letter in the authority (which was accidentally omitted from RFC 8089
989 * Appendix E, but believe me, it was meant to be there. --MK)
991 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
992 /* the URL includes a host name, it must match "localhost" or
993 "127.0.0.1" to be valid */
994 if(checkprefix("localhost/", ptr) ||
995 checkprefix("127.0.0.1/", ptr)) {
996 ptr += 9; /* now points to the slash after the host */
1002 /* the host name, NetBIOS computer name, can not contain disallowed
1003 chars, and the delimiting slash character must be appended to the
1005 path = strpbrk(ptr, "/\\:*?\"<>|");
1006 if(!path || *path != '/') {
1007 result = CURLUE_BAD_FILE_URL;
1013 if(Curl_dyn_addn(&host, ptr, len)) {
1014 result = CURLUE_OUT_OF_MEMORY;
1020 ptr -= 2; /* now points to the // before the host in UNC */
1022 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1024 result = CURLUE_BAD_FILE_URL;
1034 /* no host for file: URLs by default */
1035 Curl_dyn_reset(&host);
1037 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1038 /* Don't allow Windows drive letters when not in Windows.
1039 * This catches both "file:/c:" and "file:c:" */
1040 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1041 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1042 /* File drive letters are only accepted in MSDOS/Windows */
1043 result = CURLUE_BAD_FILE_URL;
1047 /* If the path starts with a slash and a drive letter, ditch the slash */
1048 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1049 /* This cannot be done with strcpy, as the memory chunks overlap! */
1063 p = &url[schemelen + 1];
1064 while(p && (*p == '/') && (i < 4)) {
1069 schemep = schemebuf;
1070 if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
1071 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1072 result = CURLUE_UNSUPPORTED_SCHEME;
1076 if((i < 1) || (i>3)) {
1077 /* less than one or more than three slashes */
1078 result = CURLUE_BAD_SLASHES;
1081 if(junkscan(schemep, flags)) {
1082 result = CURLUE_BAD_SCHEME;
1089 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1090 result = CURLUE_BAD_SCHEME;
1093 if(flags & CURLU_DEFAULT_SCHEME)
1094 schemep = DEFAULT_SCHEME;
1097 * The URL was badly formatted, let's try without scheme specified.
1101 hostp = p; /* host name starts here */
1103 /* find the end of the host name + port number */
1104 while(*p && !HOSTNAME_END(*p))
1109 if(Curl_dyn_addn(&host, hostp, len)) {
1110 result = CURLUE_OUT_OF_MEMORY;
1115 if(!(flags & CURLU_NO_AUTHORITY)) {
1116 result = CURLUE_NO_HOST;
1124 u->scheme = strdup(schemep);
1126 result = CURLUE_OUT_OF_MEMORY;
1132 fragment = strchr(path, '#');
1134 fraglen = strlen(fragment);
1136 /* skip the leading '#' in the copy but include the terminating null */
1137 u->fragment = Curl_memdup(fragment + 1, fraglen);
1139 result = CURLUE_OUT_OF_MEMORY;
1143 if(junkscan(u->fragment, flags)) {
1144 result = CURLUE_BAD_FRAGMENT;
1150 query = strchr(path, '?');
1151 if(query && (!fragment || (query < fragment))) {
1152 size_t qlen = strlen(query) - fraglen; /* includes '?' */
1153 pathlen = strlen(path) - qlen - fraglen;
1155 if(qlen && (flags & CURLU_URLENCODE)) {
1157 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1158 /* skip the leading question mark */
1159 if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
1160 result = CURLUE_OUT_OF_MEMORY;
1163 u->query = Curl_dyn_ptr(&enc);
1166 u->query = Curl_memdup(query + 1, qlen);
1168 result = CURLUE_OUT_OF_MEMORY;
1171 u->query[qlen - 1] = 0;
1174 if(junkscan(u->query, flags)) {
1175 result = CURLUE_BAD_QUERY;
1180 /* single byte query */
1181 u->query = strdup("");
1183 result = CURLUE_OUT_OF_MEMORY;
1189 pathlen = strlen(path) - fraglen;
1191 if(pathlen && (flags & CURLU_URLENCODE)) {
1193 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1194 if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
1195 result = CURLUE_OUT_OF_MEMORY;
1198 pathlen = Curl_dyn_len(&enc);
1199 path = u->path = Curl_dyn_ptr(&enc);
1203 /* there is no path left, unset */
1208 u->path = Curl_memdup(path, pathlen + 1);
1210 result = CURLUE_OUT_OF_MEMORY;
1213 u->path[pathlen] = 0;
1216 else if(flags & CURLU_URLENCODE)
1217 /* it might have encoded more than just the path so cut it */
1218 u->path[pathlen] = 0;
1220 if(junkscan(u->path, flags)) {
1221 result = CURLUE_BAD_PATH;
1225 if(!(flags & CURLU_PATH_AS_IS)) {
1226 /* remove ../ and ./ sequences according to RFC3986 */
1227 char *newp = dedotdotify((char *)path, pathlen);
1229 result = CURLUE_OUT_OF_MEMORY;
1237 if(Curl_dyn_len(&host)) {
1238 char normalized_ipv4[sizeof("255.255.255.255") + 1];
1241 * Parse the login details and strip them out of the host name.
1243 result = parse_hostname_login(u, &host, flags);
1245 result = Curl_parse_port(u, &host, schemelen);
1249 if(junkscan(Curl_dyn_ptr(&host), flags)) {
1250 result = CURLUE_BAD_HOSTNAME;
1254 if(ipv4_normalize(Curl_dyn_ptr(&host),
1255 normalized_ipv4, sizeof(normalized_ipv4))) {
1256 Curl_dyn_reset(&host);
1257 if(Curl_dyn_add(&host, normalized_ipv4)) {
1258 result = CURLUE_OUT_OF_MEMORY;
1263 result = decode_host(&host);
1265 result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
1270 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1271 const char *hostname = Curl_dyn_ptr(&host);
1272 /* legacy curl-style guess based on host name */
1273 if(checkprefix("ftp.", hostname))
1275 else if(checkprefix("dict.", hostname))
1277 else if(checkprefix("ldap.", hostname))
1279 else if(checkprefix("imap.", hostname))
1281 else if(checkprefix("smtp.", hostname))
1283 else if(checkprefix("pop3.", hostname))
1288 u->scheme = strdup(schemep);
1290 result = CURLUE_OUT_OF_MEMORY;
1295 else if(flags & CURLU_NO_AUTHORITY) {
1296 /* allowed to be empty. */
1297 if(Curl_dyn_add(&host, "")) {
1298 result = CURLUE_OUT_OF_MEMORY;
1303 u->host = Curl_dyn_ptr(&host);
1307 Curl_dyn_free(&host);
1313 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1315 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1320 memset(&tmpurl, 0, sizeof(tmpurl));
1321 result = parseurl(url, &tmpurl, flags);
1331 CURLU *curl_url(void)
1333 return calloc(sizeof(struct Curl_URL), 1);
1336 void curl_url_cleanup(CURLU *u)
1344 #define DUP(dest, src, name) \
1347 dest->name = strdup(src->name); \
1353 CURLU *curl_url_dup(CURLU *in)
1355 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1359 DUP(u, in, password);
1360 DUP(u, in, options);
1365 DUP(u, in, fragment);
1366 u->portnum = in->portnum;
1370 curl_url_cleanup(u);
1374 CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1375 char **part, unsigned int flags)
1378 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1380 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1381 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1382 bool plusdecode = FALSE;
1385 return CURLUE_BAD_HANDLE;
1387 return CURLUE_BAD_PARTPOINTER;
1391 case CURLUPART_SCHEME:
1393 ifmissing = CURLUE_NO_SCHEME;
1394 urldecode = FALSE; /* never for schemes */
1396 case CURLUPART_USER:
1398 ifmissing = CURLUE_NO_USER;
1400 case CURLUPART_PASSWORD:
1402 ifmissing = CURLUE_NO_PASSWORD;
1404 case CURLUPART_OPTIONS:
1406 ifmissing = CURLUE_NO_OPTIONS;
1408 case CURLUPART_HOST:
1410 ifmissing = CURLUE_NO_HOST;
1412 case CURLUPART_ZONEID:
1414 ifmissing = CURLUE_NO_ZONEID;
1416 case CURLUPART_PORT:
1418 ifmissing = CURLUE_NO_PORT;
1419 urldecode = FALSE; /* never for port */
1420 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1421 /* there's no stored port number, but asked to deliver
1422 a default one for the scheme */
1423 const struct Curl_handler *h =
1424 Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1426 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1430 else if(ptr && u->scheme) {
1431 /* there is a stored port number, but asked to inhibit if
1432 it matches the default one for the scheme */
1433 const struct Curl_handler *h =
1434 Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1435 if(h && (h->defport == u->portnum) &&
1436 (flags & CURLU_NO_DEFAULT_PORT))
1440 case CURLUPART_PATH:
1443 ptr = u->path = strdup("/");
1445 return CURLUE_OUT_OF_MEMORY;
1448 case CURLUPART_QUERY:
1450 ifmissing = CURLUE_NO_QUERY;
1451 plusdecode = urldecode;
1453 case CURLUPART_FRAGMENT:
1455 ifmissing = CURLUE_NO_FRAGMENT;
1457 case CURLUPART_URL: {
1460 char *options = u->options;
1461 char *port = u->port;
1462 char *allochost = NULL;
1463 if(u->scheme && strcasecompare("file", u->scheme)) {
1464 url = aprintf("file://%s%s%s",
1466 u->fragment? "#": "",
1467 u->fragment? u->fragment : "");
1470 return CURLUE_NO_HOST;
1472 const struct Curl_handler *h = NULL;
1475 else if(flags & CURLU_DEFAULT_SCHEME)
1476 scheme = (char *) DEFAULT_SCHEME;
1478 return CURLUE_NO_SCHEME;
1480 h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1481 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1482 /* there's no stored port number, but asked to deliver
1483 a default one for the scheme */
1485 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1490 /* there is a stored port number, but asked to inhibit if it matches
1491 the default one for the scheme */
1492 if(h && (h->defport == u->portnum) &&
1493 (flags & CURLU_NO_DEFAULT_PORT))
1497 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1500 if(u->host[0] == '[') {
1502 /* make it '[ host %25 zoneid ]' */
1504 size_t hostlen = strlen(u->host);
1505 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1506 if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1508 return CURLUE_OUT_OF_MEMORY;
1509 allochost = Curl_dyn_ptr(&enc);
1512 else if(urlencode) {
1513 allochost = curl_easy_escape(NULL, u->host, 0);
1515 return CURLUE_OUT_OF_MEMORY;
1518 /* only encode '%' in output host name */
1519 char *host = u->host;
1520 bool percent = FALSE;
1521 /* first, count number of percents present in the name */
1529 /* if there were percent(s), encode the host name */
1533 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1537 result = Curl_dyn_addn(&enc, "%25", 3);
1539 result = Curl_dyn_addn(&enc, host, 1);
1541 return CURLUE_OUT_OF_MEMORY;
1545 u->host = Curl_dyn_ptr(&enc);
1549 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1551 u->user ? u->user : "",
1552 u->password ? ":": "",
1553 u->password ? u->password : "",
1555 options ? options : "",
1556 (u->user || u->password || options) ? "@": "",
1557 allochost ? allochost : u->host,
1560 (u->path && (u->path[0] != '/')) ? "/": "",
1561 u->path ? u->path : "/",
1562 (u->query && u->query[0]) ? "?": "",
1563 (u->query && u->query[0]) ? u->query : "",
1564 u->fragment? "#": "",
1565 u->fragment? u->fragment : "");
1569 return CURLUE_OUT_OF_MEMORY;
1578 size_t partlen = strlen(ptr);
1580 *part = Curl_memdup(ptr, partlen + 1);
1582 return CURLUE_OUT_OF_MEMORY;
1584 /* convert + to space */
1586 for(i = 0; i < partlen; ++plus, i++) {
1594 /* this unconditional rejection of control bytes is documented
1596 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1600 return CURLUE_URLDECODE;
1607 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1608 if(urlencode_str(&enc, *part, partlen, TRUE,
1609 what == CURLUPART_QUERY))
1610 return CURLUE_OUT_OF_MEMORY;
1612 *part = Curl_dyn_ptr(&enc);
1621 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1622 const char *part, unsigned int flags)
1624 char **storep = NULL;
1626 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1627 bool plusencode = FALSE;
1628 bool urlskipslash = FALSE;
1629 bool appendquery = FALSE;
1630 bool equalsencode = FALSE;
1633 return CURLUE_BAD_HANDLE;
1635 /* setting a part to NULL clears it */
1639 case CURLUPART_SCHEME:
1640 storep = &u->scheme;
1642 case CURLUPART_USER:
1645 case CURLUPART_PASSWORD:
1646 storep = &u->password;
1648 case CURLUPART_OPTIONS:
1649 storep = &u->options;
1651 case CURLUPART_HOST:
1654 case CURLUPART_ZONEID:
1655 storep = &u->zoneid;
1657 case CURLUPART_PORT:
1661 case CURLUPART_PATH:
1664 case CURLUPART_QUERY:
1667 case CURLUPART_FRAGMENT:
1668 storep = &u->fragment;
1671 return CURLUE_UNKNOWN_PART;
1673 if(storep && *storep) {
1674 Curl_safefree(*storep);
1678 memset(u, 0, sizeof(struct Curl_URL));
1684 case CURLUPART_SCHEME:
1685 if(strlen(part) > MAX_SCHEME_LEN)
1687 return CURLUE_BAD_SCHEME;
1688 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1689 /* verify that it is a fine scheme */
1690 !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
1691 return CURLUE_UNSUPPORTED_SCHEME;
1692 storep = &u->scheme;
1693 urlencode = FALSE; /* never */
1695 case CURLUPART_USER:
1698 case CURLUPART_PASSWORD:
1699 storep = &u->password;
1701 case CURLUPART_OPTIONS:
1702 storep = &u->options;
1704 case CURLUPART_HOST: {
1705 size_t len = strcspn(part, " \r\n");
1706 if(strlen(part) != len)
1707 /* hostname with bad content */
1708 return CURLUE_BAD_HOSTNAME;
1710 Curl_safefree(u->zoneid);
1713 case CURLUPART_ZONEID:
1714 storep = &u->zoneid;
1716 case CURLUPART_PORT:
1719 urlencode = FALSE; /* never */
1720 port = strtol(part, &endp, 10); /* Port number must be decimal */
1721 if((port <= 0) || (port > 0xffff))
1722 return CURLUE_BAD_PORT_NUMBER;
1724 /* weirdly provided number, not good! */
1725 return CURLUE_BAD_PORT_NUMBER;
1729 case CURLUPART_PATH:
1730 urlskipslash = TRUE;
1733 case CURLUPART_QUERY:
1734 plusencode = urlencode;
1735 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1736 equalsencode = appendquery;
1739 case CURLUPART_FRAGMENT:
1740 storep = &u->fragment;
1742 case CURLUPART_URL: {
1744 * Allow a new URL to replace the existing (if any) contents.
1746 * If the existing contents is enough for a URL, allow a relative URL to
1753 /* if the new thing is absolute or the old one is not
1754 * (we could not get an absolute url in 'oldurl'),
1755 * then replace the existing with the new. */
1756 if(Curl_is_absolute_url(part, NULL, 0,
1757 flags & (CURLU_GUESS_SCHEME|
1758 CURLU_DEFAULT_SCHEME))
1759 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1760 return parseurl_and_replace(part, u, flags);
1763 /* apply the relative part to create a new URL
1764 * and replace the existing one with it. */
1765 redired_url = concat_url(oldurl, part);
1768 return CURLUE_OUT_OF_MEMORY;
1770 result = parseurl_and_replace(redired_url, u, flags);
1775 return CURLUE_UNKNOWN_PART;
1777 DEBUGASSERT(storep);
1779 const char *newp = part;
1780 size_t nalloc = strlen(part);
1782 if(nalloc > CURL_MAX_INPUT_LENGTH)
1783 /* excessive input length */
1784 return CURLUE_MALFORMED_INPUT;
1787 const unsigned char *i;
1790 Curl_dyn_init(&enc, nalloc * 3 + 1);
1792 for(i = (const unsigned char *)part; *i; i++) {
1794 if((*i == ' ') && plusencode) {
1795 result = Curl_dyn_addn(&enc, "+", 1);
1797 return CURLUE_OUT_OF_MEMORY;
1799 else if(Curl_isunreserved(*i) ||
1800 ((*i == '/') && urlskipslash) ||
1801 ((*i == '=') && equalsencode)) {
1802 if((*i == '=') && equalsencode)
1803 /* only skip the first equals sign */
1804 equalsencode = FALSE;
1805 result = Curl_dyn_addn(&enc, i, 1);
1807 return CURLUE_OUT_OF_MEMORY;
1810 result = Curl_dyn_addf(&enc, "%%%02x", *i);
1812 return CURLUE_OUT_OF_MEMORY;
1815 newp = Curl_dyn_ptr(&enc);
1819 newp = strdup(part);
1821 return CURLUE_OUT_OF_MEMORY;
1824 /* make sure percent encoded are lower case */
1825 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1826 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1827 p[1] = Curl_raw_tolower(p[1]);
1828 p[2] = Curl_raw_tolower(p[2]);
1837 /* Append the 'newp' string onto the old query. Add a '&' separator if
1838 none is present at the end of the existing query already */
1840 size_t querylen = u->query ? strlen(u->query) : 0;
1841 bool addamperand = querylen && (u->query[querylen -1] != '&');
1844 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1846 if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
1850 if(Curl_dyn_addn(&enc, "&", 1))
1853 if(Curl_dyn_add(&enc, newp))
1857 *storep = Curl_dyn_ptr(&enc);
1861 return CURLUE_OUT_OF_MEMORY;
1865 if(what == CURLUPART_HOST) {
1866 size_t n = strlen(newp);
1867 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1868 /* Skip hostname check, it's allowed to be empty. */
1871 if(hostname_check(u, (char *)newp, n)) {
1873 return CURLUE_BAD_HOSTNAME;
1879 *storep = (char *)newp;
1881 /* set after the string, to make it not assigned if the allocation above