Utilities/cmcurl/lib/urlapi.c

   1 /***************************************************************************
   2  *                                  _   _ ____  _
   3  *  Project                     ___| | | |  _ \| |
   4  *                             / __| | | | |_) | |
   5  *                            | (__| |_| |  _ <| |___
   6  *                             \___|\___/|_| \_\_____|
   7  *
   8  * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
   9  *
  10  * This software is licensed as described in the file COPYING, which
  11  * you should have received as part of this distribution. The terms
  12  * are also available at https://curl.se/docs/copyright.html.
  13  *
  14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15  * copies of the Software, and permit persons to whom the Software is
  16  * furnished to do so, under the terms of the COPYING file.
  17  *
  18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19  * KIND, either express or implied.
  20  *
  21  * SPDX-License-Identifier: curl
  22  *
  23  ***************************************************************************/
  24
  25 #include "curl_setup.h"
  26
  27 #include "urldata.h"
  28 #include "urlapi-int.h"
  29 #include "strcase.h"
  30 #include "url.h"
  31 #include "escape.h"
  32 #include "curl_ctype.h"
  33 #include "inet_pton.h"
  34 #include "inet_ntop.h"
  35 #include "strdup.h"
  36
  37 /* The last 3 #include files should be in this order */
  38 #include "curl_printf.h"
  39 #include "curl_memory.h"
  40 #include "memdebug.h"
  41
  /* MSDOS/Windows style drive prefix, eg c: in c:foo
   *
   * Evaluates to non-zero when 'str' starts with one letter (a-z or A-Z)
   * immediately followed by a colon. The argument is parenthesized in the
   * expansion so that pointer expressions such as 'ptr + 1' work. It is
   * evaluated several times, so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
  47
  /* Drive prefix as it can appear inside a URL: one letter (a-z or A-Z),
   * then ':' or '|', then a slash, a backslash or the end of the string.
   * The argument is evaluated several times. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((((str)[0] >= 'a') && ((str)[0] <= 'z')) || \
    (((str)[0] >= 'A') && ((str)[0] <= 'Z'))) && \
   (((str)[1] == ':') || ((str)[1] == '|')) && \
   (((str)[2] == '/') || ((str)[2] == '\\') || !(str)[2]))

/* Schemes are not URL encoded. This is the largest number of characters
   Curl_is_absolute_url() scans when looking for a scheme name. */
#define MAX_SCHEME_LEN 40
  58
  59 /* Internal representation of CURLU. Point to URL-encoded strings. */
  60 struct Curl_URL {
  61   char *scheme;
  62   char *user;
  63   char *password;
  64   char *options; /* IMAP only? */
  65   char *host;
  66   char *zoneid; /* for numerical IPv6 addresses */
  67   char *port;
  68   char *path;
  69   char *query;
  70   char *fragment;
  71   long portnum; /* the numerical version */
  72 };
  73
  74 #define DEFAULT_SCHEME "https"
  75
  76 static void free_urlhandle(struct Curl_URL *u)
  77 {
  78   free(u->scheme);
  79   free(u->user);
  80   free(u->password);
  81   free(u->options);
  82   free(u->host);
  83   free(u->zoneid);
  84   free(u->port);
  85   free(u->path);
  86   free(u->query);
  87   free(u->fragment);
  88 }
  89
  90 /*
  91  * Find the separator at the end of the host name, or the '?' in cases like
  92  * http://www.url.com?id=2380
  93  */
  94 static const char *find_host_sep(const char *url)
  95 {
  96   const char *sep;
  97   const char *query;
  98
  99   /* Find the start of the hostname */
 100   sep = strstr(url, "//");
 101   if(!sep)
 102     sep = url;
 103   else
 104     sep += 2;
 105
 106   query = strchr(sep, '?');
 107   sep = strchr(sep, '/');
 108
 109   if(!sep)
 110     sep = url + strlen(url);
 111
 112   if(!query)
 113     query = url + strlen(url);
 114
 115   return sep < query ? sep : query;
 116 }
 117
 118 /*
 119  * Decide in an encoding-independent manner whether a character in a URL must
 120  * be escaped. This is used in urlencode_str().
 121  */
 122 static bool urlchar_needs_escaping(int c)
 123 {
 124   return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
 125 }
 126
 127 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
 128  * spaces in the source URL accordingly.
 129  *
 130  * URL encoding should be skipped for host names, otherwise IDN resolution
 131  * will fail.
 132  */
 133 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
 134                                size_t len, bool relative,
 135                                bool query)
 136 {
 137   /* we must add this with whitespace-replacing */
 138   bool left = !query;
 139   const unsigned char *iptr;
 140   const unsigned char *host_sep = (const unsigned char *) url;
 141
 142   if(!relative)
 143     host_sep = (const unsigned char *) find_host_sep(url);
 144
 145   for(iptr = (unsigned char *)url;    /* read from here */
 146       len; iptr++, len--) {
 147
 148     if(iptr < host_sep) {
 149       if(Curl_dyn_addn(o, iptr, 1))
 150         return CURLUE_OUT_OF_MEMORY;
 151       continue;
 152     }
 153
 154     if(*iptr == ' ') {
 155       if(left) {
 156         if(Curl_dyn_addn(o, "%20", 3))
 157           return CURLUE_OUT_OF_MEMORY;
 158       }
 159       else {
 160         if(Curl_dyn_addn(o, "+", 1))
 161           return CURLUE_OUT_OF_MEMORY;
 162       }
 163       continue;
 164     }
 165
 166     if(*iptr == '?')
 167       left = FALSE;
 168
 169     if(urlchar_needs_escaping(*iptr)) {
 170       if(Curl_dyn_addf(o, "%%%02x", *iptr))
 171         return CURLUE_OUT_OF_MEMORY;
 172     }
 173     else {
 174       if(Curl_dyn_addn(o, iptr, 1))
 175         return CURLUE_OUT_OF_MEMORY;
 176     }
 177   }
 178
 179   return CURLUE_OK;
 180 }
 181
 182 /*
 183  * Returns the length of the scheme if the given URL is absolute (as opposed
 184  * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
 185  * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
 186  *
 187  * If 'guess_scheme' is TRUE, it means the URL might be provided without
 188  * scheme.
 189  */
 190 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
 191                             bool guess_scheme)
 192 {
 193   int i;
 194   DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
 195   (void)buflen; /* only used in debug-builds */
 196   if(buf)
 197     buf[0] = 0; /* always leave a defined value in buf */
 198 #ifdef WIN32
 199   if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
 200     return 0;
 201 #endif
 202   for(i = 0; i < MAX_SCHEME_LEN; ++i) {
 203     char s = url[i];
 204     if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
 205       /* RFC 3986 3.1 explains:
 206         scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 207       */
 208     }
 209     else {
 210       break;
 211     }
 212   }
 213   if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
 214     /* If this does not guess scheme, the scheme always ends with the colon so
 215        that this also detects data: URLs etc. In guessing mode, data: could
 216        be the host name "data" with a specified port number. */
 217
 218     /* the length of the scheme is the name part only */
 219     size_t len = i;
 220     if(buf) {
 221       buf[i] = 0;
 222       while(i--) {
 223         buf[i] = Curl_raw_tolower(url[i]);
 224       }
 225     }
 226     return len;
 227   }
 228   return 0;
 229 }
 230
 231 /*
 232  * Concatenate a relative URL to a base URL making it absolute.
 233  * URL-encodes any spaces.
 234  * The returned pointer must be freed by the caller unless NULL
 235  * (returns NULL on out of memory).
 236  *
 237  * Note that this function destroys the 'base' string.
 238  */
 239 static char *concat_url(char *base, const char *relurl)
 240 {
 241   /***
 242    TRY to append this new path to the old URL
 243    to the right of the host part. Oh crap, this is doomed to cause
 244    problems in the future...
 245   */
 246   struct dynbuf newest;
 247   char *protsep;
 248   char *pathsep;
 249   bool host_changed = FALSE;
 250   const char *useurl = relurl;
 251
 252   /* protsep points to the start of the host name */
 253   protsep = strstr(base, "//");
 254   if(!protsep)
 255     protsep = base;
 256   else
 257     protsep += 2; /* pass the slashes */
 258
 259   if('/' != relurl[0]) {
 260     int level = 0;
 261
 262     /* First we need to find out if there's a ?-letter in the URL,
 263        and cut it and the right-side of that off */
 264     pathsep = strchr(protsep, '?');
 265     if(pathsep)
 266       *pathsep = 0;
 267
 268     /* we have a relative path to append to the last slash if there's one
 269        available, or if the new URL is just a query string (starts with a
 270        '?')  we append the new one at the end of the entire currently worked
 271        out URL */
 272     if(useurl[0] != '?') {
 273       pathsep = strrchr(protsep, '/');
 274       if(pathsep)
 275         *pathsep = 0;
 276     }
 277
 278     /* Check if there's any slash after the host name, and if so, remember
 279        that position instead */
 280     pathsep = strchr(protsep, '/');
 281     if(pathsep)
 282       protsep = pathsep + 1;
 283     else
 284       protsep = NULL;
 285
 286     /* now deal with one "./" or any amount of "../" in the newurl
 287        and act accordingly */
 288
 289     if((useurl[0] == '.') && (useurl[1] == '/'))
 290       useurl += 2; /* just skip the "./" */
 291
 292     while((useurl[0] == '.') &&
 293           (useurl[1] == '.') &&
 294           (useurl[2] == '/')) {
 295       level++;
 296       useurl += 3; /* pass the "../" */
 297     }
 298
 299     if(protsep) {
 300       while(level--) {
 301         /* cut off one more level from the right of the original URL */
 302         pathsep = strrchr(protsep, '/');
 303         if(pathsep)
 304           *pathsep = 0;
 305         else {
 306           *protsep = 0;
 307           break;
 308         }
 309       }
 310     }
 311   }
 312   else {
 313     /* We got a new absolute path for this server */
 314
 315     if(relurl[1] == '/') {
 316       /* the new URL starts with //, just keep the protocol part from the
 317          original one */
 318       *protsep = 0;
 319       useurl = &relurl[2]; /* we keep the slashes from the original, so we
 320                               skip the new ones */
 321       host_changed = TRUE;
 322     }
 323     else {
 324       /* cut off the original URL from the first slash, or deal with URLs
 325          without slash */
 326       pathsep = strchr(protsep, '/');
 327       if(pathsep) {
 328         /* When people use badly formatted URLs, such as
 329            "http://www.url.com?dir=/home/daniel" we must not use the first
 330            slash, if there's a ?-letter before it! */
 331         char *sep = strchr(protsep, '?');
 332         if(sep && (sep < pathsep))
 333           pathsep = sep;
 334         *pathsep = 0;
 335       }
 336       else {
 337         /* There was no slash. Now, since we might be operating on a badly
 338            formatted URL, such as "http://www.url.com?id=2380" which doesn't
 339            use a slash separator as it is supposed to, we need to check for a
 340            ?-letter as well! */
 341         pathsep = strchr(protsep, '?');
 342         if(pathsep)
 343           *pathsep = 0;
 344       }
 345     }
 346   }
 347
 348   Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
 349
 350   /* copy over the root url part */
 351   if(Curl_dyn_add(&newest, base))
 352     return NULL;
 353
 354   /* check if we need to append a slash */
 355   if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
 356     ;
 357   else {
 358     if(Curl_dyn_addn(&newest, "/", 1))
 359       return NULL;
 360   }
 361
 362   /* then append the new piece on the right side */
 363   urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
 364
 365   return Curl_dyn_ptr(&newest);
 366 }
 367
 368 /* scan for byte values < 31 or 127 */
 369 static bool junkscan(const char *part, unsigned int flags)
 370 {
 371   if(part) {
 372     static const char badbytes[]={
 373       /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 374       0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 375       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 376       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
 377       0x7f, 0x00 /* null-terminate */
 378     };
 379     size_t n = strlen(part);
 380     size_t nfine = strcspn(part, badbytes);
 381     if(nfine != n)
 382       /* since we don't know which part is scanned, return a generic error
 383          code */
 384       return TRUE;
 385     if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
 386       return TRUE;
 387   }
 388   return FALSE;
 389 }
 390
 391 /*
 392  * parse_hostname_login()
 393  *
 394  * Parse the login details (user name, password and options) from the URL and
 395  * strip them out of the host name
 396  *
 397  */
 398 static CURLUcode parse_hostname_login(struct Curl_URL *u,
 399                                       struct dynbuf *host,
 400                                       unsigned int flags)
 401 {
 402   CURLUcode result = CURLUE_OK;
 403   CURLcode ccode;
 404   char *userp = NULL;
 405   char *passwdp = NULL;
 406   char *optionsp = NULL;
 407   const struct Curl_handler *h = NULL;
 408
 409   /* At this point, we assume all the other special cases have been taken
 410    * care of, so the host is at most
 411    *
 412    *   [user[:password][;options]]@]hostname
 413    *
 414    * We need somewhere to put the embedded details, so do that first.
 415    */
 416
 417   char *login = Curl_dyn_ptr(host);
 418   char *ptr;
 419
 420   DEBUGASSERT(login);
 421
 422   ptr = strchr(login, '@');
 423   if(!ptr)
 424     goto out;
 425
 426   /* We will now try to extract the
 427    * possible login information in a string like:
 428    * ftp://user:password@ftp.my.site:8021/README */
 429   ptr++;
 430
 431   /* if this is a known scheme, get some details */
 432   if(u->scheme)
 433     h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
 434
 435   /* We could use the login information in the URL so extract it. Only parse
 436      options if the handler says we should. Note that 'h' might be NULL! */
 437   ccode = Curl_parse_login_details(login, ptr - login - 1,
 438                                    &userp, &passwdp,
 439                                    (h && (h->flags & PROTOPT_URLOPTIONS)) ?
 440                                    &optionsp:NULL);
 441   if(ccode) {
 442     result = CURLUE_BAD_LOGIN;
 443     goto out;
 444   }
 445
 446   if(userp) {
 447     if(flags & CURLU_DISALLOW_USER) {
 448       /* Option DISALLOW_USER is set and url contains username. */
 449       result = CURLUE_USER_NOT_ALLOWED;
 450       goto out;
 451     }
 452     if(junkscan(userp, flags)) {
 453       result = CURLUE_BAD_USER;
 454       goto out;
 455     }
 456     u->user = userp;
 457   }
 458
 459   if(passwdp) {
 460     if(junkscan(passwdp, flags)) {
 461       result = CURLUE_BAD_PASSWORD;
 462       goto out;
 463     }
 464     u->password = passwdp;
 465   }
 466
 467   if(optionsp) {
 468     if(junkscan(optionsp, flags)) {
 469       result = CURLUE_BAD_LOGIN;
 470       goto out;
 471     }
 472     u->options = optionsp;
 473   }
 474
 475   /* move the name to the start of the host buffer */
 476   if(Curl_dyn_tail(host, strlen(ptr)))
 477     return CURLUE_OUT_OF_MEMORY;
 478
 479   return CURLUE_OK;
 480   out:
 481
 482   free(userp);
 483   free(passwdp);
 484   free(optionsp);
 485   u->user = NULL;
 486   u->password = NULL;
 487   u->options = NULL;
 488
 489   return result;
 490 }
 491
 492 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
 493                                    bool has_scheme)
 494 {
 495   char *portptr = NULL;
 496   char endbracket;
 497   int len;
 498   char *hostname = Curl_dyn_ptr(host);
 499   /*
 500    * Find the end of an IPv6 address, either on the ']' ending bracket or
 501    * a percent-encoded zone index.
 502    */
 503   if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
 504                  &endbracket, &len)) {
 505     if(']' == endbracket)
 506       portptr = &hostname[len];
 507     else if('%' == endbracket) {
 508       int zonelen = len;
 509       if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
 510         if(']' != endbracket)
 511           return CURLUE_BAD_IPV6;
 512         portptr = &hostname[--zonelen + len + 1];
 513       }
 514       else
 515         return CURLUE_BAD_IPV6;
 516     }
 517     else
 518       return CURLUE_BAD_IPV6;
 519
 520     /* this is a RFC2732-style specified IP-address */
 521     if(portptr && *portptr) {
 522       if(*portptr != ':')
 523         return CURLUE_BAD_IPV6;
 524     }
 525     else
 526       portptr = NULL;
 527   }
 528   else
 529     portptr = strchr(hostname, ':');
 530
 531   if(portptr) {
 532     char *rest;
 533     long port;
 534     char portbuf[7];
 535     size_t keep = portptr - hostname;
 536
 537     /* Browser behavior adaptation. If there's a colon with no digits after,
 538        just cut off the name there which makes us ignore the colon and just
 539        use the default port. Firefox, Chrome and Safari all do that.
 540
 541        Don't do it if the URL has no scheme, to make something that looks like
 542        a scheme not work!
 543     */
 544     Curl_dyn_setlen(host, keep);
 545     portptr++;
 546     if(!*portptr)
 547       return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
 548
 549     if(!ISDIGIT(*portptr))
 550       return CURLUE_BAD_PORT_NUMBER;
 551
 552     port = strtol(portptr, &rest, 10);  /* Port number must be decimal */
 553
 554     if(port > 0xffff)
 555       return CURLUE_BAD_PORT_NUMBER;
 556
 557     if(rest[0])
 558       return CURLUE_BAD_PORT_NUMBER;
 559
 560     *rest = 0;
 561     /* generate a new port number string to get rid of leading zeroes etc */
 562     msnprintf(portbuf, sizeof(portbuf), "%ld", port);
 563     u->portnum = port;
 564     u->port = strdup(portbuf);
 565     if(!u->port)
 566       return CURLUE_OUT_OF_MEMORY;
 567   }
 568
 569   return CURLUE_OK;
 570 }
 571
 572 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
 573                                 size_t hlen) /* length of hostname */
 574 {
 575   size_t len;
 576   DEBUGASSERT(hostname);
 577
 578   if(!hostname[0])
 579     return CURLUE_NO_HOST;
 580   else if(hostname[0] == '[') {
 581     const char *l = "0123456789abcdefABCDEF:.";
 582     if(hlen < 4) /* '[::]' is the shortest possible valid string */
 583       return CURLUE_BAD_IPV6;
 584     hostname++;
 585     hlen -= 2;
 586
 587     if(hostname[hlen] != ']')
 588       return CURLUE_BAD_IPV6;
 589
 590     /* only valid letters are ok */
 591     len = strspn(hostname, l);
 592     if(hlen != len) {
 593       hlen = len;
 594       if(hostname[len] == '%') {
 595         /* this could now be '%[zone id]' */
 596         char zoneid[16];
 597         int i = 0;
 598         char *h = &hostname[len + 1];
 599         /* pass '25' if present and is a url encoded percent sign */
 600         if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
 601           h += 2;
 602         while(*h && (*h != ']') && (i < 15))
 603           zoneid[i++] = *h++;
 604         if(!i || (']' != *h))
 605           /* impossible to reach? */
 606           return CURLUE_MALFORMED_INPUT;
 607         zoneid[i] = 0;
 608         u->zoneid = strdup(zoneid);
 609         if(!u->zoneid)
 610           return CURLUE_OUT_OF_MEMORY;
 611         hostname[len] = ']'; /* insert end bracket */
 612         hostname[len + 1] = 0; /* terminate the hostname */
 613       }
 614       else
 615         return CURLUE_BAD_IPV6;
 616       /* hostname is fine */
 617     }
 618 #ifdef ENABLE_IPV6
 619     {
 620       char dest[16]; /* fits a binary IPv6 address */
 621       char norm[MAX_IPADR_LEN];
 622       hostname[hlen] = 0; /* end the address there */
 623       if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
 624         return CURLUE_BAD_IPV6;
 625
 626       /* check if it can be done shorter */
 627       if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
 628          (strlen(norm) < hlen)) {
 629         strcpy(hostname, norm);
 630         hlen = strlen(norm);
 631         hostname[hlen + 1] = 0;
 632       }
 633       hostname[hlen] = ']'; /* restore ending bracket */
 634     }
 635 #endif
 636   }
 637   else {
 638     /* letters from the second string are not ok */
 639     len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,");
 640     if(hlen != len)
 641       /* hostname with bad content */
 642       return CURLUE_BAD_HOSTNAME;
 643   }
 644   return CURLUE_OK;
 645 }
 646
 647 #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
 648
 649 /*
 650  * Handle partial IPv4 numerical addresses and different bases, like
 651  * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
 652  *
 653  * If the given input string is syntactically wrong or any part for example is
 654  * too big, this function returns FALSE and doesn't create any output.
 655  *
 656  * Output the "normalized" version of that input string in plain quad decimal
 657  * integers and return TRUE.
 658  */
 659 static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
 660 {
 661   bool done = FALSE;
 662   int n = 0;
 663   const char *c = hostname;
 664   unsigned long parts[4] = {0, 0, 0, 0};
 665
 666   while(!done) {
 667     char *endp;
 668     unsigned long l;
 669     if((*c < '0') || (*c > '9'))
 670       /* most importantly this doesn't allow a leading plus or minus */
 671       return FALSE;
 672     l = strtoul(c, &endp, 0);
 673
 674     /* overflow or nothing parsed at all */
 675     if(((l == ULONG_MAX) && (errno == ERANGE)) ||  (endp == c))
 676       return FALSE;
 677
 678 #if SIZEOF_LONG > 4
 679     /* a value larger than 32 bits */
 680     if(l > UINT_MAX)
 681       return FALSE;
 682 #endif
 683
 684     parts[n] = l;
 685     c = endp;
 686
 687     switch (*c) {
 688     case '.' :
 689       if(n == 3)
 690         return FALSE;
 691       n++;
 692       c++;
 693       break;
 694
 695     case '\0':
 696       done = TRUE;
 697       break;
 698
 699     default:
 700       return FALSE;
 701     }
 702   }
 703
 704   /* this is deemed a valid IPv4 numerical address */
 705
 706   switch(n) {
 707   case 0: /* a -- 32 bits */
 708     msnprintf(outp, olen, "%u.%u.%u.%u",
 709               parts[0] >> 24, (parts[0] >> 16) & 0xff,
 710               (parts[0] >> 8) & 0xff, parts[0] & 0xff);
 711     break;
 712   case 1: /* a.b -- 8.24 bits */
 713     if((parts[0] > 0xff) || (parts[1] > 0xffffff))
 714       return FALSE;
 715     msnprintf(outp, olen, "%u.%u.%u.%u",
 716               parts[0], (parts[1] >> 16) & 0xff,
 717               (parts[1] >> 8) & 0xff, parts[1] & 0xff);
 718     break;
 719   case 2: /* a.b.c -- 8.8.16 bits */
 720     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
 721       return FALSE;
 722     msnprintf(outp, olen, "%u.%u.%u.%u",
 723               parts[0], parts[1], (parts[2] >> 8) & 0xff,
 724               parts[2] & 0xff);
 725     break;
 726   case 3: /* a.b.c.d -- 8.8.8.8 bits */
 727     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
 728        (parts[3] > 0xff))
 729       return FALSE;
 730     msnprintf(outp, olen, "%u.%u.%u.%u",
 731               parts[0], parts[1], parts[2], parts[3]);
 732     break;
 733   }
 734   return TRUE;
 735 }
 736
 737 /* if necessary, replace the host content with a URL decoded version */
 738 static CURLUcode decode_host(struct dynbuf *host)
 739 {
 740   char *per = NULL;
 741   const char *hostname = Curl_dyn_ptr(host);
 742   if(hostname[0] == '[')
 743     /* only decode if not an ipv6 numerical */
 744     return CURLUE_OK;
 745   per = strchr(hostname, '%');
 746   if(!per)
 747     /* nothing to decode */
 748     return CURLUE_OK;
 749   else {
 750     /* encoded */
 751     size_t dlen;
 752     char *decoded;
 753     CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
 754                                      REJECT_CTRL);
 755     if(result)
 756       return CURLUE_BAD_HOSTNAME;
 757     Curl_dyn_reset(host);
 758     result = Curl_dyn_addn(host, decoded, dlen);
 759     free(decoded);
 760     if(result)
 761       return CURLUE_OUT_OF_MEMORY;
 762   }
 763
 764   return CURLUE_OK;
 765 }
 766
 767 /*
 768  * "Remove Dot Segments"
 769  * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
 770  */
 771
 772 /*
 773  * dedotdotify()
 774  * @unittest: 1395
 775  *
 776  * This function gets a null-terminated path with dot and dotdot sequences
 777  * passed in and strips them off according to the rules in RFC 3986 section
 778  * 5.2.4.
 779  *
 780  * The function handles a query part ('?' + stuff) appended but it expects
 781  * that fragments ('#' + stuff) have already been cut off.
 782  *
 783  * RETURNS
 784  *
 785  * an allocated dedotdotified output string
 786  */
 787 UNITTEST char *dedotdotify(const char *input, size_t clen);
 788 UNITTEST char *dedotdotify(const char *input, size_t clen)
 789 {
 790   char *out = malloc(clen + 1);
 791   char *outptr;
 792   const char *orginput = input;
 793   char *queryp;
 794   if(!out)
 795     return NULL; /* out of memory */
 796
 797   *out = 0; /* null-terminates, for inputs like "./" */
 798   outptr = out;
 799
 800   if(!*input)
 801     /* zero length input string, return that */
 802     return out;
 803
 804   /*
 805    * To handle query-parts properly, we must find it and remove it during the
 806    * dotdot-operation and then append it again at the end to the output
 807    * string.
 808    */
 809   queryp = strchr(input, '?');
 810
 811   do {
 812     bool dotdot = TRUE;
 813     if(*input == '.') {
 814       /*  A.  If the input buffer begins with a prefix of "../" or "./", then
 815           remove that prefix from the input buffer; otherwise, */
 816
 817       if(!strncmp("./", input, 2)) {
 818         input += 2;
 819         clen -= 2;
 820       }
 821       else if(!strncmp("../", input, 3)) {
 822         input += 3;
 823         clen -= 3;
 824       }
 825       /*  D.  if the input buffer consists only of "." or "..", then remove
 826           that from the input buffer; otherwise, */
 827
 828       else if(!strcmp(".", input) || !strcmp("..", input) ||
 829               !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
 830         *out = 0;
 831         break;
 832       }
 833       else
 834         dotdot = FALSE;
 835     }
 836     else if(*input == '/') {
 837       /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
 838           "."  is a complete path segment, then replace that prefix with "/" in
 839           the input buffer; otherwise, */
 840       if(!strncmp("/./", input, 3)) {
 841         input += 2;
 842         clen -= 2;
 843       }
 844       else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
 845         *outptr++ = '/';
 846         *outptr = 0;
 847         break;
 848       }
 849
 850       /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
 851           where ".." is a complete path segment, then replace that prefix with
 852           "/" in the input buffer and remove the last segment and its
 853           preceding "/" (if any) from the output buffer; otherwise, */
 854
 855       else if(!strncmp("/../", input, 4)) {
 856         input += 3;
 857         clen -= 3;
 858         /* remove the last segment from the output buffer */
 859         while(outptr > out) {
 860           outptr--;
 861           if(*outptr == '/')
 862             break;
 863         }
 864         *outptr = 0; /* null-terminate where it stops */
 865       }
 866       else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
 867         /* remove the last segment from the output buffer */
 868         while(outptr > out) {
 869           outptr--;
 870           if(*outptr == '/')
 871             break;
 872         }
 873         *outptr++ = '/';
 874         *outptr = 0; /* null-terminate where it stops */
 875         break;
 876       }
 877       else
 878         dotdot = FALSE;
 879     }
 880     else
 881       dotdot = FALSE;
 882
 883     if(!dotdot) {
 884       /*  E.  move the first path segment in the input buffer to the end of
 885           the output buffer, including the initial "/" character (if any) and
 886           any subsequent characters up to, but not including, the next "/"
 887           character or the end of the input buffer. */
 888
 889       do {
 890         *outptr++ = *input++;
 891         clen--;
 892       } while(*input && (*input != '/') && (*input != '?'));
 893       *outptr = 0;
 894     }
 895
 896     /* continue until end of input string OR, if there is a terminating
 897        query part, stop there */
 898   } while(*input && (!queryp || (input < queryp)));
 899
 900   if(queryp) {
 901     size_t qlen;
 902     /* There was a query part, append that to the output. */
 903     size_t oindex = queryp - orginput;
 904     qlen = strlen(&orginput[oindex]);
 905     memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
 906   }
 907
 908   return out;
 909 }
 910
 911 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
 912 {
 913   const char *path;
 914   size_t pathlen;
 915   bool uncpath = FALSE;
 916   char *query = NULL;
 917   char *fragment = NULL;
 918   char schemebuf[MAX_SCHEME_LEN + 1];
 919   const char *schemep = NULL;
 920   size_t schemelen = 0;
 921   size_t urllen;
 922   CURLUcode result = CURLUE_OK;
 923   size_t fraglen = 0;
 924   struct dynbuf host;
 925
 926   DEBUGASSERT(url);
 927
 928   Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
 929
 930   /*************************************************************
 931    * Parse the URL.
 932    ************************************************************/
 933   /* allocate scratch area */
 934   urllen = strlen(url);
 935   if(urllen > CURL_MAX_INPUT_LENGTH) {
 936     /* excessive input length */
 937     result = CURLUE_MALFORMED_INPUT;
 938     goto fail;
 939   }
 940
 941   schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
 942                                    flags & (CURLU_GUESS_SCHEME|
 943                                             CURLU_DEFAULT_SCHEME));
 944
 945   /* handle the file: scheme */
 946   if(schemelen && !strcmp(schemebuf, "file")) {
 947     if(urllen <= 6) {
 948       /* file:/ is not enough to actually be a complete file: URL */
 949       result = CURLUE_BAD_FILE_URL;
 950       goto fail;
 951     }
 952
 953     /* path has been allocated large enough to hold this */
 954     path = (char *)&url[5];
 955
 956     schemep = u->scheme = strdup("file");
 957     if(!u->scheme) {
 958       result = CURLUE_OUT_OF_MEMORY;
 959       goto fail;
 960     }
 961
 962     /* Extra handling URLs with an authority component (i.e. that start with
 963      * "file://")
 964      *
 965      * We allow omitted hostname (e.g. file:/<path>) -- valid according to
 966      * RFC 8089, but not the (current) WHAT-WG URL spec.
 967      */
 968     if(path[0] == '/' && path[1] == '/') {
 969       /* swallow the two slashes */
 970       const char *ptr = &path[2];
 971
 972       /*
 973        * According to RFC 8089, a file: URL can be reliably dereferenced if:
 974        *
 975        *  o it has no/blank hostname, or
 976        *
 977        *  o the hostname matches "localhost" (case-insensitively), or
 978        *
 979        *  o the hostname is a FQDN that resolves to this machine, or
 980        *
 981        *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
 982        *    Appendix E.3).
 983        *
 984        * For brevity, we only consider URLs with empty, "localhost", or
 985        * "127.0.0.1" hostnames as local, otherwise as an UNC String.
 986        *
 987        * Additionally, there is an exception for URLs with a Windows drive
 988        * letter in the authority (which was accidentally omitted from RFC 8089
 989        * Appendix E, but believe me, it was meant to be there. --MK)
 990        */
 991       if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
 992         /* the URL includes a host name, it must match "localhost" or
 993            "127.0.0.1" to be valid */
 994         if(checkprefix("localhost/", ptr) ||
 995            checkprefix("127.0.0.1/", ptr)) {
 996           ptr += 9; /* now points to the slash after the host */
 997         }
 998         else {
 999 #if defined(WIN32)
1000           size_t len;
1001
1002           /* the host name, NetBIOS computer name, can not contain disallowed
1003              chars, and the delimiting slash character must be appended to the
1004              host name */
1005           path = strpbrk(ptr, "/\\:*?\"<>|");
1006           if(!path || *path != '/') {
1007             result = CURLUE_BAD_FILE_URL;
1008             goto fail;
1009           }
1010
1011           len = path - ptr;
1012           if(len) {
1013             if(Curl_dyn_addn(&host, ptr, len)) {
1014               result = CURLUE_OUT_OF_MEMORY;
1015               goto fail;
1016             }
1017             uncpath = TRUE;
1018           }
1019
1020           ptr -= 2; /* now points to the // before the host in UNC */
1021 #else
1022           /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1023              none */
1024           result = CURLUE_BAD_FILE_URL;
1025           goto fail;
1026 #endif
1027         }
1028       }
1029
1030       path = ptr;
1031     }
1032
1033     if(!uncpath)
1034       /* no host for file: URLs by default */
1035       Curl_dyn_reset(&host);
1036
1037 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1038     /* Don't allow Windows drive letters when not in Windows.
1039      * This catches both "file:/c:" and "file:c:" */
1040     if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1041        STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1042       /* File drive letters are only accepted in MSDOS/Windows */
1043       result = CURLUE_BAD_FILE_URL;
1044       goto fail;
1045     }
1046 #else
1047     /* If the path starts with a slash and a drive letter, ditch the slash */
1048     if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1049       /* This cannot be done with strcpy, as the memory chunks overlap! */
1050       path++;
1051     }
1052 #endif
1053
1054   }
1055   else {
1056     /* clear path */
1057     const char *p;
1058     const char *hostp;
1059     size_t len;
1060
1061     if(schemelen) {
1062       int i = 0;
1063       p = &url[schemelen + 1];
1064       while(p && (*p == '/') && (i < 4)) {
1065         p++;
1066         i++;
1067       }
1068
1069       schemep = schemebuf;
1070       if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
1071          !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1072         result = CURLUE_UNSUPPORTED_SCHEME;
1073         goto fail;
1074       }
1075
1076       if((i < 1) || (i>3)) {
1077         /* less than one or more than three slashes */
1078         result = CURLUE_BAD_SLASHES;
1079         goto fail;
1080       }
1081       if(junkscan(schemep, flags)) {
1082         result = CURLUE_BAD_SCHEME;
1083         goto fail;
1084       }
1085     }
1086     else {
1087       /* no scheme! */
1088
1089       if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1090         result = CURLUE_BAD_SCHEME;
1091         goto fail;
1092       }
1093       if(flags & CURLU_DEFAULT_SCHEME)
1094         schemep = DEFAULT_SCHEME;
1095
1096       /*
1097        * The URL was badly formatted, let's try without scheme specified.
1098        */
1099       p = url;
1100     }
1101     hostp = p; /* host name starts here */
1102
1103     /* find the end of the host name + port number */
1104     while(*p && !HOSTNAME_END(*p))
1105       p++;
1106
1107     len = p - hostp;
1108     if(len) {
1109       if(Curl_dyn_addn(&host, hostp, len)) {
1110         result = CURLUE_OUT_OF_MEMORY;
1111         goto fail;
1112       }
1113     }
1114     else {
1115       if(!(flags & CURLU_NO_AUTHORITY)) {
1116         result = CURLUE_NO_HOST;
1117         goto fail;
1118       }
1119     }
1120
1121     path = (char *)p;
1122
1123     if(schemep) {
1124       u->scheme = strdup(schemep);
1125       if(!u->scheme) {
1126         result = CURLUE_OUT_OF_MEMORY;
1127         goto fail;
1128       }
1129     }
1130   }
1131
1132   fragment = strchr(path, '#');
1133   if(fragment) {
1134     fraglen = strlen(fragment);
1135     if(fraglen > 1) {
1136       /* skip the leading '#' in the copy but include the terminating null */
1137       u->fragment = Curl_memdup(fragment + 1, fraglen);
1138       if(!u->fragment) {
1139         result = CURLUE_OUT_OF_MEMORY;
1140         goto fail;
1141       }
1142
1143       if(junkscan(u->fragment, flags)) {
1144         result = CURLUE_BAD_FRAGMENT;
1145         goto fail;
1146       }
1147     }
1148   }
1149
1150   query = strchr(path, '?');
1151   if(query && (!fragment || (query < fragment))) {
1152     size_t qlen = strlen(query) - fraglen; /* includes '?' */
1153     pathlen = strlen(path) - qlen - fraglen;
1154     if(qlen > 1) {
1155       if(qlen && (flags & CURLU_URLENCODE)) {
1156         struct dynbuf enc;
1157         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1158         /* skip the leading question mark */
1159         if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
1160           result = CURLUE_OUT_OF_MEMORY;
1161           goto fail;
1162         }
1163         u->query = Curl_dyn_ptr(&enc);
1164       }
1165       else {
1166         u->query = Curl_memdup(query + 1, qlen);
1167         if(!u->query) {
1168           result = CURLUE_OUT_OF_MEMORY;
1169           goto fail;
1170         }
1171         u->query[qlen - 1] = 0;
1172       }
1173
1174       if(junkscan(u->query, flags)) {
1175         result = CURLUE_BAD_QUERY;
1176         goto fail;
1177       }
1178     }
1179     else {
1180       /* single byte query */
1181       u->query = strdup("");
1182       if(!u->query) {
1183         result = CURLUE_OUT_OF_MEMORY;
1184         goto fail;
1185       }
1186     }
1187   }
1188   else
1189     pathlen = strlen(path) - fraglen;
1190
1191   if(pathlen && (flags & CURLU_URLENCODE)) {
1192     struct dynbuf enc;
1193     Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1194     if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
1195       result = CURLUE_OUT_OF_MEMORY;
1196       goto fail;
1197     }
1198     pathlen = Curl_dyn_len(&enc);
1199     path = u->path = Curl_dyn_ptr(&enc);
1200   }
1201
1202   if(!pathlen) {
1203     /* there is no path left, unset */
1204     path = NULL;
1205   }
1206   else {
1207     if(!u->path) {
1208       u->path = Curl_memdup(path, pathlen + 1);
1209       if(!u->path) {
1210         result = CURLUE_OUT_OF_MEMORY;
1211         goto fail;
1212       }
1213       u->path[pathlen] = 0;
1214       path = u->path;
1215     }
1216     else if(flags & CURLU_URLENCODE)
1217       /* it might have encoded more than just the path so cut it */
1218       u->path[pathlen] = 0;
1219
1220     if(junkscan(u->path, flags)) {
1221       result = CURLUE_BAD_PATH;
1222       goto fail;
1223     }
1224
1225     if(!(flags & CURLU_PATH_AS_IS)) {
1226       /* remove ../ and ./ sequences according to RFC3986 */
1227       char *newp = dedotdotify((char *)path, pathlen);
1228       if(!newp) {
1229         result = CURLUE_OUT_OF_MEMORY;
1230         goto fail;
1231       }
1232       free(u->path);
1233       u->path = newp;
1234     }
1235   }
1236
1237   if(Curl_dyn_len(&host)) {
1238     char normalized_ipv4[sizeof("255.255.255.255") + 1];
1239
1240     /*
1241      * Parse the login details and strip them out of the host name.
1242      */
1243     result = parse_hostname_login(u, &host, flags);
1244     if(!result)
1245       result = Curl_parse_port(u, &host, schemelen);
1246     if(result)
1247       goto fail;
1248
1249     if(junkscan(Curl_dyn_ptr(&host), flags)) {
1250       result = CURLUE_BAD_HOSTNAME;
1251       goto fail;
1252     }
1253
1254     if(ipv4_normalize(Curl_dyn_ptr(&host),
1255                       normalized_ipv4, sizeof(normalized_ipv4))) {
1256       Curl_dyn_reset(&host);
1257       if(Curl_dyn_add(&host, normalized_ipv4)) {
1258         result = CURLUE_OUT_OF_MEMORY;
1259         goto fail;
1260       }
1261     }
1262     else {
1263       result = decode_host(&host);
1264       if(!result)
1265         result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
1266       if(result)
1267         goto fail;
1268     }
1269
1270     if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1271       const char *hostname = Curl_dyn_ptr(&host);
1272       /* legacy curl-style guess based on host name */
1273       if(checkprefix("ftp.", hostname))
1274         schemep = "ftp";
1275       else if(checkprefix("dict.", hostname))
1276         schemep = "dict";
1277       else if(checkprefix("ldap.", hostname))
1278         schemep = "ldap";
1279       else if(checkprefix("imap.", hostname))
1280         schemep = "imap";
1281       else if(checkprefix("smtp.", hostname))
1282         schemep = "smtp";
1283       else if(checkprefix("pop3.", hostname))
1284         schemep = "pop3";
1285       else
1286         schemep = "http";
1287
1288       u->scheme = strdup(schemep);
1289       if(!u->scheme) {
1290         result = CURLUE_OUT_OF_MEMORY;
1291         goto fail;
1292       }
1293     }
1294   }
1295   else if(flags & CURLU_NO_AUTHORITY) {
1296     /* allowed to be empty. */
1297     if(Curl_dyn_add(&host, "")) {
1298       result = CURLUE_OUT_OF_MEMORY;
1299       goto fail;
1300     }
1301   }
1302
1303   u->host = Curl_dyn_ptr(&host);
1304
1305   return result;
1306   fail:
1307   Curl_dyn_free(&host);
1308   free_urlhandle(u);
1309   return result;
1310 }
1311
1312 /*
1313  * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1314  */
1315 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1316                                       unsigned int flags)
1317 {
1318   CURLUcode result;
1319   CURLU tmpurl;
1320   memset(&tmpurl, 0, sizeof(tmpurl));
1321   result = parseurl(url, &tmpurl, flags);
1322   if(!result) {
1323     free_urlhandle(u);
1324     *u = tmpurl;
1325   }
1326   return result;
1327 }
1328
1329 /*
1330  */
1331 CURLU *curl_url(void)
1332 {
1333   return calloc(sizeof(struct Curl_URL), 1);
1334 }
1335
1336 void curl_url_cleanup(CURLU *u)
1337 {
1338   if(u) {
1339     free_urlhandle(u);
1340     free(u);
1341   }
1342 }
1343
/*
 * DUP() copies the string member 'name' from the struct 'src' points to into
 * the struct 'dest' points to, but only when that member is set in the
 * source. A failing strdup() jumps to a 'fail' label that the function using
 * the macro must provide.
 *
 * 'dest' and 'src' are parenthesized so that any pointer expression (such as
 * '&obj') can be passed. Both are evaluated more than once, so they must not
 * have side effects.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1352
1353 CURLU *curl_url_dup(CURLU *in)
1354 {
1355   struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1356   if(u) {
1357     DUP(u, in, scheme);
1358     DUP(u, in, user);
1359     DUP(u, in, password);
1360     DUP(u, in, options);
1361     DUP(u, in, host);
1362     DUP(u, in, port);
1363     DUP(u, in, path);
1364     DUP(u, in, query);
1365     DUP(u, in, fragment);
1366     u->portnum = in->portnum;
1367   }
1368   return u;
1369   fail:
1370   curl_url_cleanup(u);
1371   return NULL;
1372 }
1373
1374 CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1375                        char **part, unsigned int flags)
1376 {
1377   char *ptr;
1378   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1379   char portbuf[7];
1380   bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1381   bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1382   bool plusdecode = FALSE;
1383   (void)flags;
1384   if(!u)
1385     return CURLUE_BAD_HANDLE;
1386   if(!part)
1387     return CURLUE_BAD_PARTPOINTER;
1388   *part = NULL;
1389
1390   switch(what) {
1391   case CURLUPART_SCHEME:
1392     ptr = u->scheme;
1393     ifmissing = CURLUE_NO_SCHEME;
1394     urldecode = FALSE; /* never for schemes */
1395     break;
1396   case CURLUPART_USER:
1397     ptr = u->user;
1398     ifmissing = CURLUE_NO_USER;
1399     break;
1400   case CURLUPART_PASSWORD:
1401     ptr = u->password;
1402     ifmissing = CURLUE_NO_PASSWORD;
1403     break;
1404   case CURLUPART_OPTIONS:
1405     ptr = u->options;
1406     ifmissing = CURLUE_NO_OPTIONS;
1407     break;
1408   case CURLUPART_HOST:
1409     ptr = u->host;
1410     ifmissing = CURLUE_NO_HOST;
1411     break;
1412   case CURLUPART_ZONEID:
1413     ptr = u->zoneid;
1414     ifmissing = CURLUE_NO_ZONEID;
1415     break;
1416   case CURLUPART_PORT:
1417     ptr = u->port;
1418     ifmissing = CURLUE_NO_PORT;
1419     urldecode = FALSE; /* never for port */
1420     if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1421       /* there's no stored port number, but asked to deliver
1422          a default one for the scheme */
1423       const struct Curl_handler *h =
1424         Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1425       if(h) {
1426         msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1427         ptr = portbuf;
1428       }
1429     }
1430     else if(ptr && u->scheme) {
1431       /* there is a stored port number, but ask to inhibit if
1432          it matches the default one for the scheme */
1433       const struct Curl_handler *h =
1434         Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1435       if(h && (h->defport == u->portnum) &&
1436          (flags & CURLU_NO_DEFAULT_PORT))
1437         ptr = NULL;
1438     }
1439     break;
1440   case CURLUPART_PATH:
1441     ptr = u->path;
1442     if(!ptr) {
1443       ptr = u->path = strdup("/");
1444       if(!u->path)
1445         return CURLUE_OUT_OF_MEMORY;
1446     }
1447     break;
1448   case CURLUPART_QUERY:
1449     ptr = u->query;
1450     ifmissing = CURLUE_NO_QUERY;
1451     plusdecode = urldecode;
1452     break;
1453   case CURLUPART_FRAGMENT:
1454     ptr = u->fragment;
1455     ifmissing = CURLUE_NO_FRAGMENT;
1456     break;
1457   case CURLUPART_URL: {
1458     char *url;
1459     char *scheme;
1460     char *options = u->options;
1461     char *port = u->port;
1462     char *allochost = NULL;
1463     if(u->scheme && strcasecompare("file", u->scheme)) {
1464       url = aprintf("file://%s%s%s",
1465                     u->path,
1466                     u->fragment? "#": "",
1467                     u->fragment? u->fragment : "");
1468     }
1469     else if(!u->host)
1470       return CURLUE_NO_HOST;
1471     else {
1472       const struct Curl_handler *h = NULL;
1473       if(u->scheme)
1474         scheme = u->scheme;
1475       else if(flags & CURLU_DEFAULT_SCHEME)
1476         scheme = (char *) DEFAULT_SCHEME;
1477       else
1478         return CURLUE_NO_SCHEME;
1479
1480       h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1481       if(!port && (flags & CURLU_DEFAULT_PORT)) {
1482         /* there's no stored port number, but asked to deliver
1483            a default one for the scheme */
1484         if(h) {
1485           msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1486           port = portbuf;
1487         }
1488       }
1489       else if(port) {
1490         /* there is a stored port number, but asked to inhibit if it matches
1491            the default one for the scheme */
1492         if(h && (h->defport == u->portnum) &&
1493            (flags & CURLU_NO_DEFAULT_PORT))
1494           port = NULL;
1495       }
1496
1497       if(h && !(h->flags & PROTOPT_URLOPTIONS))
1498         options = NULL;
1499
1500       if(u->host[0] == '[') {
1501         if(u->zoneid) {
1502           /* make it '[ host %25 zoneid ]' */
1503           struct dynbuf enc;
1504           size_t hostlen = strlen(u->host);
1505           Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1506           if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1507                            u->zoneid))
1508             return CURLUE_OUT_OF_MEMORY;
1509           allochost = Curl_dyn_ptr(&enc);
1510         }
1511       }
1512       else if(urlencode) {
1513         allochost = curl_easy_escape(NULL, u->host, 0);
1514         if(!allochost)
1515           return CURLUE_OUT_OF_MEMORY;
1516       }
1517       else {
1518         /* only encode '%' in output host name */
1519         char *host = u->host;
1520         bool percent = FALSE;
1521         /* first, count number of percents present in the name */
1522         while(*host) {
1523           if(*host == '%') {
1524             percent = TRUE;
1525             break;
1526           }
1527           host++;
1528         }
1529         /* if there were percent(s), encode the host name */
1530         if(percent) {
1531           struct dynbuf enc;
1532           CURLcode result;
1533           Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1534           host = u->host;
1535           while(*host) {
1536             if(*host == '%')
1537               result = Curl_dyn_addn(&enc, "%25", 3);
1538             else
1539               result = Curl_dyn_addn(&enc, host, 1);
1540             if(result)
1541               return CURLUE_OUT_OF_MEMORY;
1542             host++;
1543           }
1544           free(u->host);
1545           u->host = Curl_dyn_ptr(&enc);
1546         }
1547       }
1548
1549       url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1550                     scheme,
1551                     u->user ? u->user : "",
1552                     u->password ? ":": "",
1553                     u->password ? u->password : "",
1554                     options ? ";" : "",
1555                     options ? options : "",
1556                     (u->user || u->password || options) ? "@": "",
1557                     allochost ? allochost : u->host,
1558                     port ? ":": "",
1559                     port ? port : "",
1560                     (u->path && (u->path[0] != '/')) ? "/": "",
1561                     u->path ? u->path : "/",
1562                     (u->query && u->query[0]) ? "?": "",
1563                     (u->query && u->query[0]) ? u->query : "",
1564                     u->fragment? "#": "",
1565                     u->fragment? u->fragment : "");
1566       free(allochost);
1567     }
1568     if(!url)
1569       return CURLUE_OUT_OF_MEMORY;
1570     *part = url;
1571     return CURLUE_OK;
1572   }
1573   default:
1574     ptr = NULL;
1575     break;
1576   }
1577   if(ptr) {
1578     size_t partlen = strlen(ptr);
1579     size_t i = 0;
1580     *part = Curl_memdup(ptr, partlen + 1);
1581     if(!*part)
1582       return CURLUE_OUT_OF_MEMORY;
1583     if(plusdecode) {
1584       /* convert + to space */
1585       char *plus = *part;
1586       for(i = 0; i < partlen; ++plus, i++) {
1587         if(*plus == '+')
1588           *plus = ' ';
1589       }
1590     }
1591     if(urldecode) {
1592       char *decoded;
1593       size_t dlen;
1594       /* this unconditional rejection of control bytes is documented
1595          API behavior */
1596       CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1597       free(*part);
1598       if(res) {
1599         *part = NULL;
1600         return CURLUE_URLDECODE;
1601       }
1602       *part = decoded;
1603       partlen = dlen;
1604     }
1605     if(urlencode) {
1606       struct dynbuf enc;
1607       Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1608       if(urlencode_str(&enc, *part, partlen, TRUE,
1609                        what == CURLUPART_QUERY))
1610         return CURLUE_OUT_OF_MEMORY;
1611       free(*part);
1612       *part = Curl_dyn_ptr(&enc);
1613     }
1614
1615     return CURLUE_OK;
1616   }
1617   else
1618     return ifmissing;
1619 }
1620
1621 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1622                        const char *part, unsigned int flags)
1623 {
1624   char **storep = NULL;
1625   long port = 0;
1626   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1627   bool plusencode = FALSE;
1628   bool urlskipslash = FALSE;
1629   bool appendquery = FALSE;
1630   bool equalsencode = FALSE;
1631
1632   if(!u)
1633     return CURLUE_BAD_HANDLE;
1634   if(!part) {
1635     /* setting a part to NULL clears it */
1636     switch(what) {
1637     case CURLUPART_URL:
1638       break;
1639     case CURLUPART_SCHEME:
1640       storep = &u->scheme;
1641       break;
1642     case CURLUPART_USER:
1643       storep = &u->user;
1644       break;
1645     case CURLUPART_PASSWORD:
1646       storep = &u->password;
1647       break;
1648     case CURLUPART_OPTIONS:
1649       storep = &u->options;
1650       break;
1651     case CURLUPART_HOST:
1652       storep = &u->host;
1653       break;
1654     case CURLUPART_ZONEID:
1655       storep = &u->zoneid;
1656       break;
1657     case CURLUPART_PORT:
1658       u->portnum = 0;
1659       storep = &u->port;
1660       break;
1661     case CURLUPART_PATH:
1662       storep = &u->path;
1663       break;
1664     case CURLUPART_QUERY:
1665       storep = &u->query;
1666       break;
1667     case CURLUPART_FRAGMENT:
1668       storep = &u->fragment;
1669       break;
1670     default:
1671       return CURLUE_UNKNOWN_PART;
1672     }
1673     if(storep && *storep) {
1674       Curl_safefree(*storep);
1675     }
1676     else if(!storep) {
1677       free_urlhandle(u);
1678       memset(u, 0, sizeof(struct Curl_URL));
1679     }
1680     return CURLUE_OK;
1681   }
1682
1683   switch(what) {
1684   case CURLUPART_SCHEME:
1685     if(strlen(part) > MAX_SCHEME_LEN)
1686       /* too long */
1687       return CURLUE_BAD_SCHEME;
1688     if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1689        /* verify that it is a fine scheme */
1690        !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
1691       return CURLUE_UNSUPPORTED_SCHEME;
1692     storep = &u->scheme;
1693     urlencode = FALSE; /* never */
1694     break;
1695   case CURLUPART_USER:
1696     storep = &u->user;
1697     break;
1698   case CURLUPART_PASSWORD:
1699     storep = &u->password;
1700     break;
1701   case CURLUPART_OPTIONS:
1702     storep = &u->options;
1703     break;
1704   case CURLUPART_HOST: {
1705     size_t len = strcspn(part, " \r\n");
1706     if(strlen(part) != len)
1707       /* hostname with bad content */
1708       return CURLUE_BAD_HOSTNAME;
1709     storep = &u->host;
1710     Curl_safefree(u->zoneid);
1711     break;
1712   }
1713   case CURLUPART_ZONEID:
1714     storep = &u->zoneid;
1715     break;
1716   case CURLUPART_PORT:
1717   {
1718     char *endp;
1719     urlencode = FALSE; /* never */
1720     port = strtol(part, &endp, 10);  /* Port number must be decimal */
1721     if((port <= 0) || (port > 0xffff))
1722       return CURLUE_BAD_PORT_NUMBER;
1723     if(*endp)
1724       /* weirdly provided number, not good! */
1725       return CURLUE_BAD_PORT_NUMBER;
1726     storep = &u->port;
1727   }
1728   break;
1729   case CURLUPART_PATH:
1730     urlskipslash = TRUE;
1731     storep = &u->path;
1732     break;
1733   case CURLUPART_QUERY:
1734     plusencode = urlencode;
1735     appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1736     equalsencode = appendquery;
1737     storep = &u->query;
1738     break;
1739   case CURLUPART_FRAGMENT:
1740     storep = &u->fragment;
1741     break;
1742   case CURLUPART_URL: {
1743     /*
1744      * Allow a new URL to replace the existing (if any) contents.
1745      *
1746      * If the existing contents is enough for a URL, allow a relative URL to
1747      * replace it.
1748      */
1749     CURLUcode result;
1750     char *oldurl;
1751     char *redired_url;
1752
1753     /* if the new thing is absolute or the old one is not
1754      * (we could not get an absolute url in 'oldurl'),
1755      * then replace the existing with the new. */
1756     if(Curl_is_absolute_url(part, NULL, 0,
1757                             flags & (CURLU_GUESS_SCHEME|
1758                                      CURLU_DEFAULT_SCHEME))
1759        || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1760       return parseurl_and_replace(part, u, flags);
1761     }
1762
1763     /* apply the relative part to create a new URL
1764      * and replace the existing one with it. */
1765     redired_url = concat_url(oldurl, part);
1766     free(oldurl);
1767     if(!redired_url)
1768       return CURLUE_OUT_OF_MEMORY;
1769
1770     result = parseurl_and_replace(redired_url, u, flags);
1771     free(redired_url);
1772     return result;
1773   }
1774   default:
1775     return CURLUE_UNKNOWN_PART;
1776   }
1777   DEBUGASSERT(storep);
1778   {
1779     const char *newp = part;
1780     size_t nalloc = strlen(part);
1781
1782     if(nalloc > CURL_MAX_INPUT_LENGTH)
1783       /* excessive input length */
1784       return CURLUE_MALFORMED_INPUT;
1785
1786     if(urlencode) {
1787       const unsigned char *i;
1788       struct dynbuf enc;
1789
1790       Curl_dyn_init(&enc, nalloc * 3 + 1);
1791
1792       for(i = (const unsigned char *)part; *i; i++) {
1793         CURLcode result;
1794         if((*i == ' ') && plusencode) {
1795           result = Curl_dyn_addn(&enc, "+", 1);
1796           if(result)
1797             return CURLUE_OUT_OF_MEMORY;
1798         }
1799         else if(Curl_isunreserved(*i) ||
1800                 ((*i == '/') && urlskipslash) ||
1801                 ((*i == '=') && equalsencode)) {
1802           if((*i == '=') && equalsencode)
1803             /* only skip the first equals sign */
1804             equalsencode = FALSE;
1805           result = Curl_dyn_addn(&enc, i, 1);
1806           if(result)
1807             return CURLUE_OUT_OF_MEMORY;
1808         }
1809         else {
1810           result = Curl_dyn_addf(&enc, "%%%02x", *i);
1811           if(result)
1812             return CURLUE_OUT_OF_MEMORY;
1813         }
1814       }
1815       newp = Curl_dyn_ptr(&enc);
1816     }
1817     else {
1818       char *p;
1819       newp = strdup(part);
1820       if(!newp)
1821         return CURLUE_OUT_OF_MEMORY;
1822       p = (char *)newp;
1823       while(*p) {
1824         /* make sure percent encoded are lower case */
1825         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1826            (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1827           p[1] = Curl_raw_tolower(p[1]);
1828           p[2] = Curl_raw_tolower(p[2]);
1829           p += 3;
1830         }
1831         else
1832           p++;
1833       }
1834     }
1835
1836     if(appendquery) {
1837       /* Append the 'newp' string onto the old query. Add a '&' separator if
1838          none is present at the end of the existing query already */
1839
1840       size_t querylen = u->query ? strlen(u->query) : 0;
1841       bool addamperand = querylen && (u->query[querylen -1] != '&');
1842       if(querylen) {
1843         struct dynbuf enc;
1844         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1845
1846         if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
1847           goto nomem;
1848
1849         if(addamperand) {
1850           if(Curl_dyn_addn(&enc, "&", 1))
1851             goto nomem;
1852         }
1853         if(Curl_dyn_add(&enc, newp))
1854           goto nomem;
1855         free((char *)newp);
1856         free(*storep);
1857         *storep = Curl_dyn_ptr(&enc);
1858         return CURLUE_OK;
1859         nomem:
1860         free((char *)newp);
1861         return CURLUE_OUT_OF_MEMORY;
1862       }
1863     }
1864
1865     if(what == CURLUPART_HOST) {
1866       size_t n = strlen(newp);
1867       if(!n && (flags & CURLU_NO_AUTHORITY)) {
1868         /* Skip hostname check, it's allowed to be empty. */
1869       }
1870       else {
1871         if(hostname_check(u, (char *)newp, n)) {
1872           free((char *)newp);
1873           return CURLUE_BAD_HOSTNAME;
1874         }
1875       }
1876     }
1877
1878     free(*storep);
1879     *storep = (char *)newp;
1880   }
1881   /* set after the string, to make it not assigned if the allocation above
1882      fails */
1883   if(port)
1884     u->portnum = port;
1885   return CURLUE_OK;
1886 }