lib/file.c

   1 /***************************************************************************
   2  *                                  _   _ ____  _
   3  *  Project                     ___| | | |  _ \| |
   4  *                             / __| | | | |_) | |
   5  *                            | (__| |_| |  _ <| |___
   6  *                             \___|\___/|_| \_\_____|
   7  *
   8  * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
   9  *
  10  * This software is licensed as described in the file COPYING, which
  11  * you should have received as part of this distribution. The terms
  12  * are also available at https://curl.haxx.se/docs/copyright.html.
  13  *
  14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15  * copies of the Software, and permit persons to whom the Software is
  16  * furnished to do so, under the terms of the COPYING file.
  17  *
  18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19  * KIND, either express or implied.
  20  *
  21  ***************************************************************************/
  22
  23 #include "curl_setup.h"
  24
  25 #ifndef CURL_DISABLE_FILE
  26
  27 #ifdef HAVE_NETINET_IN_H
  28 #include <netinet/in.h>
  29 #endif
  30 #ifdef HAVE_NETDB_H
  31 #include <netdb.h>
  32 #endif
  33 #ifdef HAVE_ARPA_INET_H
  34 #include <arpa/inet.h>
  35 #endif
  36 #ifdef HAVE_NET_IF_H
  37 #include <net/if.h>
  38 #endif
  39 #ifdef HAVE_SYS_IOCTL_H
  40 #include <sys/ioctl.h>
  41 #endif
  42
  43 #ifdef HAVE_SYS_PARAM_H
  44 #include <sys/param.h>
  45 #endif
  46
  47 #ifdef HAVE_FCNTL_H
  48 #include <fcntl.h>
  49 #endif
  50
  51 #include "strtoofft.h"
  52 #include "urldata.h"
  53 #include <curl/curl.h>
  54 #include "progress.h"
  55 #include "sendf.h"
  56 #include "escape.h"
  57 #include "file.h"
  58 #include "speedcheck.h"
  59 #include "getinfo.h"
  60 #include "transfer.h"
  61 #include "url.h"
  62 #include "parsedate.h" /* for the week day and month names */
  63 #include "warnless.h"
  64 #include "curl_range.h"
  65 /* The last 3 #include files should be in this order */
  66 #include "curl_printf.h"
  67 #include "curl_memory.h"
  68 #include "memdebug.h"
  69
   70 #if defined(WIN32) || defined(MSDOS) || defined(__EMX__) || \
   71   defined(__SYMBIAN32__)
   72 #define DOS_FILESYSTEM 1 /* enables drive-letter and '\\' path handling */
   73 #endif
   74
   75 #ifdef OPEN_NEEDS_ARG3
   76 #  define open_readonly(p,f) open((p),(f),(0)) /* third argument fixed at 0 */
   77 #else
   78 #  define open_readonly(p,f) open((p),(f)) /* two-argument open() */
   79 #endif
  80
  81 /*
  82  * Forward declarations.
  83  */
  84
  85 static CURLcode file_do(struct connectdata *, bool *done);
  86 static CURLcode file_done(struct connectdata *conn,
  87                           CURLcode status, bool premature);
  88 static CURLcode file_connect(struct connectdata *conn, bool *done);
  89 static CURLcode file_disconnect(struct connectdata *conn,
  90                                 bool dead_connection);
  91 static CURLcode file_setup_connection(struct connectdata *conn);
  92
   93 /*
   94  * FILE scheme handler. Entries are positional; ZERO_NULL marks unset hooks.
   95  */
   96
   97 const struct Curl_handler Curl_handler_file = {
   98   "FILE",                               /* scheme */
   99   file_setup_connection,                /* setup_connection: alloc state */
  100   file_do,                              /* do_it: whole transfer */
  101   file_done,                            /* done: free path, close fd */
  102   ZERO_NULL,                            /* do_more */
  103   file_connect,                         /* connect_it: decode and open */
  104   ZERO_NULL,                            /* connecting */
  105   ZERO_NULL,                            /* doing */
  106   ZERO_NULL,                            /* proto_getsock */
  107   ZERO_NULL,                            /* doing_getsock */
  108   ZERO_NULL,                            /* domore_getsock */
  109   ZERO_NULL,                            /* perform_getsock */
  110   file_disconnect,                      /* disconnect: free path, close fd */
  111   ZERO_NULL,                            /* readwrite */
  112   ZERO_NULL,                            /* connection_check */
  113   0,                                    /* defport */
  114   CURLPROTO_FILE,                       /* protocol */
  115   PROTOPT_NONETWORK | PROTOPT_NOURLQUERY /* flags */
  116 };
 117
 118
 119 static CURLcode file_setup_connection(struct connectdata *conn)
 120 {
 121   /* allocate the FILE specific struct */
 122   conn->data->req.protop = calloc(1, sizeof(struct FILEPROTO));
 123   if(!conn->data->req.protop)
 124     return CURLE_OUT_OF_MEMORY;
 125
 126   return CURLE_OK;
 127 }
 128
 129 /*
 130  * file_connect() gets called from Curl_protocol_connect() to allow us to
 131  * do protocol-specific actions at connect-time.  We emulate a
 132  * connect-then-transfer protocol and "connect" to the file here
 133  */
 134 static CURLcode file_connect(struct connectdata *conn, bool *done)
 135 {
 136   struct Curl_easy *data = conn->data;
 137   char *real_path;
 138   struct FILEPROTO *file = data->req.protop;
 139   int fd;
 140 #ifdef DOS_FILESYSTEM
 141   size_t i;
 142   char *actual_path;
 143 #endif
 144   size_t real_path_len;
 145
 146   CURLcode result = Curl_urldecode(data, data->state.path, 0, &real_path,
 147                                    &real_path_len, FALSE);
 148   if(result)
 149     return result;
 150
 151 #ifdef DOS_FILESYSTEM
 152   /* If the first character is a slash, and there's
 153      something that looks like a drive at the beginning of
 154      the path, skip the slash.  If we remove the initial
 155      slash in all cases, paths without drive letters end up
 156      relative to the current directory which isn't how
 157      browsers work.
 158
 159      Some browsers accept | instead of : as the drive letter
 160      separator, so we do too.
 161
 162      On other platforms, we need the slash to indicate an
 163      absolute pathname.  On Windows, absolute paths start
 164      with a drive letter.
 165   */
 166   actual_path = real_path;
 167   if((actual_path[0] == '/') &&
 168       actual_path[1] &&
 169      (actual_path[2] == ':' || actual_path[2] == '|')) {
 170     actual_path[2] = ':';
 171     actual_path++;
 172     real_path_len--;
 173   }
 174
 175   /* change path separators from '/' to '\\' for DOS, Windows and OS/2 */
 176   for(i = 0; i < real_path_len; ++i)
 177     if(actual_path[i] == '/')
 178       actual_path[i] = '\\';
 179     else if(!actual_path[i]) { /* binary zero */
 180       Curl_safefree(real_path);
 181       return CURLE_URL_MALFORMAT;
 182     }
 183
 184   fd = open_readonly(actual_path, O_RDONLY|O_BINARY);
 185   file->path = actual_path;
 186 #else
 187   if(memchr(real_path, 0, real_path_len)) {
 188     /* binary zeroes indicate foul play */
 189     Curl_safefree(real_path);
 190     return CURLE_URL_MALFORMAT;
 191   }
 192
 193   fd = open_readonly(real_path, O_RDONLY);
 194   file->path = real_path;
 195 #endif
 196   file->freepath = real_path; /* free this when done */
 197
 198   file->fd = fd;
 199   if(!data->set.upload && (fd == -1)) {
 200     failf(data, "Couldn't open file %s", data->state.path);
 201     file_done(conn, CURLE_FILE_COULDNT_READ_FILE, FALSE);
 202     return CURLE_FILE_COULDNT_READ_FILE;
 203   }
 204   *done = TRUE;
 205
 206   return CURLE_OK;
 207 }
 208
 209 static CURLcode file_done(struct connectdata *conn,
 210                                CURLcode status, bool premature)
 211 {
 212   struct FILEPROTO *file = conn->data->req.protop;
 213   (void)status; /* not used */
 214   (void)premature; /* not used */
 215
 216   if(file) {
 217     Curl_safefree(file->freepath);
 218     file->path = NULL;
 219     if(file->fd != -1)
 220       close(file->fd);
 221     file->fd = -1;
 222   }
 223
 224   return CURLE_OK;
 225 }
 226
 227 static CURLcode file_disconnect(struct connectdata *conn,
 228                                 bool dead_connection)
 229 {
 230   struct FILEPROTO *file = conn->data->req.protop;
 231   (void)dead_connection; /* not used */
 232
 233   if(file) {
 234     Curl_safefree(file->freepath);
 235     file->path = NULL;
 236     if(file->fd != -1)
 237       close(file->fd);
 238     file->fd = -1;
 239   }
 240
 241   return CURLE_OK;
 242 }
 243
 244 #ifdef DOS_FILESYSTEM
 245 #define DIRSEP '\\'
 246 #else
 247 #define DIRSEP '/'
 248 #endif
 249
 250 static CURLcode file_upload(struct connectdata *conn)
 251 {
 252   struct FILEPROTO *file = conn->data->req.protop;
 253   const char *dir = strchr(file->path, DIRSEP);
 254   int fd;
 255   int mode;
 256   CURLcode result = CURLE_OK;
 257   struct Curl_easy *data = conn->data;
 258   char *buf = data->state.buffer;
 259   size_t nread;
 260   size_t nwrite;
 261   curl_off_t bytecount = 0;
 262   struct_stat file_stat;
 263   const char *buf2;
 264
 265   /*
 266    * Since FILE: doesn't do the full init, we need to provide some extra
 267    * assignments here.
 268    */
 269   conn->data->req.upload_fromhere = buf;
 270
 271   if(!dir)
 272     return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
 273
 274   if(!dir[1])
 275     return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
 276
 277 #ifdef O_BINARY
 278 #define MODE_DEFAULT O_WRONLY|O_CREAT|O_BINARY
 279 #else
 280 #define MODE_DEFAULT O_WRONLY|O_CREAT
 281 #endif
 282
 283   if(data->state.resume_from)
 284     mode = MODE_DEFAULT|O_APPEND;
 285   else
 286     mode = MODE_DEFAULT|O_TRUNC;
 287
 288   fd = open(file->path, mode, conn->data->set.new_file_perms);
 289   if(fd < 0) {
 290     failf(data, "Can't open %s for writing", file->path);
 291     return CURLE_WRITE_ERROR;
 292   }
 293
 294   if(-1 != data->state.infilesize)
 295     /* known size of data to "upload" */
 296     Curl_pgrsSetUploadSize(data, data->state.infilesize);
 297
 298   /* treat the negative resume offset value as the case of "-" */
 299   if(data->state.resume_from < 0) {
 300     if(fstat(fd, &file_stat)) {
 301       close(fd);
 302       failf(data, "Can't get the size of %s", file->path);
 303       return CURLE_WRITE_ERROR;
 304     }
 305     data->state.resume_from = (curl_off_t)file_stat.st_size;
 306   }
 307
 308   while(!result) {
 309     int readcount;
 310     result = Curl_fillreadbuffer(conn, (int)data->set.buffer_size, &readcount);
 311     if(result)
 312       break;
 313
 314     if(readcount <= 0)  /* fix questionable compare error. curlvms */
 315       break;
 316
 317     nread = (size_t)readcount;
 318
 319     /*skip bytes before resume point*/
 320     if(data->state.resume_from) {
 321       if((curl_off_t)nread <= data->state.resume_from) {
 322         data->state.resume_from -= nread;
 323         nread = 0;
 324         buf2 = buf;
 325       }
 326       else {
 327         buf2 = buf + data->state.resume_from;
 328         nread -= (size_t)data->state.resume_from;
 329         data->state.resume_from = 0;
 330       }
 331     }
 332     else
 333       buf2 = buf;
 334
 335     /* write the data to the target */
 336     nwrite = write(fd, buf2, nread);
 337     if(nwrite != nread) {
 338       result = CURLE_SEND_ERROR;
 339       break;
 340     }
 341
 342     bytecount += nread;
 343
 344     Curl_pgrsSetUploadCounter(data, bytecount);
 345
 346     if(Curl_pgrsUpdate(conn))
 347       result = CURLE_ABORTED_BY_CALLBACK;
 348     else
 349       result = Curl_speedcheck(data, Curl_now());
 350   }
 351   if(!result && Curl_pgrsUpdate(conn))
 352     result = CURLE_ABORTED_BY_CALLBACK;
 353
 354   close(fd);
 355
 356   return result;
 357 }
 358
 359 /*
 360  * file_do() is the protocol-specific function for the do-phase, separated
 361  * from the connect-phase above. Other protocols merely setup the transfer in
 362  * the do-phase, to have it done in the main transfer loop but since some
 363  * platforms we support don't allow select()ing etc on file handles (as
 364  * opposed to sockets) we instead perform the whole do-operation in this
 365  * function.
 366  */
 367 static CURLcode file_do(struct connectdata *conn, bool *done)
 368 {
 369   /* This implementation ignores the host name in conformance with
 370      RFC 1738. Only local files (reachable via the standard file system)
 371      are supported. This means that files on remotely mounted directories
 372      (via NFS, Samba, NT sharing) can be accessed through a file:// URL
 373   */
 374   CURLcode result = CURLE_OK;
 375   struct_stat statbuf; /* struct_stat instead of struct stat just to allow the
 376                           Windows version to have a different struct without
 377                           having to redefine the simple word 'stat' */
 378   curl_off_t expected_size = 0;
 379   bool size_known;
 380   bool fstated = FALSE;
 381   ssize_t nread;
 382   struct Curl_easy *data = conn->data;
 383   char *buf = data->state.buffer;
 384   curl_off_t bytecount = 0;
 385   int fd;
 386   struct FILEPROTO *file;
 387
 388   *done = TRUE; /* unconditionally */
 389
 390   Curl_initinfo(data);
 391   Curl_pgrsStartNow(data);
 392
 393   if(data->set.upload)
 394     return file_upload(conn);
 395
 396   file = conn->data->req.protop;
 397
 398   /* get the fd from the connection phase */
 399   fd = file->fd;
 400
 401   /* VMS: This only works reliable for STREAMLF files */
 402   if(-1 != fstat(fd, &statbuf)) {
 403     /* we could stat it, then read out the size */
 404     expected_size = statbuf.st_size;
 405     /* and store the modification time */
 406     data->info.filetime = statbuf.st_mtime;
 407     fstated = TRUE;
 408   }
 409
 410   if(fstated && !data->state.range && data->set.timecondition) {
 411     if(!Curl_meets_timecondition(data, data->info.filetime)) {
 412       *done = TRUE;
 413       return CURLE_OK;
 414     }
 415   }
 416
 417   /* If we have selected NOBODY and HEADER, it means that we only want file
 418      information. Which for FILE can't be much more than the file size and
 419      date. */
 420   if(data->set.opt_no_body && data->set.include_header && fstated) {
 421     time_t filetime;
 422     struct tm buffer;
 423     const struct tm *tm = &buffer;
 424     char header[80];
 425     snprintf(header, sizeof(header),
 426              "Content-Length: %" CURL_FORMAT_CURL_OFF_T "\r\n", expected_size);
 427     result = Curl_client_write(conn, CLIENTWRITE_BOTH, header, 0);
 428     if(result)
 429       return result;
 430
 431     result = Curl_client_write(conn, CLIENTWRITE_BOTH,
 432                                (char *)"Accept-ranges: bytes\r\n", 0);
 433     if(result)
 434       return result;
 435
 436     filetime = (time_t)statbuf.st_mtime;
 437     result = Curl_gmtime(filetime, &buffer);
 438     if(result)
 439       return result;
 440
 441     /* format: "Tue, 15 Nov 1994 12:45:26 GMT" */
 442     snprintf(header, sizeof(header),
 443              "Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\r\n",
 444              Curl_wkday[tm->tm_wday?tm->tm_wday-1:6],
 445              tm->tm_mday,
 446              Curl_month[tm->tm_mon],
 447              tm->tm_year + 1900,
 448              tm->tm_hour,
 449              tm->tm_min,
 450              tm->tm_sec);
 451     result = Curl_client_write(conn, CLIENTWRITE_BOTH, header, 0);
 452     if(!result)
 453       /* set the file size to make it available post transfer */
 454       Curl_pgrsSetDownloadSize(data, expected_size);
 455     return result;
 456   }
 457
 458   /* Check whether file range has been specified */
 459   result = Curl_range(conn);
 460   if(result)
 461     return result;
 462
 463   /* Adjust the start offset in case we want to get the N last bytes
 464    * of the stream iff the filesize could be determined */
 465   if(data->state.resume_from < 0) {
 466     if(!fstated) {
 467       failf(data, "Can't get the size of file.");
 468       return CURLE_READ_ERROR;
 469     }
 470     data->state.resume_from += (curl_off_t)statbuf.st_size;
 471   }
 472
 473   if(data->state.resume_from <= expected_size)
 474     expected_size -= data->state.resume_from;
 475   else {
 476     failf(data, "failed to resume file:// transfer");
 477     return CURLE_BAD_DOWNLOAD_RESUME;
 478   }
 479
 480   /* A high water mark has been specified so we obey... */
 481   if(data->req.maxdownload > 0)
 482     expected_size = data->req.maxdownload;
 483
 484   if(!fstated || (expected_size == 0))
 485     size_known = FALSE;
 486   else
 487     size_known = TRUE;
 488
 489   /* The following is a shortcut implementation of file reading
 490      this is both more efficient than the former call to download() and
 491      it avoids problems with select() and recv() on file descriptors
 492      in Winsock */
 493   if(fstated)
 494     Curl_pgrsSetDownloadSize(data, expected_size);
 495
 496   if(data->state.resume_from) {
 497     if(data->state.resume_from !=
 498        lseek(fd, data->state.resume_from, SEEK_SET))
 499       return CURLE_BAD_DOWNLOAD_RESUME;
 500   }
 501
 502   Curl_pgrsTime(data, TIMER_STARTTRANSFER);
 503
 504   while(!result) {
 505     /* Don't fill a whole buffer if we want less than all data */
 506     size_t bytestoread;
 507
 508     if(size_known) {
 509       bytestoread = (expected_size < data->set.buffer_size) ?
 510         curlx_sotouz(expected_size) : (size_t)data->set.buffer_size;
 511     }
 512     else
 513       bytestoread = data->set.buffer_size-1;
 514
 515     nread = read(fd, buf, bytestoread);
 516
 517     if(nread > 0)
 518       buf[nread] = 0;
 519
 520     if(nread <= 0 || (size_known && (expected_size == 0)))
 521       break;
 522
 523     bytecount += nread;
 524     if(size_known)
 525       expected_size -= nread;
 526
 527     result = Curl_client_write(conn, CLIENTWRITE_BODY, buf, nread);
 528     if(result)
 529       return result;
 530
 531     Curl_pgrsSetDownloadCounter(data, bytecount);
 532
 533     if(Curl_pgrsUpdate(conn))
 534       result = CURLE_ABORTED_BY_CALLBACK;
 535     else
 536       result = Curl_speedcheck(data, Curl_now());
 537   }
 538   if(Curl_pgrsUpdate(conn))
 539     result = CURLE_ABORTED_BY_CALLBACK;
 540
 541   return result;
 542 }
 543
 544 #endif