fs/cifs/file.c

   1 /*
   2  *   fs/cifs/file.c
   3  *
   4  *   vfs operations that deal with files
   5  *
   6  *   Copyright (C) International Business Machines  Corp., 2002,2010
   7  *   Author(s): Steve French (sfrench@us.ibm.com)
   8  *              Jeremy Allison (jra@samba.org)
   9  *
  10  *   This library is free software; you can redistribute it and/or modify
  11  *   it under the terms of the GNU Lesser General Public License as published
  12  *   by the Free Software Foundation; either version 2.1 of the License, or
  13  *   (at your option) any later version.
  14  *
  15  *   This library is distributed in the hope that it will be useful,
  16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  18  *   the GNU Lesser General Public License for more details.
  19  *
  20  *   You should have received a copy of the GNU Lesser General Public License
  21  *   along with this library; if not, write to the Free Software
  22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23  */
  24 #include <linux/fs.h>
  25 #include <linux/backing-dev.h>
  26 #include <linux/stat.h>
  27 #include <linux/fcntl.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/pagevec.h>
  30 #include <linux/writeback.h>
  31 #include <linux/task_io_accounting_ops.h>
  32 #include <linux/delay.h>
  33 #include <linux/mount.h>
  34 #include <linux/slab.h>
  35 #include <linux/swap.h>
  36 #include <asm/div64.h>
  37 #include "cifsfs.h"
  38 #include "cifspdu.h"
  39 #include "cifsglob.h"
  40 #include "cifsproto.h"
  41 #include "cifs_unicode.h"
  42 #include "cifs_debug.h"
  43 #include "cifs_fs_sb.h"
  44 #include "fscache.h"
  45
  46 static inline int cifs_convert_flags(unsigned int flags)
  47 {
  48         if ((flags & O_ACCMODE) == O_RDONLY)
  49                 return GENERIC_READ;
  50         else if ((flags & O_ACCMODE) == O_WRONLY)
  51                 return GENERIC_WRITE;
  52         else if ((flags & O_ACCMODE) == O_RDWR) {
  53                 /* GENERIC_ALL is too much permission to request
  54                    can cause unnecessary access denied on create */
  55                 /* return GENERIC_ALL; */
  56                 return (GENERIC_READ | GENERIC_WRITE);
  57         }
  58
  59         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
  60                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
  61                 FILE_READ_DATA);
  62 }
  63
  64 static u32 cifs_posix_convert_flags(unsigned int flags)
  65 {
  66         u32 posix_flags = 0;
  67
  68         if ((flags & O_ACCMODE) == O_RDONLY)
  69                 posix_flags = SMB_O_RDONLY;
  70         else if ((flags & O_ACCMODE) == O_WRONLY)
  71                 posix_flags = SMB_O_WRONLY;
  72         else if ((flags & O_ACCMODE) == O_RDWR)
  73                 posix_flags = SMB_O_RDWR;
  74
  75         if (flags & O_CREAT)
  76                 posix_flags |= SMB_O_CREAT;
  77         if (flags & O_EXCL)
  78                 posix_flags |= SMB_O_EXCL;
  79         if (flags & O_TRUNC)
  80                 posix_flags |= SMB_O_TRUNC;
  81         /* be safe and imply O_SYNC for O_DSYNC */
  82         if (flags & O_DSYNC)
  83                 posix_flags |= SMB_O_SYNC;
  84         if (flags & O_DIRECTORY)
  85                 posix_flags |= SMB_O_DIRECTORY;
  86         if (flags & O_NOFOLLOW)
  87                 posix_flags |= SMB_O_NOFOLLOW;
  88         if (flags & O_DIRECT)
  89                 posix_flags |= SMB_O_DIRECT;
  90
  91         return posix_flags;
  92 }
  93
  94 static inline int cifs_get_disposition(unsigned int flags)
  95 {
  96         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
  97                 return FILE_CREATE;
  98         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
  99                 return FILE_OVERWRITE_IF;
 100         else if ((flags & O_CREAT) == O_CREAT)
 101                 return FILE_OPEN_IF;
 102         else if ((flags & O_TRUNC) == O_TRUNC)
 103                 return FILE_OVERWRITE;
 104         else
 105                 return FILE_OPEN;
 106 }
 107
 108 int cifs_posix_open(char *full_path, struct inode **pinode,
 109                         struct super_block *sb, int mode, unsigned int f_flags,
 110                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
 111 {
 112         int rc;
 113         FILE_UNIX_BASIC_INFO *presp_data;
 114         __u32 posix_flags = 0;
 115         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 116         struct cifs_fattr fattr;
 117         struct tcon_link *tlink;
 118         struct cifs_tcon *tcon;
 119
 120         cFYI(1, "posix open %s", full_path);
 121
 122         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
 123         if (presp_data == NULL)
 124                 return -ENOMEM;
 125
 126         tlink = cifs_sb_tlink(cifs_sb);
 127         if (IS_ERR(tlink)) {
 128                 rc = PTR_ERR(tlink);
 129                 goto posix_open_ret;
 130         }
 131
 132         tcon = tlink_tcon(tlink);
 133         mode &= ~current_umask();
 134
 135         posix_flags = cifs_posix_convert_flags(f_flags);
 136         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
 137                              poplock, full_path, cifs_sb->local_nls,
 138                              cifs_sb->mnt_cifs_flags &
 139                                         CIFS_MOUNT_MAP_SPECIAL_CHR);
 140         cifs_put_tlink(tlink);
 141
 142         if (rc)
 143                 goto posix_open_ret;
 144
 145         if (presp_data->Type == cpu_to_le32(-1))
 146                 goto posix_open_ret; /* open ok, caller does qpathinfo */
 147
 148         if (!pinode)
 149                 goto posix_open_ret; /* caller does not need info */
 150
 151         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
 152
 153         /* get new inode and set it up */
 154         if (*pinode == NULL) {
 155                 cifs_fill_uniqueid(sb, &fattr);
 156                 *pinode = cifs_iget(sb, &fattr);
 157                 if (!*pinode) {
 158                         rc = -ENOMEM;
 159                         goto posix_open_ret;
 160                 }
 161         } else {
 162                 cifs_fattr_to_inode(*pinode, &fattr);
 163         }
 164
 165 posix_open_ret:
 166         kfree(presp_data);
 167         return rc;
 168 }
 169
 170 static int
 171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 172              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
 173              struct cifs_fid *fid, unsigned int xid)
 174 {
 175         int rc;
 176         int desired_access;
 177         int disposition;
 178         int create_options = CREATE_NOT_DIR;
 179         FILE_ALL_INFO *buf;
 180
 181         if (!tcon->ses->server->ops->open)
 182                 return -ENOSYS;
 183
 184         desired_access = cifs_convert_flags(f_flags);
 185
 186 /*********************************************************************
 187  *  open flag mapping table:
 188  *
 189  *      POSIX Flag            CIFS Disposition
 190  *      ----------            ----------------
 191  *      O_CREAT               FILE_OPEN_IF
 192  *      O_CREAT | O_EXCL      FILE_CREATE
 193  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 194  *      O_TRUNC               FILE_OVERWRITE
 195  *      none of the above     FILE_OPEN
 196  *
 197  *      Note that there is not a direct match between disposition
 198  *      FILE_SUPERSEDE (ie create whether or not file exists although
 199  *      O_CREAT | O_TRUNC is similar but truncates the existing
 200  *      file rather than creating a new file as FILE_SUPERSEDE does
 201  *      (which uses the attributes / metadata passed in on open call)
 202  *?
 203  *?  O_SYNC is a reasonable match to CIFS writethrough flag
 204  *?  and the read write flags match reasonably.  O_LARGEFILE
 205  *?  is irrelevant because largefile support is always used
 206  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 207  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 208  *********************************************************************/
 209
 210         disposition = cifs_get_disposition(f_flags);
 211
 212         /* BB pass O_SYNC flag through on file attributes .. BB */
 213
 214         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
 215         if (!buf)
 216                 return -ENOMEM;
 217
 218         if (backup_cred(cifs_sb))
 219                 create_options |= CREATE_OPEN_BACKUP_INTENT;
 220
 221         rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition,
 222                                           desired_access, create_options, fid,
 223                                           oplock, buf, cifs_sb);
 224
 225         if (rc)
 226                 goto out;
 227
 228         if (tcon->unix_ext)
 229                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
 230                                               xid);
 231         else
 232                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
 233                                          xid, &fid->netfid);
 234
 235 out:
 236         kfree(buf);
 237         return rc;
 238 }
 239
 240 struct cifsFileInfo *
 241 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 242                   struct tcon_link *tlink, __u32 oplock)
 243 {
 244         struct dentry *dentry = file->f_path.dentry;
 245         struct inode *inode = dentry->d_inode;
 246         struct cifsInodeInfo *cinode = CIFS_I(inode);
 247         struct cifsFileInfo *cfile;
 248
 249         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 250         if (cfile == NULL)
 251                 return cfile;
 252
 253         cfile->count = 1;
 254         cfile->pid = current->tgid;
 255         cfile->uid = current_fsuid();
 256         cfile->dentry = dget(dentry);
 257         cfile->f_flags = file->f_flags;
 258         cfile->invalidHandle = false;
 259         cfile->tlink = cifs_get_tlink(tlink);
 260         mutex_init(&cfile->fh_mutex);
 261         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 262         INIT_LIST_HEAD(&cfile->llist);
 263         tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
 264
 265         spin_lock(&cifs_file_list_lock);
 266         list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
 267         /* if readable file instance put first in list*/
 268         if (file->f_mode & FMODE_READ)
 269                 list_add(&cfile->flist, &cinode->openFileList);
 270         else
 271                 list_add_tail(&cfile->flist, &cinode->openFileList);
 272         spin_unlock(&cifs_file_list_lock);
 273
 274         file->private_data = cfile;
 275         return cfile;
 276 }
 277
 278 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
 279
 280 struct cifsFileInfo *
 281 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 282 {
 283         spin_lock(&cifs_file_list_lock);
 284         cifsFileInfo_get_locked(cifs_file);
 285         spin_unlock(&cifs_file_list_lock);
 286         return cifs_file;
 287 }
 288
 289 /*
 290  * Release a reference on the file private data. This may involve closing
 291  * the filehandle out on the server. Must be called without holding
 292  * cifs_file_list_lock.
 293  */
 294 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 295 {
 296         struct inode *inode = cifs_file->dentry->d_inode;
 297         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
 298         struct cifsInodeInfo *cifsi = CIFS_I(inode);
 299         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 300         struct cifsLockInfo *li, *tmp;
 301
 302         spin_lock(&cifs_file_list_lock);
 303         if (--cifs_file->count > 0) {
 304                 spin_unlock(&cifs_file_list_lock);
 305                 return;
 306         }
 307
 308         /* remove it from the lists */
 309         list_del(&cifs_file->flist);
 310         list_del(&cifs_file->tlist);
 311
 312         if (list_empty(&cifsi->openFileList)) {
 313                 cFYI(1, "closing last open instance for inode %p",
 314                         cifs_file->dentry->d_inode);
 315                 /*
 316                  * In strict cache mode we need invalidate mapping on the last
 317                  * close  because it may cause a error when we open this file
 318                  * again and get at least level II oplock.
 319                  */
 320                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
 321                         CIFS_I(inode)->invalid_mapping = true;
 322                 cifs_set_oplock_level(cifsi, 0);
 323         }
 324         spin_unlock(&cifs_file_list_lock);
 325
 326         cancel_work_sync(&cifs_file->oplock_break);
 327
 328         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
 329                 struct TCP_Server_Info *server = tcon->ses->server;
 330                 unsigned int xid;
 331                 int rc = -ENOSYS;
 332
 333                 xid = get_xid();
 334                 if (server->ops->close)
 335                         rc = server->ops->close(xid, tcon, &cifs_file->fid);
 336                 free_xid(xid);
 337         }
 338
 339         /* Delete any outstanding lock records. We'll lose them when the file
 340          * is closed anyway.
 341          */
 342         mutex_lock(&cifsi->lock_mutex);
 343         list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
 344                 list_del(&li->llist);
 345                 cifs_del_lock_waiters(li);
 346                 kfree(li);
 347         }
 348         mutex_unlock(&cifsi->lock_mutex);
 349
 350         cifs_put_tlink(cifs_file->tlink);
 351         dput(cifs_file->dentry);
 352         kfree(cifs_file);
 353 }
 354
 355 int cifs_open(struct inode *inode, struct file *file)
 356 {
 357         int rc = -EACCES;
 358         unsigned int xid;
 359         __u32 oplock;
 360         struct cifs_sb_info *cifs_sb;
 361         struct cifs_tcon *tcon;
 362         struct tcon_link *tlink;
 363         struct cifsFileInfo *cfile = NULL;
 364         char *full_path = NULL;
 365         bool posix_open_ok = false;
 366         struct cifs_fid fid;
 367
 368         xid = get_xid();
 369
 370         cifs_sb = CIFS_SB(inode->i_sb);
 371         tlink = cifs_sb_tlink(cifs_sb);
 372         if (IS_ERR(tlink)) {
 373                 free_xid(xid);
 374                 return PTR_ERR(tlink);
 375         }
 376         tcon = tlink_tcon(tlink);
 377
 378         full_path = build_path_from_dentry(file->f_path.dentry);
 379         if (full_path == NULL) {
 380                 rc = -ENOMEM;
 381                 goto out;
 382         }
 383
 384         cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
 385                  inode, file->f_flags, full_path);
 386
 387         if (tcon->ses->server->oplocks)
 388                 oplock = REQ_OPLOCK;
 389         else
 390                 oplock = 0;
 391
 392         if (!tcon->broken_posix_open && tcon->unix_ext &&
 393             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 394                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 395                 /* can not refresh inode info since size could be stale */
 396                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
 397                                 cifs_sb->mnt_file_mode /* ignored */,
 398                                 file->f_flags, &oplock, &fid.netfid, xid);
 399                 if (rc == 0) {
 400                         cFYI(1, "posix open succeeded");
 401                         posix_open_ok = true;
 402                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 403                         if (tcon->ses->serverNOS)
 404                                 cERROR(1, "server %s of type %s returned"
 405                                            " unexpected error on SMB posix open"
 406                                            ", disabling posix open support."
 407                                            " Check if server update available.",
 408                                            tcon->ses->serverName,
 409                                            tcon->ses->serverNOS);
 410                         tcon->broken_posix_open = true;
 411                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
 412                          (rc != -EOPNOTSUPP)) /* path not found or net err */
 413                         goto out;
 414                 /*
 415                  * Else fallthrough to retry open the old way on network i/o
 416                  * or DFS errors.
 417                  */
 418         }
 419
 420         if (!posix_open_ok) {
 421                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
 422                                   file->f_flags, &oplock, &fid, xid);
 423                 if (rc)
 424                         goto out;
 425         }
 426
 427         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
 428         if (cfile == NULL) {
 429                 if (tcon->ses->server->ops->close)
 430                         tcon->ses->server->ops->close(xid, tcon, &fid);
 431                 rc = -ENOMEM;
 432                 goto out;
 433         }
 434
 435         cifs_fscache_set_inode_cookie(inode, file);
 436
 437         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
 438                 /*
 439                  * Time to set mode which we can not set earlier due to
 440                  * problems creating new read-only files.
 441                  */
 442                 struct cifs_unix_set_info_args args = {
 443                         .mode   = inode->i_mode,
 444                         .uid    = NO_CHANGE_64,
 445                         .gid    = NO_CHANGE_64,
 446                         .ctime  = NO_CHANGE_64,
 447                         .atime  = NO_CHANGE_64,
 448                         .mtime  = NO_CHANGE_64,
 449                         .device = 0,
 450                 };
 451                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
 452                                        cfile->pid);
 453         }
 454
 455 out:
 456         kfree(full_path);
 457         free_xid(xid);
 458         cifs_put_tlink(tlink);
 459         return rc;
 460 }
 461
 462 /*
 463  * Try to reacquire byte range locks that were released when session
 464  * to server was lost
 465  */
 466 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
 467 {
 468         int rc = 0;
 469
 470         /* BB list all locks open on this file and relock */
 471
 472         return rc;
 473 }
 474
 475 static int
 476 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 477 {
 478         int rc = -EACCES;
 479         unsigned int xid;
 480         __u32 oplock;
 481         struct cifs_sb_info *cifs_sb;
 482         struct cifs_tcon *tcon;
 483         struct TCP_Server_Info *server;
 484         struct cifsInodeInfo *cinode;
 485         struct inode *inode;
 486         char *full_path = NULL;
 487         int desired_access;
 488         int disposition = FILE_OPEN;
 489         int create_options = CREATE_NOT_DIR;
 490         struct cifs_fid fid;
 491
 492         xid = get_xid();
 493         mutex_lock(&cfile->fh_mutex);
 494         if (!cfile->invalidHandle) {
 495                 mutex_unlock(&cfile->fh_mutex);
 496                 rc = 0;
 497                 free_xid(xid);
 498                 return rc;
 499         }
 500
 501         inode = cfile->dentry->d_inode;
 502         cifs_sb = CIFS_SB(inode->i_sb);
 503         tcon = tlink_tcon(cfile->tlink);
 504         server = tcon->ses->server;
 505
 506         /*
 507          * Can not grab rename sem here because various ops, including those
 508          * that already have the rename sem can end up causing writepage to get
 509          * called and if the server was down that means we end up here, and we
 510          * can never tell if the caller already has the rename_sem.
 511          */
 512         full_path = build_path_from_dentry(cfile->dentry);
 513         if (full_path == NULL) {
 514                 rc = -ENOMEM;
 515                 mutex_unlock(&cfile->fh_mutex);
 516                 free_xid(xid);
 517                 return rc;
 518         }
 519
 520         cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
 521              full_path);
 522
 523         if (tcon->ses->server->oplocks)
 524                 oplock = REQ_OPLOCK;
 525         else
 526                 oplock = 0;
 527
 528         if (tcon->unix_ext && cap_unix(tcon->ses) &&
 529             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 530                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 531                 /*
 532                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
 533                  * original open. Must mask them off for a reopen.
 534                  */
 535                 unsigned int oflags = cfile->f_flags &
 536                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
 537
 538                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
 539                                      cifs_sb->mnt_file_mode /* ignored */,
 540                                      oflags, &oplock, &fid.netfid, xid);
 541                 if (rc == 0) {
 542                         cFYI(1, "posix reopen succeeded");
 543                         goto reopen_success;
 544                 }
 545                 /*
 546                  * fallthrough to retry open the old way on errors, especially
 547                  * in the reconnect path it is important to retry hard
 548                  */
 549         }
 550
 551         desired_access = cifs_convert_flags(cfile->f_flags);
 552
 553         if (backup_cred(cifs_sb))
 554                 create_options |= CREATE_OPEN_BACKUP_INTENT;
 555
 556         /*
 557          * Can not refresh inode by passing in file_info buf to be returned by
 558          * CIFSSMBOpen and then calling get_inode_info with returned buf since
 559          * file might have write behind data that needs to be flushed and server
 560          * version of file size can be stale. If we knew for sure that inode was
 561          * not dirty locally we could do this.
 562          */
 563         rc = server->ops->open(xid, tcon, full_path, disposition,
 564                                desired_access, create_options, &fid, &oplock,
 565                                NULL, cifs_sb);
 566         if (rc) {
 567                 mutex_unlock(&cfile->fh_mutex);
 568                 cFYI(1, "cifs_reopen returned 0x%x", rc);
 569                 cFYI(1, "oplock: %d", oplock);
 570                 goto reopen_error_exit;
 571         }
 572
 573 reopen_success:
 574         cfile->invalidHandle = false;
 575         mutex_unlock(&cfile->fh_mutex);
 576         cinode = CIFS_I(inode);
 577
 578         if (can_flush) {
 579                 rc = filemap_write_and_wait(inode->i_mapping);
 580                 mapping_set_error(inode->i_mapping, rc);
 581
 582                 if (tcon->unix_ext)
 583                         rc = cifs_get_inode_info_unix(&inode, full_path,
 584                                                       inode->i_sb, xid);
 585                 else
 586                         rc = cifs_get_inode_info(&inode, full_path, NULL,
 587                                                  inode->i_sb, xid, NULL);
 588         }
 589         /*
 590          * Else we are writing out data to server already and could deadlock if
 591          * we tried to flush data, and since we do not know if we have data that
 592          * would invalidate the current end of file on the server we can not go
 593          * to the server to get the new inode info.
 594          */
 595
 596         server->ops->set_fid(cfile, &fid, oplock);
 597         cifs_relock_file(cfile);
 598
 599 reopen_error_exit:
 600         kfree(full_path);
 601         free_xid(xid);
 602         return rc;
 603 }
 604
 605 int cifs_close(struct inode *inode, struct file *file)
 606 {
 607         if (file->private_data != NULL) {
 608                 cifsFileInfo_put(file->private_data);
 609                 file->private_data = NULL;
 610         }
 611
 612         /* return code from the ->release op is always ignored */
 613         return 0;
 614 }
 615
 616 int cifs_closedir(struct inode *inode, struct file *file)
 617 {
 618         int rc = 0;
 619         unsigned int xid;
 620         struct cifsFileInfo *cfile = file->private_data;
 621         char *tmp;
 622
 623         cFYI(1, "Closedir inode = 0x%p", inode);
 624
 625         xid = get_xid();
 626
 627         if (cfile) {
 628                 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 629
 630                 cFYI(1, "Freeing private data in close dir");
 631                 spin_lock(&cifs_file_list_lock);
 632                 if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
 633                         cfile->invalidHandle = true;
 634                         spin_unlock(&cifs_file_list_lock);
 635                         rc = CIFSFindClose(xid, tcon, cfile->fid.netfid);
 636                         cFYI(1, "Closing uncompleted readdir with rc %d", rc);
 637                         /* not much we can do if it fails anyway, ignore rc */
 638                         rc = 0;
 639                 } else
 640                         spin_unlock(&cifs_file_list_lock);
 641                 tmp = cfile->srch_inf.ntwrk_buf_start;
 642                 if (tmp) {
 643                         cFYI(1, "closedir free smb buf in srch struct");
 644                         cfile->srch_inf.ntwrk_buf_start = NULL;
 645                         if (cfile->srch_inf.smallBuf)
 646                                 cifs_small_buf_release(tmp);
 647                         else
 648                                 cifs_buf_release(tmp);
 649                 }
 650                 cifs_put_tlink(cfile->tlink);
 651                 kfree(file->private_data);
 652                 file->private_data = NULL;
 653         }
 654         /* BB can we lock the filestruct while this is going on? */
 655         free_xid(xid);
 656         return rc;
 657 }
 658
 659 static struct cifsLockInfo *
 660 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
 661 {
 662         struct cifsLockInfo *lock =
 663                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
 664         if (!lock)
 665                 return lock;
 666         lock->offset = offset;
 667         lock->length = length;
 668         lock->type = type;
 669         lock->pid = current->tgid;
 670         INIT_LIST_HEAD(&lock->blist);
 671         init_waitqueue_head(&lock->block_q);
 672         return lock;
 673 }
 674
 675 static void
 676 cifs_del_lock_waiters(struct cifsLockInfo *lock)
 677 {
 678         struct cifsLockInfo *li, *tmp;
 679         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
 680                 list_del_init(&li->blist);
 681                 wake_up(&li->block_q);
 682         }
 683 }
 684
 685 static bool
 686 cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
 687                             __u64 length, __u8 type, struct cifsFileInfo *cur,
 688                             struct cifsLockInfo **conf_lock)
 689 {
 690         struct cifsLockInfo *li;
 691         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 692
 693         list_for_each_entry(li, &cfile->llist, llist) {
 694                 if (offset + length <= li->offset ||
 695                     offset >= li->offset + li->length)
 696                         continue;
 697                 else if ((type & server->vals->shared_lock_type) &&
 698                          ((server->ops->compare_fids(cur, cfile) &&
 699                            current->tgid == li->pid) || type == li->type))
 700                         continue;
 701                 else {
 702                         *conf_lock = li;
 703                         return true;
 704                 }
 705         }
 706         return false;
 707 }
 708
 709 static bool
 710 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 711                         __u8 type, struct cifsLockInfo **conf_lock)
 712 {
 713         bool rc = false;
 714         struct cifsFileInfo *fid, *tmp;
 715         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 716
 717         spin_lock(&cifs_file_list_lock);
 718         list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
 719                 rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
 720                                                  cfile, conf_lock);
 721                 if (rc)
 722                         break;
 723         }
 724         spin_unlock(&cifs_file_list_lock);
 725
 726         return rc;
 727 }
 728
 729 /*
 730  * Check if there is another lock that prevents us to set the lock (mandatory
 731  * style). If such a lock exists, update the flock structure with its
 732  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 733  * or leave it the same if we can't. Returns 0 if we don't need to request to
 734  * the server or 1 otherwise.
 735  */
 736 static int
 737 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 738                __u8 type, struct file_lock *flock)
 739 {
 740         int rc = 0;
 741         struct cifsLockInfo *conf_lock;
 742         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 743         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 744         bool exist;
 745
 746         mutex_lock(&cinode->lock_mutex);
 747
 748         exist = cifs_find_lock_conflict(cfile, offset, length, type,
 749                                         &conf_lock);
 750         if (exist) {
 751                 flock->fl_start = conf_lock->offset;
 752                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
 753                 flock->fl_pid = conf_lock->pid;
 754                 if (conf_lock->type & server->vals->shared_lock_type)
 755                         flock->fl_type = F_RDLCK;
 756                 else
 757                         flock->fl_type = F_WRLCK;
 758         } else if (!cinode->can_cache_brlcks)
 759                 rc = 1;
 760         else
 761                 flock->fl_type = F_UNLCK;
 762
 763         mutex_unlock(&cinode->lock_mutex);
 764         return rc;
 765 }
 766
 767 static void
 768 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
 769 {
 770         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 771         mutex_lock(&cinode->lock_mutex);
 772         list_add_tail(&lock->llist, &cfile->llist);
 773         mutex_unlock(&cinode->lock_mutex);
 774 }
 775
 776 /*
 777  * Set the byte-range lock (mandatory style). Returns:
 778  * 1) 0, if we set the lock and don't need to request to the server;
 779  * 2) 1, if no locks prevent us but we need to request to the server;
 780  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
 781  */
 782 static int
 783 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
 784                  bool wait)
 785 {
 786         struct cifsLockInfo *conf_lock;
 787         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 788         bool exist;
 789         int rc = 0;
 790
 791 try_again:
 792         exist = false;
 793         mutex_lock(&cinode->lock_mutex);
 794
 795         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
 796                                         lock->type, &conf_lock);
 797         if (!exist && cinode->can_cache_brlcks) {
 798                 list_add_tail(&lock->llist, &cfile->llist);
 799                 mutex_unlock(&cinode->lock_mutex);
 800                 return rc;
 801         }
 802
 803         if (!exist)
 804                 rc = 1;
 805         else if (!wait)
 806                 rc = -EACCES;
 807         else {
 808                 list_add_tail(&lock->blist, &conf_lock->blist);
 809                 mutex_unlock(&cinode->lock_mutex);
 810                 rc = wait_event_interruptible(lock->block_q,
 811                                         (lock->blist.prev == &lock->blist) &&
 812                                         (lock->blist.next == &lock->blist));
 813                 if (!rc)
 814                         goto try_again;
 815                 mutex_lock(&cinode->lock_mutex);
 816                 list_del_init(&lock->blist);
 817         }
 818
 819         mutex_unlock(&cinode->lock_mutex);
 820         return rc;
 821 }
 822
 823 /*
 824  * Check if there is another lock that prevents us to set the lock (posix
 825  * style). If such a lock exists, update the flock structure with its
 826  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 827  * or leave it the same if we can't. Returns 0 if we don't need to request to
 828  * the server or 1 otherwise.
 829  */
 830 static int
 831 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
 832 {
 833         int rc = 0;
 834         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
 835         unsigned char saved_type = flock->fl_type;
 836
 837         if ((flock->fl_flags & FL_POSIX) == 0)
 838                 return 1;
 839
 840         mutex_lock(&cinode->lock_mutex);
 841         posix_test_lock(file, flock);
 842
 843         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
 844                 flock->fl_type = saved_type;
 845                 rc = 1;
 846         }
 847
 848         mutex_unlock(&cinode->lock_mutex);
 849         return rc;
 850 }
 851
 852 /*
 853  * Set the byte-range lock (posix style). Returns:
 854  * 1) 0, if we set the lock and don't need to request to the server;
 855  * 2) 1, if we need to request to the server;
 856  * 3) <0, if the error occurs while setting the lock.
 857  */
 858 static int
 859 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
 860 {
 861         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
 862         int rc = 1;
 863
 864         if ((flock->fl_flags & FL_POSIX) == 0)
 865                 return rc;
 866
 867 try_again:
 868         mutex_lock(&cinode->lock_mutex);
 869         if (!cinode->can_cache_brlcks) {
 870                 mutex_unlock(&cinode->lock_mutex);
 871                 return rc;
 872         }
 873
 874         rc = posix_lock_file(file, flock, NULL);
 875         mutex_unlock(&cinode->lock_mutex);
 876         if (rc == FILE_LOCK_DEFERRED) {
 877                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
 878                 if (!rc)
 879                         goto try_again;
 880                 locks_delete_block(flock);
 881         }
 882         return rc;
 883 }
 884
 885 static int
 886 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 887 {
 888         unsigned int xid;
 889         int rc = 0, stored_rc;
 890         struct cifsLockInfo *li, *tmp;
 891         struct cifs_tcon *tcon;
 892         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 893         unsigned int num, max_num, max_buf;
 894         LOCKING_ANDX_RANGE *buf, *cur;
 895         int types[] = {LOCKING_ANDX_LARGE_FILES,
 896                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
 897         int i;
 898
 899         xid = get_xid();
 900         tcon = tlink_tcon(cfile->tlink);
 901
 902         mutex_lock(&cinode->lock_mutex);
 903         if (!cinode->can_cache_brlcks) {
 904                 mutex_unlock(&cinode->lock_mutex);
 905                 free_xid(xid);
 906                 return rc;
 907         }
 908
 909         /*
 910          * Accessing maxBuf is racy with cifs_reconnect - need to store value
 911          * and check it for zero before using.
 912          */
 913         max_buf = tcon->ses->server->maxBuf;
 914         if (!max_buf) {
 915                 mutex_unlock(&cinode->lock_mutex);
 916                 free_xid(xid);
 917                 return -EINVAL;
 918         }
 919
 920         max_num = (max_buf - sizeof(struct smb_hdr)) /
 921                                                 sizeof(LOCKING_ANDX_RANGE);
 922         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 923         if (!buf) {
 924                 mutex_unlock(&cinode->lock_mutex);
 925                 free_xid(xid);
 926                 return -ENOMEM;
 927         }
 928
 929         for (i = 0; i < 2; i++) {
 930                 cur = buf;
 931                 num = 0;
 932                 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
 933                         if (li->type != types[i])
 934                                 continue;
 935                         cur->Pid = cpu_to_le16(li->pid);
 936                         cur->LengthLow = cpu_to_le32((u32)li->length);
 937                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
 938                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
 939                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
 940                         if (++num == max_num) {
 941                                 stored_rc = cifs_lockv(xid, tcon,
 942                                                        cfile->fid.netfid,
 943                                                        (__u8)li->type, 0, num,
 944                                                        buf);
 945                                 if (stored_rc)
 946                                         rc = stored_rc;
 947                                 cur = buf;
 948                                 num = 0;
 949                         } else
 950                                 cur++;
 951                 }
 952
 953                 if (num) {
 954                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
 955                                                (__u8)types[i], 0, num, buf);
 956                         if (stored_rc)
 957                                 rc = stored_rc;
 958                 }
 959         }
 960
 961         cinode->can_cache_brlcks = false;
 962         mutex_unlock(&cinode->lock_mutex);
 963
 964         kfree(buf);
 965         free_xid(xid);
 966         return rc;
 967 }
 968
 969 /* copied from fs/locks.c with a name change */
 970 #define cifs_for_each_lock(inode, lockp) \
 971         for (lockp = &inode->i_flock; *lockp != NULL; \
 972              lockp = &(*lockp)->fl_next)
 973
/*
 * Private copy of one POSIX byte-range lock, filled in by
 * cifs_push_posix_locks() while walking the inode's lock chain and later
 * passed field by field to CIFSSMBPosixLock().
 */
struct lock_to_push {
	struct list_head llist;	/* linkage on the local locks_to_send list */
	__u64 offset;		/* start of the range (copied from fl_start) */
	__u64 length;		/* 1 + fl_end - fl_start */
	__u32 pid;		/* owner pid (copied from fl_pid) */
	__u16 netfid;		/* copied from cfile->fid.netfid */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
 982
 983 static int
 984 cifs_push_posix_locks(struct cifsFileInfo *cfile)
 985 {
 986         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 987         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 988         struct file_lock *flock, **before;
 989         unsigned int count = 0, i = 0;
 990         int rc = 0, xid, type;
 991         struct list_head locks_to_send, *el;
 992         struct lock_to_push *lck, *tmp;
 993         __u64 length;
 994
 995         xid = get_xid();
 996
 997         mutex_lock(&cinode->lock_mutex);
 998         if (!cinode->can_cache_brlcks) {
 999                 mutex_unlock(&cinode->lock_mutex);
1000                 free_xid(xid);
1001                 return rc;
1002         }
1003
1004         lock_flocks();
1005         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1006                 if ((*before)->fl_flags & FL_POSIX)
1007                         count++;
1008         }
1009         unlock_flocks();
1010
1011         INIT_LIST_HEAD(&locks_to_send);
1012
1013         /*
1014          * Allocating count locks is enough because no FL_POSIX locks can be
1015          * added to the list while we are holding cinode->lock_mutex that
1016          * protects locking operations of this inode.
1017          */
1018         for (; i < count; i++) {
1019                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1020                 if (!lck) {
1021                         rc = -ENOMEM;
1022                         goto err_out;
1023                 }
1024                 list_add_tail(&lck->llist, &locks_to_send);
1025         }
1026
1027         el = locks_to_send.next;
1028         lock_flocks();
1029         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1030                 flock = *before;
1031                 if ((flock->fl_flags & FL_POSIX) == 0)
1032                         continue;
1033                 if (el == &locks_to_send) {
1034                         /*
1035                          * The list ended. We don't have enough allocated
1036                          * structures - something is really wrong.
1037                          */
1038                         cERROR(1, "Can't push all brlocks!");
1039                         break;
1040                 }
1041                 length = 1 + flock->fl_end - flock->fl_start;
1042                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1043                         type = CIFS_RDLCK;
1044                 else
1045                         type = CIFS_WRLCK;
1046                 lck = list_entry(el, struct lock_to_push, llist);
1047                 lck->pid = flock->fl_pid;
1048                 lck->netfid = cfile->fid.netfid;
1049                 lck->length = length;
1050                 lck->type = type;
1051                 lck->offset = flock->fl_start;
1052                 el = el->next;
1053         }
1054         unlock_flocks();
1055
1056         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1057                 int stored_rc;
1058
1059                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1060                                              lck->offset, lck->length, NULL,
1061                                              lck->type, 0);
1062                 if (stored_rc)
1063                         rc = stored_rc;
1064                 list_del(&lck->llist);
1065                 kfree(lck);
1066         }
1067
1068 out:
1069         cinode->can_cache_brlcks = false;
1070         mutex_unlock(&cinode->lock_mutex);
1071
1072         free_xid(xid);
1073         return rc;
1074 err_out:
1075         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1076                 list_del(&lck->llist);
1077                 kfree(lck);
1078         }
1079         goto out;
1080 }
1081
1082 static int
1083 cifs_push_locks(struct cifsFileInfo *cfile)
1084 {
1085         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1086         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1087
1088         if (cap_unix(tcon->ses) &&
1089             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1090             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1091                 return cifs_push_posix_locks(cfile);
1092
1093         return cifs_push_mandatory_locks(cfile);
1094 }
1095
1096 static void
1097 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1098                 bool *wait_flag, struct TCP_Server_Info *server)
1099 {
1100         if (flock->fl_flags & FL_POSIX)
1101                 cFYI(1, "Posix");
1102         if (flock->fl_flags & FL_FLOCK)
1103                 cFYI(1, "Flock");
1104         if (flock->fl_flags & FL_SLEEP) {
1105                 cFYI(1, "Blocking lock");
1106                 *wait_flag = true;
1107         }
1108         if (flock->fl_flags & FL_ACCESS)
1109                 cFYI(1, "Process suspended by mandatory locking - "
1110                         "not implemented yet");
1111         if (flock->fl_flags & FL_LEASE)
1112                 cFYI(1, "Lease on file - not implemented yet");
1113         if (flock->fl_flags &
1114             (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1115                 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1116
1117         *type = server->vals->large_lock_type;
1118         if (flock->fl_type == F_WRLCK) {
1119                 cFYI(1, "F_WRLCK ");
1120                 *type |= server->vals->exclusive_lock_type;
1121                 *lock = 1;
1122         } else if (flock->fl_type == F_UNLCK) {
1123                 cFYI(1, "F_UNLCK");
1124                 *type |= server->vals->unlock_lock_type;
1125                 *unlock = 1;
1126                 /* Check if unlock includes more than one lock range */
1127         } else if (flock->fl_type == F_RDLCK) {
1128                 cFYI(1, "F_RDLCK");
1129                 *type |= server->vals->shared_lock_type;
1130                 *lock = 1;
1131         } else if (flock->fl_type == F_EXLCK) {
1132                 cFYI(1, "F_EXLCK");
1133                 *type |= server->vals->exclusive_lock_type;
1134                 *lock = 1;
1135         } else if (flock->fl_type == F_SHLCK) {
1136                 cFYI(1, "F_SHLCK");
1137                 *type |= server->vals->shared_lock_type;
1138                 *lock = 1;
1139         } else
1140                 cFYI(1, "Unknown type of lock");
1141 }
1142
1143 static int
1144 cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
1145                     __u64 length, __u32 type, int lock, int unlock, bool wait)
1146 {
1147         return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid,
1148                            current->tgid, length, offset, unlock, lock,
1149                            (__u8)type, wait, 0);
1150 }
1151
1152 static int
1153 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1154            bool wait_flag, bool posix_lck, unsigned int xid)
1155 {
1156         int rc = 0;
1157         __u64 length = 1 + flock->fl_end - flock->fl_start;
1158         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1159         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1160         struct TCP_Server_Info *server = tcon->ses->server;
1161         __u16 netfid = cfile->fid.netfid;
1162
1163         if (posix_lck) {
1164                 int posix_lock_type;
1165
1166                 rc = cifs_posix_lock_test(file, flock);
1167                 if (!rc)
1168                         return rc;
1169
1170                 if (type & server->vals->shared_lock_type)
1171                         posix_lock_type = CIFS_RDLCK;
1172                 else
1173                         posix_lock_type = CIFS_WRLCK;
1174                 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1175                                       flock->fl_start, length, flock,
1176                                       posix_lock_type, wait_flag);
1177                 return rc;
1178         }
1179
1180         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1181         if (!rc)
1182                 return rc;
1183
1184         /* BB we could chain these into one lock request BB */
1185         rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
1186                                  1, 0, false);
1187         if (rc == 0) {
1188                 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1189                                          type, 0, 1, false);
1190                 flock->fl_type = F_UNLCK;
1191                 if (rc != 0)
1192                         cERROR(1, "Error unlocking previously locked "
1193                                   "range %d during test of lock", rc);
1194                 return 0;
1195         }
1196
1197         if (type & server->vals->shared_lock_type) {
1198                 flock->fl_type = F_WRLCK;
1199                 return 0;
1200         }
1201
1202         rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1203                                  type | server->vals->shared_lock_type, 1, 0,
1204                                  false);
1205         if (rc == 0) {
1206                 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1207                                          type | server->vals->shared_lock_type,
1208                                          0, 1, false);
1209                 flock->fl_type = F_RDLCK;
1210                 if (rc != 0)
1211                         cERROR(1, "Error unlocking previously locked "
1212                                   "range %d during test of lock", rc);
1213         } else
1214                 flock->fl_type = F_WRLCK;
1215
1216         return 0;
1217 }
1218
1219 static void
1220 cifs_move_llist(struct list_head *source, struct list_head *dest)
1221 {
1222         struct list_head *li, *tmp;
1223         list_for_each_safe(li, tmp, source)
1224                 list_move(li, dest);
1225 }
1226
1227 static void
1228 cifs_free_llist(struct list_head *llist)
1229 {
1230         struct cifsLockInfo *li, *tmp;
1231         list_for_each_entry_safe(li, tmp, llist, llist) {
1232                 cifs_del_lock_waiters(li);
1233                 list_del(&li->llist);
1234                 kfree(li);
1235         }
1236 }
1237
1238 static int
1239 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1240                   unsigned int xid)
1241 {
1242         int rc = 0, stored_rc;
1243         int types[] = {LOCKING_ANDX_LARGE_FILES,
1244                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1245         unsigned int i;
1246         unsigned int max_num, num, max_buf;
1247         LOCKING_ANDX_RANGE *buf, *cur;
1248         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1249         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1250         struct cifsLockInfo *li, *tmp;
1251         __u64 length = 1 + flock->fl_end - flock->fl_start;
1252         struct list_head tmp_llist;
1253
1254         INIT_LIST_HEAD(&tmp_llist);
1255
1256         /*
1257          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1258          * and check it for zero before using.
1259          */
1260         max_buf = tcon->ses->server->maxBuf;
1261         if (!max_buf)
1262                 return -EINVAL;
1263
1264         max_num = (max_buf - sizeof(struct smb_hdr)) /
1265                                                 sizeof(LOCKING_ANDX_RANGE);
1266         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1267         if (!buf)
1268                 return -ENOMEM;
1269
1270         mutex_lock(&cinode->lock_mutex);
1271         for (i = 0; i < 2; i++) {
1272                 cur = buf;
1273                 num = 0;
1274                 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
1275                         if (flock->fl_start > li->offset ||
1276                             (flock->fl_start + length) <
1277                             (li->offset + li->length))
1278                                 continue;
1279                         if (current->tgid != li->pid)
1280                                 continue;
1281                         if (types[i] != li->type)
1282                                 continue;
1283                         if (cinode->can_cache_brlcks) {
1284                                 /*
1285                                  * We can cache brlock requests - simply remove
1286                                  * a lock from the file's list.
1287                                  */
1288                                 list_del(&li->llist);
1289                                 cifs_del_lock_waiters(li);
1290                                 kfree(li);
1291                                 continue;
1292                         }
1293                         cur->Pid = cpu_to_le16(li->pid);
1294                         cur->LengthLow = cpu_to_le32((u32)li->length);
1295                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1296                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1297                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1298                         /*
1299                          * We need to save a lock here to let us add it again to
1300                          * the file's list if the unlock range request fails on
1301                          * the server.
1302                          */
1303                         list_move(&li->llist, &tmp_llist);
1304                         if (++num == max_num) {
1305                                 stored_rc = cifs_lockv(xid, tcon,
1306                                                        cfile->fid.netfid,
1307                                                        li->type, num, 0, buf);
1308                                 if (stored_rc) {
1309                                         /*
1310                                          * We failed on the unlock range
1311                                          * request - add all locks from the tmp
1312                                          * list to the head of the file's list.
1313                                          */
1314                                         cifs_move_llist(&tmp_llist,
1315                                                         &cfile->llist);
1316                                         rc = stored_rc;
1317                                 } else
1318                                         /*
1319                                          * The unlock range request succeed -
1320                                          * free the tmp list.
1321                                          */
1322                                         cifs_free_llist(&tmp_llist);
1323                                 cur = buf;
1324                                 num = 0;
1325                         } else
1326                                 cur++;
1327                 }
1328                 if (num) {
1329                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1330                                                types[i], num, 0, buf);
1331                         if (stored_rc) {
1332                                 cifs_move_llist(&tmp_llist, &cfile->llist);
1333                                 rc = stored_rc;
1334                         } else
1335                                 cifs_free_llist(&tmp_llist);
1336                 }
1337         }
1338
1339         mutex_unlock(&cinode->lock_mutex);
1340         kfree(buf);
1341         return rc;
1342 }
1343
1344 static int
1345 cifs_setlk(struct file *file,  struct file_lock *flock, __u32 type,
1346            bool wait_flag, bool posix_lck, int lock, int unlock,
1347            unsigned int xid)
1348 {
1349         int rc = 0;
1350         __u64 length = 1 + flock->fl_end - flock->fl_start;
1351         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1352         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1353         struct TCP_Server_Info *server = tcon->ses->server;
1354         __u16 netfid = cfile->fid.netfid;
1355
1356         if (posix_lck) {
1357                 int posix_lock_type;
1358
1359                 rc = cifs_posix_lock_set(file, flock);
1360                 if (!rc || rc < 0)
1361                         return rc;
1362
1363                 if (type & server->vals->shared_lock_type)
1364                         posix_lock_type = CIFS_RDLCK;
1365                 else
1366                         posix_lock_type = CIFS_WRLCK;
1367
1368                 if (unlock == 1)
1369                         posix_lock_type = CIFS_UNLCK;
1370
1371                 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1372                                       flock->fl_start, length, NULL,
1373                                       posix_lock_type, wait_flag);
1374                 goto out;
1375         }
1376
1377         if (lock) {
1378                 struct cifsLockInfo *lock;
1379
1380                 lock = cifs_lock_init(flock->fl_start, length, type);
1381                 if (!lock)
1382                         return -ENOMEM;
1383
1384                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1385                 if (rc < 0)
1386                         kfree(lock);
1387                 if (rc <= 0)
1388                         goto out;
1389
1390                 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1391                                          type, 1, 0, wait_flag);
1392                 if (rc) {
1393                         kfree(lock);
1394                         goto out;
1395                 }
1396
1397                 cifs_lock_add(cfile, lock);
1398         } else if (unlock)
1399                 rc = cifs_unlock_range(cfile, flock, xid);
1400
1401 out:
1402         if (flock->fl_flags & FL_POSIX)
1403                 posix_lock_file_wait(file, flock);
1404         return rc;
1405 }
1406
1407 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1408 {
1409         int rc, xid;
1410         int lock = 0, unlock = 0;
1411         bool wait_flag = false;
1412         bool posix_lck = false;
1413         struct cifs_sb_info *cifs_sb;
1414         struct cifs_tcon *tcon;
1415         struct cifsInodeInfo *cinode;
1416         struct cifsFileInfo *cfile;
1417         __u16 netfid;
1418         __u32 type;
1419
1420         rc = -EACCES;
1421         xid = get_xid();
1422
1423         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1424                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1425                 flock->fl_start, flock->fl_end);
1426
1427         cfile = (struct cifsFileInfo *)file->private_data;
1428         tcon = tlink_tcon(cfile->tlink);
1429
1430         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1431                         tcon->ses->server);
1432
1433         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1434         netfid = cfile->fid.netfid;
1435         cinode = CIFS_I(file->f_path.dentry->d_inode);
1436
1437         if (cap_unix(tcon->ses) &&
1438             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1439             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1440                 posix_lck = true;
1441         /*
1442          * BB add code here to normalize offset and length to account for
1443          * negative length which we can not accept over the wire.
1444          */
1445         if (IS_GETLK(cmd)) {
1446                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1447                 free_xid(xid);
1448                 return rc;
1449         }
1450
1451         if (!lock && !unlock) {
1452                 /*
1453                  * if no lock or unlock then nothing to do since we do not
1454                  * know what it is
1455                  */
1456                 free_xid(xid);
1457                 return -EOPNOTSUPP;
1458         }
1459
1460         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1461                         xid);
1462         free_xid(xid);
1463         return rc;
1464 }
1465
1466 /*
1467  * update the file size (if needed) after a write. Should be called with
1468  * the inode->i_lock held
1469  */
1470 void
1471 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1472                       unsigned int bytes_written)
1473 {
1474         loff_t end_of_write = offset + bytes_written;
1475
1476         if (end_of_write > cifsi->server_eof)
1477                 cifsi->server_eof = end_of_write;
1478 }
1479
1480 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1481                           const char *write_data, size_t write_size,
1482                           loff_t *poffset)
1483 {
1484         int rc = 0;
1485         unsigned int bytes_written = 0;
1486         unsigned int total_written;
1487         struct cifs_sb_info *cifs_sb;
1488         struct cifs_tcon *pTcon;
1489         unsigned int xid;
1490         struct dentry *dentry = open_file->dentry;
1491         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1492         struct cifs_io_parms io_parms;
1493
1494         cifs_sb = CIFS_SB(dentry->d_sb);
1495
1496         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1497            *poffset, dentry->d_name.name);
1498
1499         pTcon = tlink_tcon(open_file->tlink);
1500
1501         xid = get_xid();
1502
1503         for (total_written = 0; write_size > total_written;
1504              total_written += bytes_written) {
1505                 rc = -EAGAIN;
1506                 while (rc == -EAGAIN) {
1507                         struct kvec iov[2];
1508                         unsigned int len;
1509
1510                         if (open_file->invalidHandle) {
1511                                 /* we could deadlock if we called
1512                                    filemap_fdatawait from here so tell
1513                                    reopen_file not to flush data to
1514                                    server now */
1515                                 rc = cifs_reopen_file(open_file, false);
1516                                 if (rc != 0)
1517                                         break;
1518                         }
1519
1520                         len = min((size_t)cifs_sb->wsize,
1521                                   write_size - total_written);
1522                         /* iov[0] is reserved for smb header */
1523                         iov[1].iov_base = (char *)write_data + total_written;
1524                         iov[1].iov_len = len;
1525                         io_parms.netfid = open_file->fid.netfid;
1526                         io_parms.pid = pid;
1527                         io_parms.tcon = pTcon;
1528                         io_parms.offset = *poffset;
1529                         io_parms.length = len;
1530                         rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
1531                                            1, 0);
1532                 }
1533                 if (rc || (bytes_written == 0)) {
1534                         if (total_written)
1535                                 break;
1536                         else {
1537                                 free_xid(xid);
1538                                 return rc;
1539                         }
1540                 } else {
1541                         spin_lock(&dentry->d_inode->i_lock);
1542                         cifs_update_eof(cifsi, *poffset, bytes_written);
1543                         spin_unlock(&dentry->d_inode->i_lock);
1544                         *poffset += bytes_written;
1545                 }
1546         }
1547
1548         cifs_stats_bytes_written(pTcon, total_written);
1549
1550         if (total_written > 0) {
1551                 spin_lock(&dentry->d_inode->i_lock);
1552                 if (*poffset > dentry->d_inode->i_size)
1553                         i_size_write(dentry->d_inode, *poffset);
1554                 spin_unlock(&dentry->d_inode->i_lock);
1555         }
1556         mark_inode_dirty_sync(dentry->d_inode);
1557         free_xid(xid);
1558         return total_written;
1559 }
1560
1561 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1562                                         bool fsuid_only)
1563 {
1564         struct cifsFileInfo *open_file = NULL;
1565         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1566
1567         /* only filter by fsuid on multiuser mounts */
1568         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1569                 fsuid_only = false;
1570
1571         spin_lock(&cifs_file_list_lock);
1572         /* we could simply get the first_list_entry since write-only entries
1573            are always at the end of the list but since the first entry might
1574            have a close pending, we go through the whole list */
1575         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1576                 if (fsuid_only && open_file->uid != current_fsuid())
1577                         continue;
1578                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1579                         if (!open_file->invalidHandle) {
1580                                 /* found a good file */
1581                                 /* lock it so it will not be closed on us */
1582                                 cifsFileInfo_get_locked(open_file);
1583                                 spin_unlock(&cifs_file_list_lock);
1584                                 return open_file;
1585                         } /* else might as well continue, and look for
1586                              another, or simply have the caller reopen it
1587                              again rather than trying to fix this handle */
1588                 } else /* write only file */
1589                         break; /* write only files are last so must be done */
1590         }
1591         spin_unlock(&cifs_file_list_lock);
1592         return NULL;
1593 }
1594
1595 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1596                                         bool fsuid_only)
1597 {
1598         struct cifsFileInfo *open_file, *inv_file = NULL;
1599         struct cifs_sb_info *cifs_sb;
1600         bool any_available = false;
1601         int rc;
1602         unsigned int refind = 0;
1603
1604         /* Having a null inode here (because mapping->host was set to zero by
1605         the VFS or MM) should not happen but we had reports of on oops (due to
1606         it being zero) during stress testcases so we need to check for it */
1607
1608         if (cifs_inode == NULL) {
1609                 cERROR(1, "Null inode passed to cifs_writeable_file");
1610                 dump_stack();
1611                 return NULL;
1612         }
1613
1614         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1615
1616         /* only filter by fsuid on multiuser mounts */
1617         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1618                 fsuid_only = false;
1619
1620         spin_lock(&cifs_file_list_lock);
1621 refind_writable:
1622         if (refind > MAX_REOPEN_ATT) {
1623                 spin_unlock(&cifs_file_list_lock);
1624                 return NULL;
1625         }
1626         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1627                 if (!any_available && open_file->pid != current->tgid)
1628                         continue;
1629                 if (fsuid_only && open_file->uid != current_fsuid())
1630                         continue;
1631                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1632                         if (!open_file->invalidHandle) {
1633                                 /* found a good writable file */
1634                                 cifsFileInfo_get_locked(open_file);
1635                                 spin_unlock(&cifs_file_list_lock);
1636                                 return open_file;
1637                         } else {
1638                                 if (!inv_file)
1639                                         inv_file = open_file;
1640                         }
1641                 }
1642         }
1643         /* couldn't find useable FH with same pid, try any available */
1644         if (!any_available) {
1645                 any_available = true;
1646                 goto refind_writable;
1647         }
1648
1649         if (inv_file) {
1650                 any_available = false;
1651                 cifsFileInfo_get_locked(inv_file);
1652         }
1653
1654         spin_unlock(&cifs_file_list_lock);
1655
1656         if (inv_file) {
1657                 rc = cifs_reopen_file(inv_file, false);
1658                 if (!rc)
1659                         return inv_file;
1660                 else {
1661                         spin_lock(&cifs_file_list_lock);
1662                         list_move_tail(&inv_file->flist,
1663                                         &cifs_inode->openFileList);
1664                         spin_unlock(&cifs_file_list_lock);
1665                         cifsFileInfo_put(inv_file);
1666                         spin_lock(&cifs_file_list_lock);
1667                         ++refind;
1668                         goto refind_writable;
1669                 }
1670         }
1671
1672         return NULL;
1673 }
1674
1675 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1676 {
1677         struct address_space *mapping = page->mapping;
1678         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1679         char *write_data;
1680         int rc = -EFAULT;
1681         int bytes_written = 0;
1682         struct inode *inode;
1683         struct cifsFileInfo *open_file;
1684
1685         if (!mapping || !mapping->host)
1686                 return -EFAULT;
1687
1688         inode = page->mapping->host;
1689
1690         offset += (loff_t)from;
1691         write_data = kmap(page);
1692         write_data += from;
1693
1694         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1695                 kunmap(page);
1696                 return -EIO;
1697         }
1698
1699         /* racing with truncate? */
1700         if (offset > mapping->host->i_size) {
1701                 kunmap(page);
1702                 return 0; /* don't care */
1703         }
1704
1705         /* check to make sure that we are not extending the file */
1706         if (mapping->host->i_size - offset < (loff_t)to)
1707                 to = (unsigned)(mapping->host->i_size - offset);
1708
1709         open_file = find_writable_file(CIFS_I(mapping->host), false);
1710         if (open_file) {
1711                 bytes_written = cifs_write(open_file, open_file->pid,
1712                                            write_data, to - from, &offset);
1713                 cifsFileInfo_put(open_file);
1714                 /* Does mm or vfs already set times? */
1715                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1716                 if ((bytes_written > 0) && (offset))
1717                         rc = 0;
1718                 else if (bytes_written < 0)
1719                         rc = bytes_written;
1720         } else {
1721                 cFYI(1, "No writeable filehandles for inode");
1722                 rc = -EIO;
1723         }
1724
1725         kunmap(page);
1726         return rc;
1727 }
1728
1729 /*
1730  * Marshal up the iov array, reserving the first one for the header. Also,
1731  * set wdata->bytes.
1732  */
1733 static void
1734 cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1735 {
1736         int i;
1737         struct inode *inode = wdata->cfile->dentry->d_inode;
1738         loff_t size = i_size_read(inode);
1739
1740         /* marshal up the pages into iov array */
1741         wdata->bytes = 0;
1742         for (i = 0; i < wdata->nr_pages; i++) {
1743                 iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1744                                         (loff_t)PAGE_CACHE_SIZE);
1745                 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1746                 wdata->bytes += iov[i + 1].iov_len;
1747         }
1748 }
1749
1750 static int cifs_writepages(struct address_space *mapping,
1751                            struct writeback_control *wbc)
1752 {
1753         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1754         bool done = false, scanned = false, range_whole = false;
1755         pgoff_t end, index;
1756         struct cifs_writedata *wdata;
1757         struct page *page;
1758         int rc = 0;
1759
1760         /*
1761          * If wsize is smaller than the page cache size, default to writing
1762          * one page at a time via cifs_writepage
1763          */
1764         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1765                 return generic_writepages(mapping, wbc);
1766
1767         if (wbc->range_cyclic) {
1768                 index = mapping->writeback_index; /* Start from prev offset */
1769                 end = -1;
1770         } else {
1771                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1772                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1773                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1774                         range_whole = true;
1775                 scanned = true;
1776         }
1777 retry:
1778         while (!done && index <= end) {
1779                 unsigned int i, nr_pages, found_pages;
1780                 pgoff_t next = 0, tofind;
1781                 struct page **pages;
1782
1783                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1784                                 end - index) + 1;
1785
1786                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1787                                              cifs_writev_complete);
1788                 if (!wdata) {
1789                         rc = -ENOMEM;
1790                         break;
1791                 }
1792
1793                 /*
1794                  * find_get_pages_tag seems to return a max of 256 on each
1795                  * iteration, so we must call it several times in order to
1796                  * fill the array or the wsize is effectively limited to
1797                  * 256 * PAGE_CACHE_SIZE.
1798                  */
1799                 found_pages = 0;
1800                 pages = wdata->pages;
1801                 do {
1802                         nr_pages = find_get_pages_tag(mapping, &index,
1803                                                         PAGECACHE_TAG_DIRTY,
1804                                                         tofind, pages);
1805                         found_pages += nr_pages;
1806                         tofind -= nr_pages;
1807                         pages += nr_pages;
1808                 } while (nr_pages && tofind && index <= end);
1809
1810                 if (found_pages == 0) {
1811                         kref_put(&wdata->refcount, cifs_writedata_release);
1812                         break;
1813                 }
1814
1815                 nr_pages = 0;
1816                 for (i = 0; i < found_pages; i++) {
1817                         page = wdata->pages[i];
1818                         /*
1819                          * At this point we hold neither mapping->tree_lock nor
1820                          * lock on the page itself: the page may be truncated or
1821                          * invalidated (changing page->mapping to NULL), or even
1822                          * swizzled back from swapper_space to tmpfs file
1823                          * mapping
1824                          */
1825
1826                         if (nr_pages == 0)
1827                                 lock_page(page);
1828                         else if (!trylock_page(page))
1829                                 break;
1830
1831                         if (unlikely(page->mapping != mapping)) {
1832                                 unlock_page(page);
1833                                 break;
1834                         }
1835
1836                         if (!wbc->range_cyclic && page->index > end) {
1837                                 done = true;
1838                                 unlock_page(page);
1839                                 break;
1840                         }
1841
1842                         if (next && (page->index != next)) {
1843                                 /* Not next consecutive page */
1844                                 unlock_page(page);
1845                                 break;
1846                         }
1847
1848                         if (wbc->sync_mode != WB_SYNC_NONE)
1849                                 wait_on_page_writeback(page);
1850
1851                         if (PageWriteback(page) ||
1852                                         !clear_page_dirty_for_io(page)) {
1853                                 unlock_page(page);
1854                                 break;
1855                         }
1856
1857                         /*
1858                          * This actually clears the dirty bit in the radix tree.
1859                          * See cifs_writepage() for more commentary.
1860                          */
1861                         set_page_writeback(page);
1862
1863                         if (page_offset(page) >= mapping->host->i_size) {
1864                                 done = true;
1865                                 unlock_page(page);
1866                                 end_page_writeback(page);
1867                                 break;
1868                         }
1869
1870                         wdata->pages[i] = page;
1871                         next = page->index + 1;
1872                         ++nr_pages;
1873                 }
1874
1875                 /* reset index to refind any pages skipped */
1876                 if (nr_pages == 0)
1877                         index = wdata->pages[0]->index + 1;
1878
1879                 /* put any pages we aren't going to use */
1880                 for (i = nr_pages; i < found_pages; i++) {
1881                         page_cache_release(wdata->pages[i]);
1882                         wdata->pages[i] = NULL;
1883                 }
1884
1885                 /* nothing to write? */
1886                 if (nr_pages == 0) {
1887                         kref_put(&wdata->refcount, cifs_writedata_release);
1888                         continue;
1889                 }
1890
1891                 wdata->sync_mode = wbc->sync_mode;
1892                 wdata->nr_pages = nr_pages;
1893                 wdata->offset = page_offset(wdata->pages[0]);
1894                 wdata->marshal_iov = cifs_writepages_marshal_iov;
1895
1896                 do {
1897                         if (wdata->cfile != NULL)
1898                                 cifsFileInfo_put(wdata->cfile);
1899                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1900                                                           false);
1901                         if (!wdata->cfile) {
1902                                 cERROR(1, "No writable handles for inode");
1903                                 rc = -EBADF;
1904                                 break;
1905                         }
1906                         wdata->pid = wdata->cfile->pid;
1907                         rc = cifs_async_writev(wdata);
1908                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1909
1910                 for (i = 0; i < nr_pages; ++i)
1911                         unlock_page(wdata->pages[i]);
1912
1913                 /* send failure -- clean up the mess */
1914                 if (rc != 0) {
1915                         for (i = 0; i < nr_pages; ++i) {
1916                                 if (rc == -EAGAIN)
1917                                         redirty_page_for_writepage(wbc,
1918                                                            wdata->pages[i]);
1919                                 else
1920                                         SetPageError(wdata->pages[i]);
1921                                 end_page_writeback(wdata->pages[i]);
1922                                 page_cache_release(wdata->pages[i]);
1923                         }
1924                         if (rc != -EAGAIN)
1925                                 mapping_set_error(mapping, rc);
1926                 }
1927                 kref_put(&wdata->refcount, cifs_writedata_release);
1928
1929                 wbc->nr_to_write -= nr_pages;
1930                 if (wbc->nr_to_write <= 0)
1931                         done = true;
1932
1933                 index = next;
1934         }
1935
1936         if (!scanned && !done) {
1937                 /*
1938                  * We hit the last page and there is more work to be done: wrap
1939                  * back to the start of the file
1940                  */
1941                 scanned = true;
1942                 index = 0;
1943                 goto retry;
1944         }
1945
1946         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1947                 mapping->writeback_index = index;
1948
1949         return rc;
1950 }
1951
1952 static int
1953 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1954 {
1955         int rc;
1956         unsigned int xid;
1957
1958         xid = get_xid();
1959 /* BB add check for wbc flags */
1960         page_cache_get(page);
1961         if (!PageUptodate(page))
1962                 cFYI(1, "ppw - page not up to date");
1963
1964         /*
1965          * Set the "writeback" flag, and clear "dirty" in the radix tree.
1966          *
1967          * A writepage() implementation always needs to do either this,
1968          * or re-dirty the page with "redirty_page_for_writepage()" in
1969          * the case of a failure.
1970          *
1971          * Just unlocking the page will cause the radix tree tag-bits
1972          * to fail to update with the state of the page correctly.
1973          */
1974         set_page_writeback(page);
1975 retry_write:
1976         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1977         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1978                 goto retry_write;
1979         else if (rc == -EAGAIN)
1980                 redirty_page_for_writepage(wbc, page);
1981         else if (rc != 0)
1982                 SetPageError(page);
1983         else
1984                 SetPageUptodate(page);
1985         end_page_writeback(page);
1986         page_cache_release(page);
1987         free_xid(xid);
1988         return rc;
1989 }
1990
/*
 * cifs_writepage - write one page via cifs_writepage_locked(), then unlock it
 * @page: page to write
 * @wbc:  writeback control passed through unchanged
 *
 * Returns whatever cifs_writepage_locked() returned.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int status;

        status = cifs_writepage_locked(page, wbc);
        unlock_page(page);

        return status;
}
1997
1998 static int cifs_write_end(struct file *file, struct address_space *mapping,
1999                         loff_t pos, unsigned len, unsigned copied,
2000                         struct page *page, void *fsdata)
2001 {
2002         int rc;
2003         struct inode *inode = mapping->host;
2004         struct cifsFileInfo *cfile = file->private_data;
2005         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2006         __u32 pid;
2007
2008         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2009                 pid = cfile->pid;
2010         else
2011                 pid = current->tgid;
2012
2013         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2014                  page, pos, copied);
2015
2016         if (PageChecked(page)) {
2017                 if (copied == len)
2018                         SetPageUptodate(page);
2019                 ClearPageChecked(page);
2020         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2021                 SetPageUptodate(page);
2022
2023         if (!PageUptodate(page)) {
2024                 char *page_data;
2025                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2026                 unsigned int xid;
2027
2028                 xid = get_xid();
2029                 /* this is probably better than directly calling
2030                    partialpage_write since in this function the file handle is
2031                    known which we might as well leverage */
2032                 /* BB check if anything else missing out of ppw
2033                    such as updating last write time */
2034                 page_data = kmap(page);
2035                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2036                 /* if (rc < 0) should we set writebehind rc? */
2037                 kunmap(page);
2038
2039                 free_xid(xid);
2040         } else {
2041                 rc = copied;
2042                 pos += copied;
2043                 set_page_dirty(page);
2044         }
2045
2046         if (rc > 0) {
2047                 spin_lock(&inode->i_lock);
2048                 if (pos > inode->i_size)
2049                         i_size_write(inode, pos);
2050                 spin_unlock(&inode->i_lock);
2051         }
2052
2053         unlock_page(page);
2054         page_cache_release(page);
2055
2056         return rc;
2057 }
2058
2059 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2060                       int datasync)
2061 {
2062         unsigned int xid;
2063         int rc = 0;
2064         struct cifs_tcon *tcon;
2065         struct TCP_Server_Info *server;
2066         struct cifsFileInfo *smbfile = file->private_data;
2067         struct inode *inode = file->f_path.dentry->d_inode;
2068         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2069
2070         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2071         if (rc)
2072                 return rc;
2073         mutex_lock(&inode->i_mutex);
2074
2075         xid = get_xid();
2076
2077         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2078                 file->f_path.dentry->d_name.name, datasync);
2079
2080         if (!CIFS_I(inode)->clientCanCacheRead) {
2081                 rc = cifs_invalidate_mapping(inode);
2082                 if (rc) {
2083                         cFYI(1, "rc: %d during invalidate phase", rc);
2084                         rc = 0; /* don't care about it in fsync */
2085                 }
2086         }
2087
2088         tcon = tlink_tcon(smbfile->tlink);
2089         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2090                 server = tcon->ses->server;
2091                 if (server->ops->flush)
2092                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2093                 else
2094                         rc = -ENOSYS;
2095         }
2096
2097         free_xid(xid);
2098         mutex_unlock(&inode->i_mutex);
2099         return rc;
2100 }
2101
2102 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2103 {
2104         unsigned int xid;
2105         int rc = 0;
2106         struct cifs_tcon *tcon;
2107         struct TCP_Server_Info *server;
2108         struct cifsFileInfo *smbfile = file->private_data;
2109         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2110         struct inode *inode = file->f_mapping->host;
2111
2112         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2113         if (rc)
2114                 return rc;
2115         mutex_lock(&inode->i_mutex);
2116
2117         xid = get_xid();
2118
2119         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2120                 file->f_path.dentry->d_name.name, datasync);
2121
2122         tcon = tlink_tcon(smbfile->tlink);
2123         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2124                 server = tcon->ses->server;
2125                 if (server->ops->flush)
2126                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2127                 else
2128                         rc = -ENOSYS;
2129         }
2130
2131         free_xid(xid);
2132         mutex_unlock(&inode->i_mutex);
2133         return rc;
2134 }
2135
2136 /*
2137  * As file closes, flush all cached write data for this inode checking
2138  * for write behind errors.
2139  */
2140 int cifs_flush(struct file *file, fl_owner_t id)
2141 {
2142         struct inode *inode = file->f_path.dentry->d_inode;
2143         int rc = 0;
2144
2145         if (file->f_mode & FMODE_WRITE)
2146                 rc = filemap_write_and_wait(inode->i_mapping);
2147
2148         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2149
2150         return rc;
2151 }
2152
2153 static int
2154 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2155 {
2156         int rc = 0;
2157         unsigned long i;
2158
2159         for (i = 0; i < num_pages; i++) {
2160                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2161                 if (!pages[i]) {
2162                         /*
2163                          * save number of pages we have already allocated and
2164                          * return with ENOMEM error
2165                          */
2166                         num_pages = i;
2167                         rc = -ENOMEM;
2168                         break;
2169                 }
2170         }
2171
2172         if (rc) {
2173                 for (i = 0; i < num_pages; i++)
2174                         put_page(pages[i]);
2175         }
2176         return rc;
2177 }
2178
2179 static inline
2180 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2181 {
2182         size_t num_pages;
2183         size_t clen;
2184
2185         clen = min_t(const size_t, len, wsize);
2186         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2187
2188         if (cur_len)
2189                 *cur_len = clen;
2190
2191         return num_pages;
2192 }
2193
2194 static void
2195 cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2196 {
2197         int i;
2198         size_t bytes = wdata->bytes;
2199
2200         /* marshal up the pages into iov array */
2201         for (i = 0; i < wdata->nr_pages; i++) {
2202                 iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2203                 iov[i + 1].iov_base = kmap(wdata->pages[i]);
2204                 bytes -= iov[i + 1].iov_len;
2205         }
2206 }
2207
2208 static void
2209 cifs_uncached_writev_complete(struct work_struct *work)
2210 {
2211         int i;
2212         struct cifs_writedata *wdata = container_of(work,
2213                                         struct cifs_writedata, work);
2214         struct inode *inode = wdata->cfile->dentry->d_inode;
2215         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2216
2217         spin_lock(&inode->i_lock);
2218         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2219         if (cifsi->server_eof > inode->i_size)
2220                 i_size_write(inode, cifsi->server_eof);
2221         spin_unlock(&inode->i_lock);
2222
2223         complete(&wdata->done);
2224
2225         if (wdata->result != -EAGAIN) {
2226                 for (i = 0; i < wdata->nr_pages; i++)
2227                         put_page(wdata->pages[i]);
2228         }
2229
2230         kref_put(&wdata->refcount, cifs_writedata_release);
2231 }
2232
2233 /* attempt to send write to server, retry on any -EAGAIN errors */
2234 static int
2235 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2236 {
2237         int rc;
2238
2239         do {
2240                 if (wdata->cfile->invalidHandle) {
2241                         rc = cifs_reopen_file(wdata->cfile, false);
2242                         if (rc != 0)
2243                                 continue;
2244                 }
2245                 rc = cifs_async_writev(wdata);
2246         } while (rc == -EAGAIN);
2247
2248         return rc;
2249 }
2250
2251 static ssize_t
2252 cifs_iovec_write(struct file *file, const struct iovec *iov,
2253                  unsigned long nr_segs, loff_t *poffset)
2254 {
2255         unsigned long nr_pages, i;
2256         size_t copied, len, cur_len;
2257         ssize_t total_written = 0;
2258         loff_t offset;
2259         struct iov_iter it;
2260         struct cifsFileInfo *open_file;
2261         struct cifs_tcon *tcon;
2262         struct cifs_sb_info *cifs_sb;
2263         struct cifs_writedata *wdata, *tmp;
2264         struct list_head wdata_list;
2265         int rc;
2266         pid_t pid;
2267
2268         len = iov_length(iov, nr_segs);
2269         if (!len)
2270                 return 0;
2271
2272         rc = generic_write_checks(file, poffset, &len, 0);
2273         if (rc)
2274                 return rc;
2275
2276         INIT_LIST_HEAD(&wdata_list);
2277         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2278         open_file = file->private_data;
2279         tcon = tlink_tcon(open_file->tlink);
2280         offset = *poffset;
2281
2282         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2283                 pid = open_file->pid;
2284         else
2285                 pid = current->tgid;
2286
2287         iov_iter_init(&it, iov, nr_segs, len, 0);
2288         do {
2289                 size_t save_len;
2290
2291                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2292                 wdata = cifs_writedata_alloc(nr_pages,
2293                                              cifs_uncached_writev_complete);
2294                 if (!wdata) {
2295                         rc = -ENOMEM;
2296                         break;
2297                 }
2298
2299                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2300                 if (rc) {
2301                         kfree(wdata);
2302                         break;
2303                 }
2304
2305                 save_len = cur_len;
2306                 for (i = 0; i < nr_pages; i++) {
2307                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2308                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2309                                                          0, copied);
2310                         cur_len -= copied;
2311                         iov_iter_advance(&it, copied);
2312                 }
2313                 cur_len = save_len - cur_len;
2314
2315                 wdata->sync_mode = WB_SYNC_ALL;
2316                 wdata->nr_pages = nr_pages;
2317                 wdata->offset = (__u64)offset;
2318                 wdata->cfile = cifsFileInfo_get(open_file);
2319                 wdata->pid = pid;
2320                 wdata->bytes = cur_len;
2321                 wdata->marshal_iov = cifs_uncached_marshal_iov;
2322                 rc = cifs_uncached_retry_writev(wdata);
2323                 if (rc) {
2324                         kref_put(&wdata->refcount, cifs_writedata_release);
2325                         break;
2326                 }
2327
2328                 list_add_tail(&wdata->list, &wdata_list);
2329                 offset += cur_len;
2330                 len -= cur_len;
2331         } while (len > 0);
2332
2333         /*
2334          * If at least one write was successfully sent, then discard any rc
2335          * value from the later writes. If the other write succeeds, then
2336          * we'll end up returning whatever was written. If it fails, then
2337          * we'll get a new rc value from that.
2338          */
2339         if (!list_empty(&wdata_list))
2340                 rc = 0;
2341
2342         /*
2343          * Wait for and collect replies for any successful sends in order of
2344          * increasing offset. Once an error is hit or we get a fatal signal
2345          * while waiting, then return without waiting for any more replies.
2346          */
2347 restart_loop:
2348         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2349                 if (!rc) {
2350                         /* FIXME: freezable too? */
2351                         rc = wait_for_completion_killable(&wdata->done);
2352                         if (rc)
2353                                 rc = -EINTR;
2354                         else if (wdata->result)
2355                                 rc = wdata->result;
2356                         else
2357                                 total_written += wdata->bytes;
2358
2359                         /* resend call if it's a retryable error */
2360                         if (rc == -EAGAIN) {
2361                                 rc = cifs_uncached_retry_writev(wdata);
2362                                 goto restart_loop;
2363                         }
2364                 }
2365                 list_del_init(&wdata->list);
2366                 kref_put(&wdata->refcount, cifs_writedata_release);
2367         }
2368
2369         if (total_written > 0)
2370                 *poffset += total_written;
2371
2372         cifs_stats_bytes_written(tcon, total_written);
2373         return total_written ? total_written : (ssize_t)rc;
2374 }
2375
2376 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2377                                 unsigned long nr_segs, loff_t pos)
2378 {
2379         ssize_t written;
2380         struct inode *inode;
2381
2382         inode = iocb->ki_filp->f_path.dentry->d_inode;
2383
2384         /*
2385          * BB - optimize the way when signing is disabled. We can drop this
2386          * extra memory-to-memory copying and use iovec buffers for constructing
2387          * write request.
2388          */
2389
2390         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2391         if (written > 0) {
2392                 CIFS_I(inode)->invalid_mapping = true;
2393                 iocb->ki_pos = pos;
2394         }
2395
2396         return written;
2397 }
2398
2399 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2400                            unsigned long nr_segs, loff_t pos)
2401 {
2402         struct inode *inode;
2403
2404         inode = iocb->ki_filp->f_path.dentry->d_inode;
2405
2406         if (CIFS_I(inode)->clientCanCacheAll)
2407                 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2408
2409         /*
2410          * In strict cache mode we need to write the data to the server exactly
2411          * from the pos to pos+len-1 rather than flush all affected pages
2412          * because it may cause a error with mandatory locks on these pages but
2413          * not on the region from pos to ppos+len-1.
2414          */
2415
2416         return cifs_user_writev(iocb, iov, nr_segs, pos);
2417 }
2418
2419 static struct cifs_readdata *
2420 cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2421 {
2422         struct cifs_readdata *rdata;
2423
2424         rdata = kzalloc(sizeof(*rdata) +
2425                         sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2426         if (rdata != NULL) {
2427                 kref_init(&rdata->refcount);
2428                 INIT_LIST_HEAD(&rdata->list);
2429                 init_completion(&rdata->done);
2430                 INIT_WORK(&rdata->work, complete);
2431                 INIT_LIST_HEAD(&rdata->pages);
2432         }
2433         return rdata;
2434 }
2435
2436 void
2437 cifs_readdata_release(struct kref *refcount)
2438 {
2439         struct cifs_readdata *rdata = container_of(refcount,
2440                                         struct cifs_readdata, refcount);
2441
2442         if (rdata->cfile)
2443                 cifsFileInfo_put(rdata->cfile);
2444
2445         kfree(rdata);
2446 }
2447
2448 static int
2449 cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2450 {
2451         int rc = 0;
2452         struct page *page, *tpage;
2453         unsigned int i;
2454
2455         for (i = 0; i < npages; i++) {
2456                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2457                 if (!page) {
2458                         rc = -ENOMEM;
2459                         break;
2460                 }
2461                 list_add(&page->lru, list);
2462         }
2463
2464         if (rc) {
2465                 list_for_each_entry_safe(page, tpage, list, lru) {
2466                         list_del(&page->lru);
2467                         put_page(page);
2468                 }
2469         }
2470         return rc;
2471 }
2472
2473 static void
2474 cifs_uncached_readdata_release(struct kref *refcount)
2475 {
2476         struct page *page, *tpage;
2477         struct cifs_readdata *rdata = container_of(refcount,
2478                                         struct cifs_readdata, refcount);
2479
2480         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2481                 list_del(&page->lru);
2482                 put_page(page);
2483         }
2484         cifs_readdata_release(refcount);
2485 }
2486
2487 static int
2488 cifs_retry_async_readv(struct cifs_readdata *rdata)
2489 {
2490         int rc;
2491         struct TCP_Server_Info *server;
2492
2493         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2494
2495         do {
2496                 if (rdata->cfile->invalidHandle) {
2497                         rc = cifs_reopen_file(rdata->cfile, true);
2498                         if (rc != 0)
2499                                 continue;
2500                 }
2501                 rc = server->ops->async_readv(rdata);
2502         } while (rc == -EAGAIN);
2503
2504         return rc;
2505 }
2506
2507 /**
2508  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2509  * @rdata:      the readdata response with list of pages holding data
2510  * @iov:        vector in which we should copy the data
2511  * @nr_segs:    number of segments in vector
2512  * @offset:     offset into file of the first iovec
2513  * @copied:     used to return the amount of data copied to the iov
2514  *
2515  * This function copies data from a list of pages in a readdata response into
2516  * an array of iovecs. It will first calculate where the data should go
2517  * based on the info in the readdata and then copy the data into that spot.
2518  */
2519 static ssize_t
2520 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2521                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2522 {
2523         int rc = 0;
2524         struct iov_iter ii;
2525         size_t pos = rdata->offset - offset;
2526         struct page *page, *tpage;
2527         ssize_t remaining = rdata->bytes;
2528         unsigned char *pdata;
2529
2530         /* set up iov_iter and advance to the correct offset */
2531         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2532         iov_iter_advance(&ii, pos);
2533
2534         *copied = 0;
2535         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2536                 ssize_t copy;
2537
2538                 /* copy a whole page or whatever's left */
2539                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2540
2541                 /* ...but limit it to whatever space is left in the iov */
2542                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2543
2544                 /* go while there's data to be copied and no errors */
2545                 if (copy && !rc) {
2546                         pdata = kmap(page);
2547                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2548                                                 (int)copy);
2549                         kunmap(page);
2550                         if (!rc) {
2551                                 *copied += copy;
2552                                 remaining -= copy;
2553                                 iov_iter_advance(&ii, copy);
2554                         }
2555                 }
2556
2557                 list_del(&page->lru);
2558                 put_page(page);
2559         }
2560
2561         return rc;
2562 }
2563
2564 static void
2565 cifs_uncached_readv_complete(struct work_struct *work)
2566 {
2567         struct cifs_readdata *rdata = container_of(work,
2568                                                 struct cifs_readdata, work);
2569
2570         /* if the result is non-zero then the pages weren't kmapped */
2571         if (rdata->result == 0) {
2572                 struct page *page;
2573
2574                 list_for_each_entry(page, &rdata->pages, lru)
2575                         kunmap(page);
2576         }
2577
2578         complete(&rdata->done);
2579         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2580 }
2581
2582 static int
2583 cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2584                                 unsigned int remaining)
2585 {
2586         int len = 0;
2587         struct page *page, *tpage;
2588
2589         rdata->nr_iov = 1;
2590         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2591                 if (remaining >= PAGE_SIZE) {
2592                         /* enough data to fill the page */
2593                         rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2594                         rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2595                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2596                                 rdata->nr_iov, page->index,
2597                                 rdata->iov[rdata->nr_iov].iov_base,
2598                                 rdata->iov[rdata->nr_iov].iov_len);
2599                         ++rdata->nr_iov;
2600                         len += PAGE_SIZE;
2601                         remaining -= PAGE_SIZE;
2602                 } else if (remaining > 0) {
2603                         /* enough for partial page, fill and zero the rest */
2604                         rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2605                         rdata->iov[rdata->nr_iov].iov_len = remaining;
2606                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2607                                 rdata->nr_iov, page->index,
2608                                 rdata->iov[rdata->nr_iov].iov_base,
2609                                 rdata->iov[rdata->nr_iov].iov_len);
2610                         memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2611                                 '\0', PAGE_SIZE - remaining);
2612                         ++rdata->nr_iov;
2613                         len += remaining;
2614                         remaining = 0;
2615                 } else {
2616                         /* no need to hold page hostage */
2617                         list_del(&page->lru);
2618                         put_page(page);
2619                 }
2620         }
2621
2622         return len;
2623 }
2624
2625 static ssize_t
2626 cifs_iovec_read(struct file *file, const struct iovec *iov,
2627                  unsigned long nr_segs, loff_t *poffset)
2628 {
2629         ssize_t rc;
2630         size_t len, cur_len;
2631         ssize_t total_read = 0;
2632         loff_t offset = *poffset;
2633         unsigned int npages;
2634         struct cifs_sb_info *cifs_sb;
2635         struct cifs_tcon *tcon;
2636         struct cifsFileInfo *open_file;
2637         struct cifs_readdata *rdata, *tmp;
2638         struct list_head rdata_list;
2639         pid_t pid;
2640
2641         if (!nr_segs)
2642                 return 0;
2643
2644         len = iov_length(iov, nr_segs);
2645         if (!len)
2646                 return 0;
2647
2648         INIT_LIST_HEAD(&rdata_list);
2649         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2650         open_file = file->private_data;
2651         tcon = tlink_tcon(open_file->tlink);
2652
2653         if (!tcon->ses->server->ops->async_readv)
2654                 return -ENOSYS;
2655
2656         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2657                 pid = open_file->pid;
2658         else
2659                 pid = current->tgid;
2660
2661         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2662                 cFYI(1, "attempting read on write only file instance");
2663
2664         do {
2665                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2666                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2667
2668                 /* allocate a readdata struct */
2669                 rdata = cifs_readdata_alloc(npages,
2670                                             cifs_uncached_readv_complete);
2671                 if (!rdata) {
2672                         rc = -ENOMEM;
2673                         goto error;
2674                 }
2675
2676                 rc = cifs_read_allocate_pages(&rdata->pages, npages);
2677                 if (rc)
2678                         goto error;
2679
2680                 rdata->cfile = cifsFileInfo_get(open_file);
2681                 rdata->offset = offset;
2682                 rdata->bytes = cur_len;
2683                 rdata->pid = pid;
2684                 rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2685
2686                 rc = cifs_retry_async_readv(rdata);
2687 error:
2688                 if (rc) {
2689                         kref_put(&rdata->refcount,
2690                                  cifs_uncached_readdata_release);
2691                         break;
2692                 }
2693
2694                 list_add_tail(&rdata->list, &rdata_list);
2695                 offset += cur_len;
2696                 len -= cur_len;
2697         } while (len > 0);
2698
2699         /* if at least one read request send succeeded, then reset rc */
2700         if (!list_empty(&rdata_list))
2701                 rc = 0;
2702
2703         /* the loop below should proceed in the order of increasing offsets */
2704 restart_loop:
2705         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2706                 if (!rc) {
2707                         ssize_t copied;
2708
2709                         /* FIXME: freezable sleep too? */
2710                         rc = wait_for_completion_killable(&rdata->done);
2711                         if (rc)
2712                                 rc = -EINTR;
2713                         else if (rdata->result)
2714                                 rc = rdata->result;
2715                         else {
2716                                 rc = cifs_readdata_to_iov(rdata, iov,
2717                                                         nr_segs, *poffset,
2718                                                         &copied);
2719                                 total_read += copied;
2720                         }
2721
2722                         /* resend call if it's a retryable error */
2723                         if (rc == -EAGAIN) {
2724                                 rc = cifs_retry_async_readv(rdata);
2725                                 goto restart_loop;
2726                         }
2727                 }
2728                 list_del_init(&rdata->list);
2729                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2730         }
2731
2732         cifs_stats_bytes_read(tcon, total_read);
2733         *poffset += total_read;
2734
2735         return total_read ? total_read : rc;
2736 }
2737
2738 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2739                                unsigned long nr_segs, loff_t pos)
2740 {
2741         ssize_t read;
2742
2743         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2744         if (read > 0)
2745                 iocb->ki_pos = pos;
2746
2747         return read;
2748 }
2749
2750 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2751                           unsigned long nr_segs, loff_t pos)
2752 {
2753         struct inode *inode;
2754
2755         inode = iocb->ki_filp->f_path.dentry->d_inode;
2756
2757         if (CIFS_I(inode)->clientCanCacheRead)
2758                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2759
2760         /*
2761          * In strict cache mode we need to read from the server all the time
2762          * if we don't have level II oplock because the server can delay mtime
2763          * change - so we can't make a decision about inode invalidating.
2764          * And we can also fail with pagereading if there are mandatory locks
2765          * on pages affected by this read but not on the region from pos to
2766          * pos+len-1.
2767          */
2768
2769         return cifs_user_readv(iocb, iov, nr_segs, pos);
2770 }
2771
2772 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2773                          loff_t *poffset)
2774 {
2775         int rc = -EACCES;
2776         unsigned int bytes_read = 0;
2777         unsigned int total_read;
2778         unsigned int current_read_size;
2779         unsigned int rsize;
2780         struct cifs_sb_info *cifs_sb;
2781         struct cifs_tcon *tcon;
2782         unsigned int xid;
2783         char *current_offset;
2784         struct cifsFileInfo *open_file;
2785         struct cifs_io_parms io_parms;
2786         int buf_type = CIFS_NO_BUFFER;
2787         __u32 pid;
2788
2789         xid = get_xid();
2790         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2791
2792         /* FIXME: set up handlers for larger reads and/or convert to async */
2793         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2794
2795         if (file->private_data == NULL) {
2796                 rc = -EBADF;
2797                 free_xid(xid);
2798                 return rc;
2799         }
2800         open_file = file->private_data;
2801         tcon = tlink_tcon(open_file->tlink);
2802
2803         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2804                 pid = open_file->pid;
2805         else
2806                 pid = current->tgid;
2807
2808         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2809                 cFYI(1, "attempting read on write only file instance");
2810
2811         for (total_read = 0, current_offset = read_data;
2812              read_size > total_read;
2813              total_read += bytes_read, current_offset += bytes_read) {
2814                 current_read_size = min_t(uint, read_size - total_read, rsize);
2815                 /*
2816                  * For windows me and 9x we do not want to request more than it
2817                  * negotiated since it will refuse the read then.
2818                  */
2819                 if ((tcon->ses) && !(tcon->ses->capabilities &
2820                                 tcon->ses->server->vals->cap_large_files)) {
2821                         current_read_size = min_t(uint, current_read_size,
2822                                         CIFSMaxBufSize);
2823                 }
2824                 rc = -EAGAIN;
2825                 while (rc == -EAGAIN) {
2826                         if (open_file->invalidHandle) {
2827                                 rc = cifs_reopen_file(open_file, true);
2828                                 if (rc != 0)
2829                                         break;
2830                         }
2831                         io_parms.netfid = open_file->fid.netfid;
2832                         io_parms.pid = pid;
2833                         io_parms.tcon = tcon;
2834                         io_parms.offset = *poffset;
2835                         io_parms.length = current_read_size;
2836                         rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2837                                          &current_offset, &buf_type);
2838                 }
2839                 if (rc || (bytes_read == 0)) {
2840                         if (total_read) {
2841                                 break;
2842                         } else {
2843                                 free_xid(xid);
2844                                 return rc;
2845                         }
2846                 } else {
2847                         cifs_stats_bytes_read(tcon, total_read);
2848                         *poffset += bytes_read;
2849                 }
2850         }
2851         free_xid(xid);
2852         return total_read;
2853 }
2854
2855 /*
2856  * If the page is mmap'ed into a process' page tables, then we need to make
2857  * sure that it doesn't change while being written back.
2858  */
2859 static int
2860 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2861 {
2862         struct page *page = vmf->page;
2863
2864         lock_page(page);
2865         return VM_FAULT_LOCKED;
2866 }
2867
2868 static struct vm_operations_struct cifs_file_vm_ops = {
2869         .fault = filemap_fault,
2870         .page_mkwrite = cifs_page_mkwrite,
2871 };
2872
2873 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2874 {
2875         int rc, xid;
2876         struct inode *inode = file->f_path.dentry->d_inode;
2877
2878         xid = get_xid();
2879
2880         if (!CIFS_I(inode)->clientCanCacheRead) {
2881                 rc = cifs_invalidate_mapping(inode);
2882                 if (rc)
2883                         return rc;
2884         }
2885
2886         rc = generic_file_mmap(file, vma);
2887         if (rc == 0)
2888                 vma->vm_ops = &cifs_file_vm_ops;
2889         free_xid(xid);
2890         return rc;
2891 }
2892
2893 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2894 {
2895         int rc, xid;
2896
2897         xid = get_xid();
2898         rc = cifs_revalidate_file(file);
2899         if (rc) {
2900                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2901                 free_xid(xid);
2902                 return rc;
2903         }
2904         rc = generic_file_mmap(file, vma);
2905         if (rc == 0)
2906                 vma->vm_ops = &cifs_file_vm_ops;
2907         free_xid(xid);
2908         return rc;
2909 }
2910
2911 static void
2912 cifs_readv_complete(struct work_struct *work)
2913 {
2914         struct cifs_readdata *rdata = container_of(work,
2915                                                 struct cifs_readdata, work);
2916         struct page *page, *tpage;
2917
2918         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2919                 list_del(&page->lru);
2920                 lru_cache_add_file(page);
2921
2922                 if (rdata->result == 0) {
2923                         kunmap(page);
2924                         flush_dcache_page(page);
2925                         SetPageUptodate(page);
2926                 }
2927
2928                 unlock_page(page);
2929
2930                 if (rdata->result == 0)
2931                         cifs_readpage_to_fscache(rdata->mapping->host, page);
2932
2933                 page_cache_release(page);
2934         }
2935         kref_put(&rdata->refcount, cifs_readdata_release);
2936 }
2937
2938 static int
2939 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2940 {
2941         int len = 0;
2942         struct page *page, *tpage;
2943         u64 eof;
2944         pgoff_t eof_index;
2945
2946         /* determine the eof that the server (probably) has */
2947         eof = CIFS_I(rdata->mapping->host)->server_eof;
2948         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2949         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2950
2951         rdata->nr_iov = 1;
2952         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2953                 if (remaining >= PAGE_CACHE_SIZE) {
2954                         /* enough data to fill the page */
2955                         rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2956                         rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2957                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2958                                 rdata->nr_iov, page->index,
2959                                 rdata->iov[rdata->nr_iov].iov_base,
2960                                 rdata->iov[rdata->nr_iov].iov_len);
2961                         ++rdata->nr_iov;
2962                         len += PAGE_CACHE_SIZE;
2963                         remaining -= PAGE_CACHE_SIZE;
2964                 } else if (remaining > 0) {
2965                         /* enough for partial page, fill and zero the rest */
2966                         rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2967                         rdata->iov[rdata->nr_iov].iov_len = remaining;
2968                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2969                                 rdata->nr_iov, page->index,
2970                                 rdata->iov[rdata->nr_iov].iov_base,
2971                                 rdata->iov[rdata->nr_iov].iov_len);
2972                         memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2973                                 '\0', PAGE_CACHE_SIZE - remaining);
2974                         ++rdata->nr_iov;
2975                         len += remaining;
2976                         remaining = 0;
2977                 } else if (page->index > eof_index) {
2978                         /*
2979                          * The VFS will not try to do readahead past the
2980                          * i_size, but it's possible that we have outstanding
2981                          * writes with gaps in the middle and the i_size hasn't
2982                          * caught up yet. Populate those with zeroed out pages
2983                          * to prevent the VFS from repeatedly attempting to
2984                          * fill them until the writes are flushed.
2985                          */
2986                         zero_user(page, 0, PAGE_CACHE_SIZE);
2987                         list_del(&page->lru);
2988                         lru_cache_add_file(page);
2989                         flush_dcache_page(page);
2990                         SetPageUptodate(page);
2991                         unlock_page(page);
2992                         page_cache_release(page);
2993                 } else {
2994                         /* no need to hold page hostage */
2995                         list_del(&page->lru);
2996                         lru_cache_add_file(page);
2997                         unlock_page(page);
2998                         page_cache_release(page);
2999                 }
3000         }
3001
3002         return len;
3003 }
3004
3005 static int cifs_readpages(struct file *file, struct address_space *mapping,
3006         struct list_head *page_list, unsigned num_pages)
3007 {
3008         int rc;
3009         struct list_head tmplist;
3010         struct cifsFileInfo *open_file = file->private_data;
3011         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3012         unsigned int rsize = cifs_sb->rsize;
3013         pid_t pid;
3014
3015         /*
3016          * Give up immediately if rsize is too small to read an entire page.
3017          * The VFS will fall back to readpage. We should never reach this
3018          * point however since we set ra_pages to 0 when the rsize is smaller
3019          * than a cache page.
3020          */
3021         if (unlikely(rsize < PAGE_CACHE_SIZE))
3022                 return 0;
3023
3024         /*
3025          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3026          * immediately if the cookie is negative
3027          */
3028         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3029                                          &num_pages);
3030         if (rc == 0)
3031                 return rc;
3032
3033         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3034                 pid = open_file->pid;
3035         else
3036                 pid = current->tgid;
3037
3038         rc = 0;
3039         INIT_LIST_HEAD(&tmplist);
3040
3041         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3042                 mapping, num_pages);
3043
3044         /*
3045          * Start with the page at end of list and move it to private
3046          * list. Do the same with any following pages until we hit
3047          * the rsize limit, hit an index discontinuity, or run out of
3048          * pages. Issue the async read and then start the loop again
3049          * until the list is empty.
3050          *
3051          * Note that list order is important. The page_list is in
3052          * the order of declining indexes. When we put the pages in
3053          * the rdata->pages, then we want them in increasing order.
3054          */
3055         while (!list_empty(page_list)) {
3056                 unsigned int bytes = PAGE_CACHE_SIZE;
3057                 unsigned int expected_index;
3058                 unsigned int nr_pages = 1;
3059                 loff_t offset;
3060                 struct page *page, *tpage;
3061                 struct cifs_readdata *rdata;
3062
3063                 page = list_entry(page_list->prev, struct page, lru);
3064
3065                 /*
3066                  * Lock the page and put it in the cache. Since no one else
3067                  * should have access to this page, we're safe to simply set
3068                  * PG_locked without checking it first.
3069                  */
3070                 __set_page_locked(page);
3071                 rc = add_to_page_cache_locked(page, mapping,
3072                                               page->index, GFP_KERNEL);
3073
3074                 /* give up if we can't stick it in the cache */
3075                 if (rc) {
3076                         __clear_page_locked(page);
3077                         break;
3078                 }
3079
3080                 /* move first page to the tmplist */
3081                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3082                 list_move_tail(&page->lru, &tmplist);
3083
3084                 /* now try and add more pages onto the request */
3085                 expected_index = page->index + 1;
3086                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3087                         /* discontinuity ? */
3088                         if (page->index != expected_index)
3089                                 break;
3090
3091                         /* would this page push the read over the rsize? */
3092                         if (bytes + PAGE_CACHE_SIZE > rsize)
3093                                 break;
3094
3095                         __set_page_locked(page);
3096                         if (add_to_page_cache_locked(page, mapping,
3097                                                 page->index, GFP_KERNEL)) {
3098                                 __clear_page_locked(page);
3099                                 break;
3100                         }
3101                         list_move_tail(&page->lru, &tmplist);
3102                         bytes += PAGE_CACHE_SIZE;
3103                         expected_index++;
3104                         nr_pages++;
3105                 }
3106
3107                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3108                 if (!rdata) {
3109                         /* best to give up if we're out of mem */
3110                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3111                                 list_del(&page->lru);
3112                                 lru_cache_add_file(page);
3113                                 unlock_page(page);
3114                                 page_cache_release(page);
3115                         }
3116                         rc = -ENOMEM;
3117                         break;
3118                 }
3119
3120                 rdata->cfile = cifsFileInfo_get(open_file);
3121                 rdata->mapping = mapping;
3122                 rdata->offset = offset;
3123                 rdata->bytes = bytes;
3124                 rdata->pid = pid;
3125                 rdata->marshal_iov = cifs_readpages_marshal_iov;
3126                 list_splice_init(&tmplist, &rdata->pages);
3127
3128                 rc = cifs_retry_async_readv(rdata);
3129                 if (rc != 0) {
3130                         list_for_each_entry_safe(page, tpage, &rdata->pages,
3131                                                  lru) {
3132                                 list_del(&page->lru);
3133                                 lru_cache_add_file(page);
3134                                 unlock_page(page);
3135                                 page_cache_release(page);
3136                         }
3137                         kref_put(&rdata->refcount, cifs_readdata_release);
3138                         break;
3139                 }
3140
3141                 kref_put(&rdata->refcount, cifs_readdata_release);
3142         }
3143
3144         return rc;
3145 }
3146
3147 static int cifs_readpage_worker(struct file *file, struct page *page,
3148         loff_t *poffset)
3149 {
3150         char *read_data;
3151         int rc;
3152
3153         /* Is the page cached? */
3154         rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3155         if (rc == 0)
3156                 goto read_complete;
3157
3158         page_cache_get(page);
3159         read_data = kmap(page);
3160         /* for reads over a certain size could initiate async read ahead */
3161
3162         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3163
3164         if (rc < 0)
3165                 goto io_error;
3166         else
3167                 cFYI(1, "Bytes read %d", rc);
3168
3169         file->f_path.dentry->d_inode->i_atime =
3170                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3171
3172         if (PAGE_CACHE_SIZE > rc)
3173                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3174
3175         flush_dcache_page(page);
3176         SetPageUptodate(page);
3177
3178         /* send this page to the cache */
3179         cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3180
3181         rc = 0;
3182
3183 io_error:
3184         kunmap(page);
3185         page_cache_release(page);
3186
3187 read_complete:
3188         return rc;
3189 }
3190
3191 static int cifs_readpage(struct file *file, struct page *page)
3192 {
3193         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3194         int rc = -EACCES;
3195         unsigned int xid;
3196
3197         xid = get_xid();
3198
3199         if (file->private_data == NULL) {
3200                 rc = -EBADF;
3201                 free_xid(xid);
3202                 return rc;
3203         }
3204
3205         cFYI(1, "readpage %p at offset %d 0x%x",
3206                  page, (int)offset, (int)offset);
3207
3208         rc = cifs_readpage_worker(file, page, &offset);
3209
3210         unlock_page(page);
3211
3212         free_xid(xid);
3213         return rc;
3214 }
3215
3216 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3217 {
3218         struct cifsFileInfo *open_file;
3219
3220         spin_lock(&cifs_file_list_lock);
3221         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3222                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3223                         spin_unlock(&cifs_file_list_lock);
3224                         return 1;
3225                 }
3226         }
3227         spin_unlock(&cifs_file_list_lock);
3228         return 0;
3229 }
3230
3231 /* We do not want to update the file size from server for inodes
3232    open for write - to avoid races with writepage extending
3233    the file - in the future we could consider allowing
3234    refreshing the inode only on increases in the file size
3235    but this is tricky to do without racing with writebehind
3236    page caching in the current Linux kernel design */
3237 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3238 {
3239         if (!cifsInode)
3240                 return true;
3241
3242         if (is_inode_writable(cifsInode)) {
3243                 /* This inode is open for write at least once */
3244                 struct cifs_sb_info *cifs_sb;
3245
3246                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3247                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3248                         /* since no page cache to corrupt on directio
3249                         we can change size safely */
3250                         return true;
3251                 }
3252
3253                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3254                         return true;
3255
3256                 return false;
3257         } else
3258                 return true;
3259 }
3260
3261 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3262                         loff_t pos, unsigned len, unsigned flags,
3263                         struct page **pagep, void **fsdata)
3264 {
3265         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3266         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3267         loff_t page_start = pos & PAGE_MASK;
3268         loff_t i_size;
3269         struct page *page;
3270         int rc = 0;
3271
3272         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3273
3274         page = grab_cache_page_write_begin(mapping, index, flags);
3275         if (!page) {
3276                 rc = -ENOMEM;
3277                 goto out;
3278         }
3279
3280         if (PageUptodate(page))
3281                 goto out;
3282
3283         /*
3284          * If we write a full page it will be up to date, no need to read from
3285          * the server. If the write is short, we'll end up doing a sync write
3286          * instead.
3287          */
3288         if (len == PAGE_CACHE_SIZE)
3289                 goto out;
3290
3291         /*
3292          * optimize away the read when we have an oplock, and we're not
3293          * expecting to use any of the data we'd be reading in. That
3294          * is, when the page lies beyond the EOF, or straddles the EOF
3295          * and the write will cover all of the existing data.
3296          */
3297         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3298                 i_size = i_size_read(mapping->host);
3299                 if (page_start >= i_size ||
3300                     (offset == 0 && (pos + len) >= i_size)) {
3301                         zero_user_segments(page, 0, offset,
3302                                            offset + len,
3303                                            PAGE_CACHE_SIZE);
3304                         /*
3305                          * PageChecked means that the parts of the page
3306                          * to which we're not writing are considered up
3307                          * to date. Once the data is copied to the
3308                          * page, it can be set uptodate.
3309                          */
3310                         SetPageChecked(page);
3311                         goto out;
3312                 }
3313         }
3314
3315         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3316                 /*
3317                  * might as well read a page, it is fast enough. If we get
3318                  * an error, we don't need to return it. cifs_write_end will
3319                  * do a sync write instead since PG_uptodate isn't set.
3320                  */
3321                 cifs_readpage_worker(file, page, &page_start);
3322         } else {
3323                 /* we could try using another file handle if there is one -
3324                    but how would we lock it to prevent close of that handle
3325                    racing with this read? In any case
3326                    this will be written out by write_end so is fine */
3327         }
3328 out:
3329         *pagep = page;
3330         return rc;
3331 }
3332
3333 static int cifs_release_page(struct page *page, gfp_t gfp)
3334 {
3335         if (PagePrivate(page))
3336                 return 0;
3337
3338         return cifs_fscache_release_page(page, gfp);
3339 }
3340
3341 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3342 {
3343         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3344
3345         if (offset == 0)
3346                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3347 }
3348
3349 static int cifs_launder_page(struct page *page)
3350 {
3351         int rc = 0;
3352         loff_t range_start = page_offset(page);
3353         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3354         struct writeback_control wbc = {
3355                 .sync_mode = WB_SYNC_ALL,
3356                 .nr_to_write = 0,
3357                 .range_start = range_start,
3358                 .range_end = range_end,
3359         };
3360
3361         cFYI(1, "Launder page: %p", page);
3362
3363         if (clear_page_dirty_for_io(page))
3364                 rc = cifs_writepage_locked(page, &wbc);
3365
3366         cifs_fscache_invalidate_page(page, page->mapping->host);
3367         return rc;
3368 }
3369
3370 void cifs_oplock_break(struct work_struct *work)
3371 {
3372         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3373                                                   oplock_break);
3374         struct inode *inode = cfile->dentry->d_inode;
3375         struct cifsInodeInfo *cinode = CIFS_I(inode);
3376         int rc = 0;
3377
3378         if (inode && S_ISREG(inode->i_mode)) {
3379                 if (cinode->clientCanCacheRead)
3380                         break_lease(inode, O_RDONLY);
3381                 else
3382                         break_lease(inode, O_WRONLY);
3383                 rc = filemap_fdatawrite(inode->i_mapping);
3384                 if (cinode->clientCanCacheRead == 0) {
3385                         rc = filemap_fdatawait(inode->i_mapping);
3386                         mapping_set_error(inode->i_mapping, rc);
3387                         invalidate_remote_inode(inode);
3388                 }
3389                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3390         }
3391
3392         rc = cifs_push_locks(cfile);
3393         if (rc)
3394                 cERROR(1, "Push locks rc = %d", rc);
3395
3396         /*
3397          * releasing stale oplock after recent reconnect of smb session using
3398          * a now incorrect file handle is not a data integrity issue but do
3399          * not bother sending an oplock release if session to server still is
3400          * disconnected since oplock already released by the server
3401          */
3402         if (!cfile->oplock_break_cancelled) {
3403                 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->fid.netfid,
3404                                  current->tgid, 0, 0, 0, 0,
3405                                  LOCKING_ANDX_OPLOCK_RELEASE, false,
3406                                  cinode->clientCanCacheRead ? 1 : 0);
3407                 cFYI(1, "Oplock release rc = %d", rc);
3408         }
3409 }
3410
3411 const struct address_space_operations cifs_addr_ops = {
3412         .readpage = cifs_readpage,
3413         .readpages = cifs_readpages,
3414         .writepage = cifs_writepage,
3415         .writepages = cifs_writepages,
3416         .write_begin = cifs_write_begin,
3417         .write_end = cifs_write_end,
3418         .set_page_dirty = __set_page_dirty_nobuffers,
3419         .releasepage = cifs_release_page,
3420         .invalidatepage = cifs_invalidate_page,
3421         .launder_page = cifs_launder_page,
3422 };
3423
3424 /*
3425  * cifs_readpages requires the server to support a buffer large enough to
3426  * contain the header plus one complete page of data.  Otherwise, we need
3427  * to leave cifs_readpages out of the address space operations.
3428  */
3429 const struct address_space_operations cifs_addr_ops_smallbuf = {
3430         .readpage = cifs_readpage,
3431         .writepage = cifs_writepage,
3432         .writepages = cifs_writepages,
3433         .write_begin = cifs_write_begin,
3434         .write_end = cifs_write_end,
3435         .set_page_dirty = __set_page_dirty_nobuffers,
3436         .releasepage = cifs_release_page,
3437         .invalidatepage = cifs_invalidate_page,
3438         .launder_page = cifs_launder_page,
3439 };