CIFS: Reconnect durable handles for SMB2
[platform/adaptation/renesas_rcar/renesas_kernel.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82                          current->comm, current->tgid);
83
84         if (flags & O_TRUNC)
85                 posix_flags |= SMB_O_TRUNC;
86         /* be safe and imply O_SYNC for O_DSYNC */
87         if (flags & O_DSYNC)
88                 posix_flags |= SMB_O_SYNC;
89         if (flags & O_DIRECTORY)
90                 posix_flags |= SMB_O_DIRECTORY;
91         if (flags & O_NOFOLLOW)
92                 posix_flags |= SMB_O_NOFOLLOW;
93         if (flags & O_DIRECT)
94                 posix_flags |= SMB_O_DIRECT;
95
96         return posix_flags;
97 }
98
99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112
/*
 * Open (and possibly create) a file using the SMB POSIX extensions
 * (CIFSPOSIXCreate).
 *
 * On success, *poplock holds the granted oplock and *pnetfid the file
 * handle returned by the server.  If @pinode is non-NULL, the inode is
 * either allocated (when *pinode == NULL) or refreshed from the
 * attributes returned by the server; when the server reports an unknown
 * file type (Type == -1) the inode is left untouched and the caller is
 * expected to do a qpathinfo itself.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* apply the process umask before sending the mode to the server */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_sb->mnt_cifs_flags &
					CIFS_MOUNT_MAP_SPECIAL_CHR);
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
174
/*
 * Open a file with a traditional NT-style create request through
 * server->ops->open and refresh the inode info from the server.
 *
 * @oplock is in/out: the requested oplock level on entry, the granted
 * one on return.  @fid receives the server file handle.  Returns 0 on
 * success or a negative errno (-ENOSYS if the server ops provide no
 * open method).
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* scratch buffer for the file info returned by the open call */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* refresh the cached inode from the info the server returned */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, &fid->netfid);

out:
	kfree(buf);
	return rc;
}
253
254 static bool
255 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
256 {
257         struct cifs_fid_locks *cur;
258         bool has_locks = false;
259
260         down_read(&cinode->lock_sem);
261         list_for_each_entry(cur, &cinode->llist, llist) {
262                 if (!list_empty(&cur->locks)) {
263                         has_locks = true;
264                         break;
265                 }
266         }
267         up_read(&cinode->lock_sem);
268         return has_locks;
269 }
270
/*
 * Allocate and initialize the per-open-file private data (cifsFileInfo),
 * link it into the tcon and inode open-file lists, and hand the fid and
 * oplock to the server ops via ->set_fid.  The pending open embedded in
 * @fid is unlinked here (fid->pending_open->olist), so the caller's
 * cifs_add_pending_open() slot is consumed on success.
 *
 * Returns the new cifsFileInfo (also stored in file->private_data), or
 * NULL on allocation failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* per-fid lock list, hooked into the inode's list under lock_sem */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);

	/* pin the superblock while this file handle is alive */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (oplock == server->vals->oplock_read &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&cifs_file_list_lock);
	/*
	 * NOTE(review): a pending-open oplock other than CIFS_OPLOCK_NO_CHANGE
	 * overrides the one granted at open time - presumably set by a lease
	 * break that raced with this open; confirm against the lease-break
	 * handler.
	 */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	file->private_data = cfile;
	return cfile;
}
340
/*
 * Take an extra reference on the file private data under
 * cifs_file_list_lock.  Pairs with cifsFileInfo_put().  Must be called
 * without cifs_file_list_lock held (it is acquired here).
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file_list_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file_list_lock);
	return cifs_file;
}
349
350 /*
351  * Release a reference on the file private data. This may involve closing
352  * the filehandle out on the server. Must be called without holding
353  * cifs_file_list_lock.
354  */
355 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
356 {
357         struct inode *inode = cifs_file->dentry->d_inode;
358         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
359         struct TCP_Server_Info *server = tcon->ses->server;
360         struct cifsInodeInfo *cifsi = CIFS_I(inode);
361         struct super_block *sb = inode->i_sb;
362         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
363         struct cifsLockInfo *li, *tmp;
364         struct cifs_fid fid;
365         struct cifs_pending_open open;
366
367         spin_lock(&cifs_file_list_lock);
368         if (--cifs_file->count > 0) {
369                 spin_unlock(&cifs_file_list_lock);
370                 return;
371         }
372
373         if (server->ops->get_lease_key)
374                 server->ops->get_lease_key(inode, &fid);
375
376         /* store open in pending opens to make sure we don't miss lease break */
377         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
378
379         /* remove it from the lists */
380         list_del(&cifs_file->flist);
381         list_del(&cifs_file->tlist);
382
383         if (list_empty(&cifsi->openFileList)) {
384                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
385                          cifs_file->dentry->d_inode);
386                 /*
387                  * In strict cache mode we need invalidate mapping on the last
388                  * close  because it may cause a error when we open this file
389                  * again and get at least level II oplock.
390                  */
391                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
392                         CIFS_I(inode)->invalid_mapping = true;
393                 cifs_set_oplock_level(cifsi, 0);
394         }
395         spin_unlock(&cifs_file_list_lock);
396
397         cancel_work_sync(&cifs_file->oplock_break);
398
399         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
400                 struct TCP_Server_Info *server = tcon->ses->server;
401                 unsigned int xid;
402
403                 xid = get_xid();
404                 if (server->ops->close)
405                         server->ops->close(xid, tcon, &cifs_file->fid);
406                 _free_xid(xid);
407         }
408
409         cifs_del_pending_open(&open);
410
411         /*
412          * Delete any outstanding lock records. We'll lose them when the file
413          * is closed anyway.
414          */
415         down_write(&cifsi->lock_sem);
416         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
417                 list_del(&li->llist);
418                 cifs_del_lock_waiters(li);
419                 kfree(li);
420         }
421         list_del(&cifs_file->llist->llist);
422         kfree(cifs_file->llist);
423         up_write(&cifsi->lock_sem);
424
425         cifs_put_tlink(cifs_file->tlink);
426         dput(cifs_file->dentry);
427         cifs_sb_deactive(sb);
428         kfree(cifs_file);
429 }
430
/*
 * VFS ->open for regular files.  Opens the file on the server - via the
 * SMB POSIX extensions when the tcon supports them, otherwise with an
 * NT-style open - then allocates the cifsFileInfo private data and sets
 * up the fscache cookie.  A pending open is registered around the server
 * open so a concurrent lease break is not missed.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	/* try the POSIX-extensions open first when the server supports it */
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server rejects posix opens - stop trying them */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open; the pending open was not consumed */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
549
550 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
551
552 /*
553  * Try to reacquire byte range locks that were released when session
554  * to server was lost.
555  */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to push them */
		up_write(&cinode->lock_sem);
		return rc;
	}

	/* push posix locks when the unix extensions support them and posix
	   brlocks were not disabled at mount time, else mandatory locks */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_write(&cinode->lock_sem);
	return rc;
}
582
/*
 * Reopen a file whose handle was invalidated (typically after the
 * session to the server was lost and re-established).  Tries a POSIX
 * reopen first when the server supports it, otherwise redoes an
 * NT-style open with FILE_OPEN disposition and oparms.reconnect set so
 * the server ops can reclaim a durable handle.  When @can_flush is set,
 * dirty pages are written back and the inode metadata refreshed from
 * the server; byte-range locks are re-sent after a successful
 * reconnect-style reopen.  Returns 0 on success.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened the handle - nothing to do */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = cfile->dentry->d_inode;
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * CIFSSMBOpen and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
723
724 int cifs_close(struct inode *inode, struct file *file)
725 {
726         if (file->private_data != NULL) {
727                 cifsFileInfo_put(file->private_data);
728                 file->private_data = NULL;
729         }
730
731         /* return code from the ->release op is always ignored */
732         return 0;
733 }
734
/*
 * VFS ->release for directories: if a FindFirst/FindNext search is still
 * in progress, close the search handle on the server, then free any
 * cached search response buffer and the private data.  Close errors are
 * logged and ignored, so this effectively always returns 0.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cifs_file_list_lock);
	if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
		/* mark the handle invalid before dropping the lock so no one
		   else tries to use it while we close it on the server */
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
785
786 static struct cifsLockInfo *
787 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
788 {
789         struct cifsLockInfo *lock =
790                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
791         if (!lock)
792                 return lock;
793         lock->offset = offset;
794         lock->length = length;
795         lock->type = type;
796         lock->pid = current->tgid;
797         INIT_LIST_HEAD(&lock->blist);
798         init_waitqueue_head(&lock->block_q);
799         return lock;
800 }
801
802 void
803 cifs_del_lock_waiters(struct cifsLockInfo *lock)
804 {
805         struct cifsLockInfo *li, *tmp;
806         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
807                 list_del_init(&li->blist);
808                 wake_up(&li->block_q);
809         }
810 }
811
812 #define CIFS_LOCK_OP    0
813 #define CIFS_READ_OP    1
814 #define CIFS_WRITE_OP   2
815
/*
 * Check a single fid's lock list for a lock conflicting with the range
 * [offset, offset + length).  @rw_check is one of CIFS_LOCK_OP,
 * CIFS_READ_OP or CIFS_WRITE_OP above (0 - no op, 1 - read, 2 - write).
 * On conflict, stores the conflicting lock in *conf_lock (if non-NULL)
 * and returns true.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* skip locks that do not overlap the requested range */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/* a shared request does not conflict with our own lock or
		   with another shared lock */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
847
848 bool
849 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
850                         __u8 type, struct cifsLockInfo **conf_lock,
851                         int rw_check)
852 {
853         bool rc = false;
854         struct cifs_fid_locks *cur;
855         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
856
857         list_for_each_entry(cur, &cinode->llist, llist) {
858                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
859                                                  cfile, conf_lock, rw_check);
860                 if (rc)
861                         break;
862         }
863
864         return rc;
865 }
866
/*
 * Check if there is another lock that prevents us to set the lock (mandatory
 * style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 * or leave it the same if we can't. Returns 0 if we don't need to request to
 * the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	/* read access is enough - we only inspect the cached lock lists */
	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					&conf_lock, CIFS_LOCK_OP);
	if (exist) {
		/* report the conflicting lock's properties to the caller */
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		/* no cached conflict but the cache isn't authoritative */
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
904
/*
 * Append a cached byte-range lock to this fid's lock list.  Takes
 * lock_sem for writing since the list is modified.
 */
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
913
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		/* no conflict and brlocks are cacheable - cache it locally */
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		/*
		 * Queue ourselves on the conflicting lock's blocked list and
		 * sleep until cifs_del_lock_waiters() detaches us (i.e. our
		 * blist entry is empty again), then retry from the top.
		 */
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		/* interrupted - unhook ourselves from the waiters list */
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
960
961 /*
962  * Check if there is another lock that prevents us to set the lock (posix
963  * style). If such a lock exists, update the flock structure with its
964  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
965  * or leave it the same if we can't. Returns 0 if we don't need to request to
966  * the server or 1 otherwise.
967  */
968 static int
969 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
970 {
971         int rc = 0;
972         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
973         unsigned char saved_type = flock->fl_type;
974
975         if ((flock->fl_flags & FL_POSIX) == 0)
976                 return 1;
977
978         down_read(&cinode->lock_sem);
979         posix_test_lock(file, flock);
980
981         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
982                 flock->fl_type = saved_type;
983                 rc = 1;
984         }
985
986         up_read(&cinode->lock_sem);
987         return rc;
988 }
989
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		/* can't cache - the caller must send the request on the wire */
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		/* blocked on another lock - wait for it to go away and retry */
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		/* interrupted - drop out of the blocked-lock queue */
		posix_unblock_lock(flock);
	}
	return rc;
}
1022
1023 int
1024 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1025 {
1026         unsigned int xid;
1027         int rc = 0, stored_rc;
1028         struct cifsLockInfo *li, *tmp;
1029         struct cifs_tcon *tcon;
1030         unsigned int num, max_num, max_buf;
1031         LOCKING_ANDX_RANGE *buf, *cur;
1032         int types[] = {LOCKING_ANDX_LARGE_FILES,
1033                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1034         int i;
1035
1036         xid = get_xid();
1037         tcon = tlink_tcon(cfile->tlink);
1038
1039         /*
1040          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1041          * and check it for zero before using.
1042          */
1043         max_buf = tcon->ses->server->maxBuf;
1044         if (!max_buf) {
1045                 free_xid(xid);
1046                 return -EINVAL;
1047         }
1048
1049         max_num = (max_buf - sizeof(struct smb_hdr)) /
1050                                                 sizeof(LOCKING_ANDX_RANGE);
1051         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1052         if (!buf) {
1053                 free_xid(xid);
1054                 return -ENOMEM;
1055         }
1056
1057         for (i = 0; i < 2; i++) {
1058                 cur = buf;
1059                 num = 0;
1060                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1061                         if (li->type != types[i])
1062                                 continue;
1063                         cur->Pid = cpu_to_le16(li->pid);
1064                         cur->LengthLow = cpu_to_le32((u32)li->length);
1065                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1066                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1067                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1068                         if (++num == max_num) {
1069                                 stored_rc = cifs_lockv(xid, tcon,
1070                                                        cfile->fid.netfid,
1071                                                        (__u8)li->type, 0, num,
1072                                                        buf);
1073                                 if (stored_rc)
1074                                         rc = stored_rc;
1075                                 cur = buf;
1076                                 num = 0;
1077                         } else
1078                                 cur++;
1079                 }
1080
1081                 if (num) {
1082                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1083                                                (__u8)types[i], 0, num, buf);
1084                         if (stored_rc)
1085                                 rc = stored_rc;
1086                 }
1087         }
1088
1089         kfree(buf);
1090         free_xid(xid);
1091         return rc;
1092 }
1093
/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
	for (lockp = &inode->i_flock; *lockp != NULL; \
	     lockp = &(*lockp)->fl_next)

/*
 * Snapshot of one POSIX lock, taken under inode->i_lock so the lock can
 * be sent to the server afterwards without holding the spinlock.  Filled
 * and consumed by cifs_push_posix_locks().
 */
struct lock_to_push {
	struct list_head llist;	/* entry in the locks_to_send list */
	__u64 offset;		/* start of the locked range */
	__u64 length;		/* length of the locked range */
	__u32 pid;		/* lock owner's pid */
	__u16 netfid;		/* file handle the lock belongs to */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1107
/*
 * Re-send all cached POSIX byte-range locks on @cfile to the server.
 * Works in three passes: count FL_POSIX locks under inode->i_lock,
 * preallocate that many lock_to_push entries outside the spinlock, then
 * copy the lock properties under i_lock again and finally issue the
 * CIFSSMBPosixLock calls without any spinlock held.  Returns 0 on
 * success or the last error from the server.
 * Called with cinode->lock_sem held for writing (see cifs_push_locks).
 */
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = cfile->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock, **before;
	unsigned int count = 0, i = 0;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	/* pass 1: count the POSIX locks we have to push */
	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		if ((*before)->fl_flags & FL_POSIX)
			count++;
	}
	spin_unlock(&inode->i_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	/* pass 2: copy lock properties into the preallocated entries */
	el = locks_to_send.next;
	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		flock = *before;
		if ((flock->fl_flags & FL_POSIX) == 0)
			continue;
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = flock->fl_pid;
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&inode->i_lock);

	/* pass 3: send the snapshots to the server, freeing as we go */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	/* allocation failed part-way - release what we already allocated */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
1196
1197 static int
1198 cifs_push_locks(struct cifsFileInfo *cfile)
1199 {
1200         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1201         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1202         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1203         int rc = 0;
1204
1205         /* we are going to update can_cache_brlcks here - need a write access */
1206         down_write(&cinode->lock_sem);
1207         if (!cinode->can_cache_brlcks) {
1208                 up_write(&cinode->lock_sem);
1209                 return rc;
1210         }
1211
1212         if (cap_unix(tcon->ses) &&
1213             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1214             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1215                 rc = cifs_push_posix_locks(cfile);
1216         else
1217                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1218
1219         cinode->can_cache_brlcks = false;
1220         up_write(&cinode->lock_sem);
1221         return rc;
1222 }
1223
1224 static void
1225 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1226                 bool *wait_flag, struct TCP_Server_Info *server)
1227 {
1228         if (flock->fl_flags & FL_POSIX)
1229                 cifs_dbg(FYI, "Posix\n");
1230         if (flock->fl_flags & FL_FLOCK)
1231                 cifs_dbg(FYI, "Flock\n");
1232         if (flock->fl_flags & FL_SLEEP) {
1233                 cifs_dbg(FYI, "Blocking lock\n");
1234                 *wait_flag = true;
1235         }
1236         if (flock->fl_flags & FL_ACCESS)
1237                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1238         if (flock->fl_flags & FL_LEASE)
1239                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1240         if (flock->fl_flags &
1241             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1242                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1243                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1244
1245         *type = server->vals->large_lock_type;
1246         if (flock->fl_type == F_WRLCK) {
1247                 cifs_dbg(FYI, "F_WRLCK\n");
1248                 *type |= server->vals->exclusive_lock_type;
1249                 *lock = 1;
1250         } else if (flock->fl_type == F_UNLCK) {
1251                 cifs_dbg(FYI, "F_UNLCK\n");
1252                 *type |= server->vals->unlock_lock_type;
1253                 *unlock = 1;
1254                 /* Check if unlock includes more than one lock range */
1255         } else if (flock->fl_type == F_RDLCK) {
1256                 cifs_dbg(FYI, "F_RDLCK\n");
1257                 *type |= server->vals->shared_lock_type;
1258                 *lock = 1;
1259         } else if (flock->fl_type == F_EXLCK) {
1260                 cifs_dbg(FYI, "F_EXLCK\n");
1261                 *type |= server->vals->exclusive_lock_type;
1262                 *lock = 1;
1263         } else if (flock->fl_type == F_SHLCK) {
1264                 cifs_dbg(FYI, "F_SHLCK\n");
1265                 *type |= server->vals->shared_lock_type;
1266                 *lock = 1;
1267         } else
1268                 cifs_dbg(FYI, "Unknown type of lock\n");
1269 }
1270
/*
 * Handle F_GETLK: determine whether the requested lock could be placed.
 * For POSIX locks we consult the local cache and, if needed, ask the
 * server.  For mandatory locks we probe the server by trying to acquire
 * the lock and, on success, immediately unlocking it again; if the probe
 * fails we retry with a shared lock to distinguish a read conflict from
 * a write conflict and report the result in flock->fl_type.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		/* probe succeeded - release it again, the range is free */
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		/* a shared probe failed - something holds it exclusively */
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive probe failed - retry shared to see if reads would work */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1338
1339 void
1340 cifs_move_llist(struct list_head *source, struct list_head *dest)
1341 {
1342         struct list_head *li, *tmp;
1343         list_for_each_safe(li, tmp, source)
1344                 list_move(li, dest);
1345 }
1346
1347 void
1348 cifs_free_llist(struct list_head *llist)
1349 {
1350         struct cifsLockInfo *li, *tmp;
1351         list_for_each_entry_safe(li, tmp, llist, llist) {
1352                 cifs_del_lock_waiters(li);
1353                 list_del(&li->llist);
1354                 kfree(li);
1355         }
1356 }
1357
1358 int
1359 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1360                   unsigned int xid)
1361 {
1362         int rc = 0, stored_rc;
1363         int types[] = {LOCKING_ANDX_LARGE_FILES,
1364                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1365         unsigned int i;
1366         unsigned int max_num, num, max_buf;
1367         LOCKING_ANDX_RANGE *buf, *cur;
1368         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1369         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1370         struct cifsLockInfo *li, *tmp;
1371         __u64 length = 1 + flock->fl_end - flock->fl_start;
1372         struct list_head tmp_llist;
1373
1374         INIT_LIST_HEAD(&tmp_llist);
1375
1376         /*
1377          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1378          * and check it for zero before using.
1379          */
1380         max_buf = tcon->ses->server->maxBuf;
1381         if (!max_buf)
1382                 return -EINVAL;
1383
1384         max_num = (max_buf - sizeof(struct smb_hdr)) /
1385                                                 sizeof(LOCKING_ANDX_RANGE);
1386         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1387         if (!buf)
1388                 return -ENOMEM;
1389
1390         down_write(&cinode->lock_sem);
1391         for (i = 0; i < 2; i++) {
1392                 cur = buf;
1393                 num = 0;
1394                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1395                         if (flock->fl_start > li->offset ||
1396                             (flock->fl_start + length) <
1397                             (li->offset + li->length))
1398                                 continue;
1399                         if (current->tgid != li->pid)
1400                                 continue;
1401                         if (types[i] != li->type)
1402                                 continue;
1403                         if (cinode->can_cache_brlcks) {
1404                                 /*
1405                                  * We can cache brlock requests - simply remove
1406                                  * a lock from the file's list.
1407                                  */
1408                                 list_del(&li->llist);
1409                                 cifs_del_lock_waiters(li);
1410                                 kfree(li);
1411                                 continue;
1412                         }
1413                         cur->Pid = cpu_to_le16(li->pid);
1414                         cur->LengthLow = cpu_to_le32((u32)li->length);
1415                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1416                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1417                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1418                         /*
1419                          * We need to save a lock here to let us add it again to
1420                          * the file's list if the unlock range request fails on
1421                          * the server.
1422                          */
1423                         list_move(&li->llist, &tmp_llist);
1424                         if (++num == max_num) {
1425                                 stored_rc = cifs_lockv(xid, tcon,
1426                                                        cfile->fid.netfid,
1427                                                        li->type, num, 0, buf);
1428                                 if (stored_rc) {
1429                                         /*
1430                                          * We failed on the unlock range
1431                                          * request - add all locks from the tmp
1432                                          * list to the head of the file's list.
1433                                          */
1434                                         cifs_move_llist(&tmp_llist,
1435                                                         &cfile->llist->locks);
1436                                         rc = stored_rc;
1437                                 } else
1438                                         /*
1439                                          * The unlock range request succeed -
1440                                          * free the tmp list.
1441                                          */
1442                                         cifs_free_llist(&tmp_llist);
1443                                 cur = buf;
1444                                 num = 0;
1445                         } else
1446                                 cur++;
1447                 }
1448                 if (num) {
1449                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1450                                                types[i], num, 0, buf);
1451                         if (stored_rc) {
1452                                 cifs_move_llist(&tmp_llist,
1453                                                 &cfile->llist->locks);
1454                                 rc = stored_rc;
1455                         } else
1456                                 cifs_free_llist(&tmp_llist);
1457                 }
1458         }
1459
1460         up_write(&cinode->lock_sem);
1461         kfree(buf);
1462         return rc;
1463 }
1464
/*
 * Handle F_SETLK/F_SETLKW: place or remove a byte-range lock.  POSIX
 * locks are first applied to the local cache and sent to the server only
 * when necessary; mandatory locks are validated against the cached lock
 * lists, sent to the server, and cached on success.  On return the lock
 * is also recorded locally via posix_lock_file_wait() for FL_POSIX
 * requests so the VFS state stays in sync.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = cfile->dentry->d_inode;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      current->tgid, flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		/* check cached locks first; may cache it or block/wait */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;


		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_I(inode)->clientCanCacheAll &&
					CIFS_I(inode)->clientCanCacheRead) {
			cifs_invalidate_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->clientCanCacheRead = false;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		/* the server granted it - remember it locally */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX)
		posix_lock_file_wait(file, flock);
	return rc;
}
1545
1546 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1547 {
1548         int rc, xid;
1549         int lock = 0, unlock = 0;
1550         bool wait_flag = false;
1551         bool posix_lck = false;
1552         struct cifs_sb_info *cifs_sb;
1553         struct cifs_tcon *tcon;
1554         struct cifsInodeInfo *cinode;
1555         struct cifsFileInfo *cfile;
1556         __u16 netfid;
1557         __u32 type;
1558
1559         rc = -EACCES;
1560         xid = get_xid();
1561
1562         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1563                  cmd, flock->fl_flags, flock->fl_type,
1564                  flock->fl_start, flock->fl_end);
1565
1566         cfile = (struct cifsFileInfo *)file->private_data;
1567         tcon = tlink_tcon(cfile->tlink);
1568
1569         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1570                         tcon->ses->server);
1571
1572         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1573         netfid = cfile->fid.netfid;
1574         cinode = CIFS_I(file_inode(file));
1575
1576         if (cap_unix(tcon->ses) &&
1577             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1578             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1579                 posix_lck = true;
1580         /*
1581          * BB add code here to normalize offset and length to account for
1582          * negative length which we can not accept over the wire.
1583          */
1584         if (IS_GETLK(cmd)) {
1585                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1586                 free_xid(xid);
1587                 return rc;
1588         }
1589
1590         if (!lock && !unlock) {
1591                 /*
1592                  * if no lock or unlock then nothing to do since we do not
1593                  * know what it is
1594                  */
1595                 free_xid(xid);
1596                 return -EOPNOTSUPP;
1597         }
1598
1599         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1600                         xid);
1601         free_xid(xid);
1602         return rc;
1603 }
1604
1605 /*
1606  * update the file size (if needed) after a write. Should be called with
1607  * the inode->i_lock held
1608  */
1609 void
1610 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1611                       unsigned int bytes_written)
1612 {
1613         loff_t end_of_write = offset + bytes_written;
1614
1615         if (end_of_write > cifsi->server_eof)
1616                 cifsi->server_eof = end_of_write;
1617 }
1618
/*
 * Synchronously write @write_size bytes from @write_data to the server at
 * *@offset through the open handle @open_file, advancing *@offset as data
 * goes out.  The write is chunked by the negotiated wsize; each chunk is
 * retried on -EAGAIN (reopening an invalidated handle first).  The cached
 * server EOF and i_size are updated as bytes are committed.
 *
 * Returns the total number of bytes written (possibly short), or a
 * negative errno if nothing at all could be written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
		 write_size, *offset, dentry->d_name.name);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	/* protocol-specific synchronous write op is mandatory here */
	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* chunk size: remaining bytes, capped at wsize */
			len = min((size_t)cifs_sb->wsize,
				  write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, open_file, &io_parms,
						     &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* return partial count if some data already went out,
			   otherwise propagate the error */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			/* i_lock protects both server_eof and i_size updates */
			spin_lock(&dentry->d_inode->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&dentry->d_inode->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&dentry->d_inode->i_lock);
		if (*offset > dentry->d_inode->i_size)
			i_size_write(dentry->d_inode, *offset);
		spin_unlock(&dentry->d_inode->i_lock);
	}
	mark_inode_dirty_sync(dentry->d_inode);
	free_xid(xid);
	return total_written;
}
1703
/*
 * Find an open, still-valid handle on @cifs_inode that was opened with read
 * access, taking a reference on it (caller must cifsFileInfo_put()).  When
 * @fsuid_only is set on a multiuser mount, only handles opened by the
 * current fsuid are considered.  Returns NULL if no suitable handle exists.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_file_list_lock);
	return NULL;
}
1737
/*
 * Find an open handle on @cifs_inode with write access, taking a reference
 * (caller must cifsFileInfo_put()).  Preference order:
 *   1. a valid writable handle opened by the current tgid,
 *   2. a valid writable handle opened by any task,
 *   3. an invalidated writable handle, which we try to reopen (up to
 *      MAX_REOPEN_ATT attempts; a failed one is moved to the list tail).
 * When @fsuid_only is set on a multiuser mount, only handles opened by the
 * current fsuid are considered.  Returns NULL if nothing usable is found.
 */
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	the VFS or MM) should not happen but we had reports of an oops (due to
	it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
refind_writable:
	/* give up after too many failed reopen attempts */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_file_list_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass: restrict to handles opened by our tgid */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} else {
				/* remember first invalidated candidate for
				   a possible reopen attempt below */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		/* pin the invalid handle before dropping the list lock */
		cifsFileInfo_get_locked(inv_file);
	}

	spin_unlock(&cifs_file_list_lock);

	if (inv_file) {
		/* try to revive the invalidated handle (no lock held) */
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			/* demote the broken handle to the list tail and
			   retry the whole search */
			spin_lock(&cifs_file_list_lock);
			list_move_tail(&inv_file->flist,
					&cifs_inode->openFileList);
			spin_unlock(&cifs_file_list_lock);
			cifsFileInfo_put(inv_file);
			spin_lock(&cifs_file_list_lock);
			++refind;
			goto refind_writable;
		}
	}

	return NULL;
}
1817
/*
 * Write the byte range [@from, @to) of @page back to the server using any
 * available writable handle on the inode.  Used by the writepage path.
 * Returns 0 on success, 0 if the page is past EOF (racing truncate),
 * or a negative errno.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	/* sanity-check the requested range before using it */
	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}
1871
/*
 * address_space_operations->writepages for cifs.
 *
 * Gathers runs of consecutive dirty pages (up to wsize worth), wraps them
 * in a cifs_writedata and submits them with the protocol's async_writev op.
 * Falls back to generic_writepages() (one page at a time) when wsize is
 * smaller than a page.  Page locking, the writeback flag, and error
 * propagation follow the usual write_cache_pages() conventions.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct TCP_Server_Info *server;
	struct page *page;
	int rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages;
		pgoff_t next = 0, tofind;
		struct page **pages;

		/* at most wsize worth of pages per request */
		tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
				end - index) + 1;

		wdata = cifs_writedata_alloc((unsigned int)tofind,
					     cifs_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			break;
		}

		/*
		 * find_get_pages_tag seems to return a max of 256 on each
		 * iteration, so we must call it several times in order to
		 * fill the array or the wsize is effectively limited to
		 * 256 * PAGE_CACHE_SIZE.
		 */
		found_pages = 0;
		pages = wdata->pages;
		do {
			nr_pages = find_get_pages_tag(mapping, &index,
							PAGECACHE_TAG_DIRTY,
							tofind, pages);
			found_pages += nr_pages;
			tofind -= nr_pages;
			pages += nr_pages;
		} while (nr_pages && tofind && index <= end);

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			break;
		}

		/* lock a consecutive run of the found pages; the run ends at
		   the first page we cannot or should not include */
		nr_pages = 0;
		for (i = 0; i < found_pages; i++) {
			page = wdata->pages[i];
			/*
			 * At this point we hold neither mapping->tree_lock nor
			 * lock on the page itself: the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or even
			 * swizzled back from swapper_space to tmpfs file
			 * mapping
			 */

			if (nr_pages == 0)
				lock_page(page);
			else if (!trylock_page(page))
				break;

			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				break;
			}

			if (!wbc->range_cyclic && page->index > end) {
				done = true;
				unlock_page(page);
				break;
			}

			if (next && (page->index != next)) {
				/* Not next consecutive page */
				unlock_page(page);
				break;
			}

			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);

			if (PageWriteback(page) ||
					!clear_page_dirty_for_io(page)) {
				unlock_page(page);
				break;
			}

			/*
			 * This actually clears the dirty bit in the radix tree.
			 * See cifs_writepage() for more commentary.
			 */
			set_page_writeback(page);

			if (page_offset(page) >= i_size_read(mapping->host)) {
				done = true;
				unlock_page(page);
				end_page_writeback(page);
				break;
			}

			wdata->pages[i] = page;
			next = page->index + 1;
			++nr_pages;
		}

		/* reset index to refind any pages skipped */
		if (nr_pages == 0)
			index = wdata->pages[0]->index + 1;

		/* put any pages we aren't going to use */
		for (i = nr_pages; i < found_pages; i++) {
			page_cache_release(wdata->pages[i]);
			wdata->pages[i] = NULL;
		}

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			continue;
		}

		/* fill in the request; the final page may be partial */
		wdata->sync_mode = wbc->sync_mode;
		wdata->nr_pages = nr_pages;
		wdata->offset = page_offset(wdata->pages[0]);
		wdata->pagesz = PAGE_CACHE_SIZE;
		wdata->tailsz =
			min(i_size_read(mapping->host) -
			    page_offset(wdata->pages[nr_pages - 1]),
			    (loff_t)PAGE_CACHE_SIZE);
		wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
					wdata->tailsz;

		/* get a writable handle and submit; retry on -EAGAIN only
		   for data-integrity (WB_SYNC_ALL) writeback */
		do {
			if (wdata->cfile != NULL)
				cifsFileInfo_put(wdata->cfile);
			wdata->cfile = find_writable_file(CIFS_I(mapping->host),
							  false);
			if (!wdata->cfile) {
				cifs_dbg(VFS, "No writable handles for inode\n");
				rc = -EBADF;
				break;
			}
			wdata->pid = wdata->cfile->pid;
			server = tlink_tcon(wdata->cfile->tlink)->ses->server;
			rc = server->ops->async_writev(wdata);
		} while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);

		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			for (i = 0; i < nr_pages; ++i) {
				if (rc == -EAGAIN)
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				page_cache_release(wdata->pages[i]);
			}
			if (rc != -EAGAIN)
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}
2081
/*
 * Write one locked page back to the server.  The caller holds the page
 * lock; this function handles the writeback flag and the page reference
 * but does NOT unlock the page (cifs_writepage() does that).
 *
 * NOTE(review): for WB_SYNC_ALL, an -EAGAIN from cifs_partialpagewrite()
 * loops immediately with no backoff or retry cap — looks like it can spin
 * if the server keeps returning -EAGAIN; confirm whether that is intended.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	page_cache_get(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
		goto retry_write;
	else if (rc == -EAGAIN)
		redirty_page_for_writepage(wbc, page);
	else if (rc != 0)
		SetPageError(page);
	else
		SetPageUptodate(page);
	end_page_writeback(page);
	page_cache_release(page);
	free_xid(xid);
	return rc;
}
2120
/* address_space_operations->writepage: do the work, then drop the lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}
2127
/*
 * address_space_operations->write_end for cifs.
 *
 * If the page is uptodate (or became fully written), just dirty it and let
 * writeback push it out later.  If it is NOT uptodate we cannot cache the
 * partial page, so write the copied range through to the server right away
 * using this file's handle.  Returns the number of bytes accepted or a
 * negative errno; always unlocks and releases the page.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/* PageChecked was set by write_begin when it skipped reading the
	   page; a full copy now makes it uptodate */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		/* extend i_size if this write grew the file */
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	page_cache_release(page);

	return rc;
}
2188
/*
 * fsync for "strictcache" mounts: flush dirty pages for the range, drop the
 * page cache if we no longer hold a read oplock (so stale data is re-read),
 * then send a protocol flush unless the mount disabled it ("nosssync").
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
		 file->f_path.dentry->d_name.name, datasync);

	if (!CIFS_I(inode)->clientCanCacheRead) {
		rc = cifs_invalidate_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
2231
/*
 * Regular (non-strict) fsync: flush dirty pages for the range, then send a
 * protocol flush unless the mount disabled it.  Unlike cifs_strict_fsync()
 * this never invalidates the page cache.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	struct inode *inode = file->f_mapping->host;

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
		 file->f_path.dentry->d_name.name, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
2265
2266 /*
2267  * As file closes, flush all cached write data for this inode checking
2268  * for write behind errors.
2269  */
2270 int cifs_flush(struct file *file, fl_owner_t id)
2271 {
2272         struct inode *inode = file_inode(file);
2273         int rc = 0;
2274
2275         if (file->f_mode & FMODE_WRITE)
2276                 rc = filemap_write_and_wait(inode->i_mapping);
2277
2278         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2279
2280         return rc;
2281 }
2282
2283 static int
2284 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2285 {
2286         int rc = 0;
2287         unsigned long i;
2288
2289         for (i = 0; i < num_pages; i++) {
2290                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2291                 if (!pages[i]) {
2292                         /*
2293                          * save number of pages we have already allocated and
2294                          * return with ENOMEM error
2295                          */
2296                         num_pages = i;
2297                         rc = -ENOMEM;
2298                         break;
2299                 }
2300         }
2301
2302         if (rc) {
2303                 for (i = 0; i < num_pages; i++)
2304                         put_page(pages[i]);
2305         }
2306         return rc;
2307 }
2308
2309 static inline
2310 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2311 {
2312         size_t num_pages;
2313         size_t clen;
2314
2315         clen = min_t(const size_t, len, wsize);
2316         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2317
2318         if (cur_len)
2319                 *cur_len = clen;
2320
2321         return num_pages;
2322 }
2323
/*
 * Work-queue completion for an uncached (O_DIRECT-style) async write:
 * advance the cached server EOF / i_size past the written range, wake the
 * waiter, release the data pages (kept only if the write must be retried
 * after -EAGAIN), and drop the wdata reference.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	int i;
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = wdata->cfile->dentry->d_inode;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	/* i_lock protects both server_eof and i_size updates */
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);

	/* on -EAGAIN the pages are kept so the write can be resent */
	if (wdata->result != -EAGAIN) {
		for (i = 0; i < wdata->nr_pages; i++)
			put_page(wdata->pages[i]);
	}

	kref_put(&wdata->refcount, cifs_writedata_release);
}
2348
2349 /* attempt to send write to server, retry on any -EAGAIN errors */
2350 static int
2351 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2352 {
2353         int rc;
2354         struct TCP_Server_Info *server;
2355
2356         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2357
2358         do {
2359                 if (wdata->cfile->invalidHandle) {
2360                         rc = cifs_reopen_file(wdata->cfile, false);
2361                         if (rc != 0)
2362                                 continue;
2363                 }
2364                 rc = server->ops->async_writev(wdata);
2365         } while (rc == -EAGAIN);
2366
2367         return rc;
2368 }
2369
/*
 * Uncached write: split the iovec into wsize-sized chunks, copy each chunk
 * into freshly allocated pages, send them as async write requests, then
 * wait for the replies in offset order. Returns the number of bytes
 * written, or a negative errno if nothing was written.
 */
static ssize_t
cifs_iovec_write(struct file *file, const struct iovec *iov,
		 unsigned long nr_segs, loff_t *poffset)
{
	unsigned long nr_pages, i;
	size_t copied, len, cur_len;
	ssize_t total_written = 0;
	loff_t offset;
	struct iov_iter it;
	struct cifsFileInfo *open_file;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_writedata *wdata, *tmp;
	struct list_head wdata_list;
	int rc;
	pid_t pid;

	len = iov_length(iov, nr_segs);
	if (!len)
		return 0;

	/* may shrink len (e.g. rlimit) or fail outright */
	rc = generic_write_checks(file, poffset, &len, 0);
	if (rc)
		return rc;

	INIT_LIST_HEAD(&wdata_list);
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	offset = *poffset;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	iov_iter_init(&it, iov, nr_segs, len, 0);
	do {
		size_t save_len;

		nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
		wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			break;
		}

		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
		if (rc) {
			kfree(wdata);
			break;
		}

		/* copy up to cur_len bytes from the iovec into the pages */
		save_len = cur_len;
		for (i = 0; i < nr_pages; i++) {
			copied = min_t(const size_t, cur_len, PAGE_SIZE);
			copied = iov_iter_copy_from_user(wdata->pages[i], &it,
							 0, copied);
			cur_len -= copied;
			iov_iter_advance(&it, copied);
		}
		/* cur_len now holds the number of bytes actually copied */
		cur_len = save_len - cur_len;

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		/* last page is usually partial */
		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		rc = cifs_uncached_retry_writev(wdata);
		if (rc) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			break;
		}

		list_add_tail(&wdata->list, &wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&wdata_list))
		rc = 0;

	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit or we get a fatal signal
	 * while waiting, then return without waiting for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
		if (!rc) {
			/* FIXME: freezable too? */
			rc = wait_for_completion_killable(&wdata->done);
			if (rc)
				rc = -EINTR;
			else if (wdata->result)
				rc = wdata->result;
			else
				total_written += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				rc = cifs_uncached_retry_writev(wdata);
				goto restart_loop;
			}
		}
		/* drop the completed (or failed) request from the list */
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_writedata_release);
	}

	if (total_written > 0)
		*poffset += total_written;

	cifs_stats_bytes_written(tcon, total_written);
	return total_written ? total_written : (ssize_t)rc;
}
2499
2500 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2501                                 unsigned long nr_segs, loff_t pos)
2502 {
2503         ssize_t written;
2504         struct inode *inode;
2505
2506         inode = file_inode(iocb->ki_filp);
2507
2508         /*
2509          * BB - optimize the way when signing is disabled. We can drop this
2510          * extra memory-to-memory copying and use iovec buffers for constructing
2511          * write request.
2512          */
2513
2514         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2515         if (written > 0) {
2516                 CIFS_I(inode)->invalid_mapping = true;
2517                 iocb->ki_pos = pos;
2518         }
2519
2520         return written;
2521 }
2522
/*
 * Cached write used when we hold an oplock: refuse the write if it would
 * conflict with a mandatory brlock held by another process.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, const struct iovec *iov,
	    unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc = -EACCES;

	BUG_ON(iocb->ki_pos != pos);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
				     server->vals->exclusive_lock_type, NULL,
				     CIFS_WRITE_OP)) {
		mutex_lock(&inode->i_mutex);
		rc = __generic_file_aio_write(iocb, iov, nr_segs,
					       &iocb->ki_pos);
		mutex_unlock(&inode->i_mutex);
	}

	/* mirror generic_file_aio_write(): sync the range if required */
	if (rc > 0 || rc == -EIOCBQUEUED) {
		ssize_t err;

		err = generic_write_sync(file, pos, rc);
		if (err < 0 && rc > 0)
			rc = err;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
2561
/*
 * Strict-cache write entry point: write through the page cache only when
 * we hold a full oplock (clientCanCacheAll); otherwise send the data
 * straight to the server via the uncached path.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
		   unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	if (cinode->clientCanCacheAll) {
		/* with POSIX (unix extension) brlocks no conflict check is needed */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
			return generic_file_aio_write(iocb, iov, nr_segs, pos);
		return cifs_writev(iocb, iov, nr_segs, pos);
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, iov, nr_segs, pos);
	if (written > 0 && cinode->clientCanCacheRead) {
		/*
		 * Windows 7 server can delay breaking level2 oplock if a write
		 * request comes - break it on the client to prevent reading
		 * an old data.
		 */
		cifs_invalidate_mapping(inode);
		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
			 inode);
		cinode->clientCanCacheRead = false;
	}
	return written;
}
2601
2602 static struct cifs_readdata *
2603 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2604 {
2605         struct cifs_readdata *rdata;
2606
2607         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2608                         GFP_KERNEL);
2609         if (rdata != NULL) {
2610                 kref_init(&rdata->refcount);
2611                 INIT_LIST_HEAD(&rdata->list);
2612                 init_completion(&rdata->done);
2613                 INIT_WORK(&rdata->work, complete);
2614         }
2615
2616         return rdata;
2617 }
2618
2619 void
2620 cifs_readdata_release(struct kref *refcount)
2621 {
2622         struct cifs_readdata *rdata = container_of(refcount,
2623                                         struct cifs_readdata, refcount);
2624
2625         if (rdata->cfile)
2626                 cifsFileInfo_put(rdata->cfile);
2627
2628         kfree(rdata);
2629 }
2630
2631 static int
2632 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2633 {
2634         int rc = 0;
2635         struct page *page;
2636         unsigned int i;
2637
2638         for (i = 0; i < nr_pages; i++) {
2639                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2640                 if (!page) {
2641                         rc = -ENOMEM;
2642                         break;
2643                 }
2644                 rdata->pages[i] = page;
2645         }
2646
2647         if (rc) {
2648                 for (i = 0; i < nr_pages; i++) {
2649                         put_page(rdata->pages[i]);
2650                         rdata->pages[i] = NULL;
2651                 }
2652         }
2653         return rc;
2654 }
2655
2656 static void
2657 cifs_uncached_readdata_release(struct kref *refcount)
2658 {
2659         struct cifs_readdata *rdata = container_of(refcount,
2660                                         struct cifs_readdata, refcount);
2661         unsigned int i;
2662
2663         for (i = 0; i < rdata->nr_pages; i++) {
2664                 put_page(rdata->pages[i]);
2665                 rdata->pages[i] = NULL;
2666         }
2667         cifs_readdata_release(refcount);
2668 }
2669
2670 static int
2671 cifs_retry_async_readv(struct cifs_readdata *rdata)
2672 {
2673         int rc;
2674         struct TCP_Server_Info *server;
2675
2676         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2677
2678         do {
2679                 if (rdata->cfile->invalidHandle) {
2680                         rc = cifs_reopen_file(rdata->cfile, true);
2681                         if (rc != 0)
2682                                 continue;
2683                 }
2684                 rc = server->ops->async_readv(rdata);
2685         } while (rc == -EAGAIN);
2686
2687         return rc;
2688 }
2689
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iov:        vector in which we should copy the data
 * @nr_segs:    number of segments in vector
 * @offset:     offset into file of the first iovec
 * @copied:     used to return the amount of data copied to the iov
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
static ssize_t
cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
			unsigned long nr_segs, loff_t offset, ssize_t *copied)
{
	int rc = 0;
	struct iov_iter ii;
	/* where this response's data lands relative to the iovec start */
	size_t pos = rdata->offset - offset;
	ssize_t remaining = rdata->bytes;
	unsigned char *pdata;
	unsigned int i;

	/* set up iov_iter and advance to the correct offset */
	iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
	iov_iter_advance(&ii, pos);

	*copied = 0;
	for (i = 0; i < rdata->nr_pages; i++) {
		ssize_t copy;
		struct page *page = rdata->pages[i];

		/* copy a whole page or whatever's left */
		copy = min_t(ssize_t, remaining, PAGE_SIZE);

		/* ...but limit it to whatever space is left in the iov */
		copy = min_t(ssize_t, copy, iov_iter_count(&ii));

		/* go while there's data to be copied and no errors */
		if (copy && !rc) {
			pdata = kmap(page);
			rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
						(int)copy);
			kunmap(page);
			if (!rc) {
				*copied += copy;
				remaining -= copy;
				iov_iter_advance(&ii, copy);
			}
		}
	}

	return rc;
}
2744
/*
 * Workqueue completion for an uncached async read: wake the waiter in
 * cifs_iovec_read() first, then drop the work item's reference on rdata.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
2754
/*
 * Receive the payload of an uncached read response from the socket into
 * rdata->pages[]. @len is the number of data bytes the server returned;
 * a partial final page is zero-padded and surplus pages are released
 * (this data never enters the page cache, so they are not needed).
 * Returns the number of bytes received, or a negative error if nothing
 * was read.
 */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int total_read = 0, result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_SIZE;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			len -= PAGE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		total_read += result;
	}

	return total_read > 0 ? total_read : result;
}
2802
2803 static ssize_t
2804 cifs_iovec_read(struct file *file, const struct iovec *iov,
2805                  unsigned long nr_segs, loff_t *poffset)
2806 {
2807         ssize_t rc;
2808         size_t len, cur_len;
2809         ssize_t total_read = 0;
2810         loff_t offset = *poffset;
2811         unsigned int npages;
2812         struct cifs_sb_info *cifs_sb;
2813         struct cifs_tcon *tcon;
2814         struct cifsFileInfo *open_file;
2815         struct cifs_readdata *rdata, *tmp;
2816         struct list_head rdata_list;
2817         pid_t pid;
2818
2819         if (!nr_segs)
2820                 return 0;
2821
2822         len = iov_length(iov, nr_segs);
2823         if (!len)
2824                 return 0;
2825
2826         INIT_LIST_HEAD(&rdata_list);
2827         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2828         open_file = file->private_data;
2829         tcon = tlink_tcon(open_file->tlink);
2830
2831         if (!tcon->ses->server->ops->async_readv)
2832                 return -ENOSYS;
2833
2834         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2835                 pid = open_file->pid;
2836         else
2837                 pid = current->tgid;
2838
2839         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2840                 cifs_dbg(FYI, "attempting read on write only file instance\n");
2841
2842         do {
2843                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2844                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2845
2846                 /* allocate a readdata struct */
2847                 rdata = cifs_readdata_alloc(npages,
2848                                             cifs_uncached_readv_complete);
2849                 if (!rdata) {
2850                         rc = -ENOMEM;
2851                         goto error;
2852                 }
2853
2854                 rc = cifs_read_allocate_pages(rdata, npages);
2855                 if (rc)
2856                         goto error;
2857
2858                 rdata->cfile = cifsFileInfo_get(open_file);
2859                 rdata->nr_pages = npages;
2860                 rdata->offset = offset;
2861                 rdata->bytes = cur_len;
2862                 rdata->pid = pid;
2863                 rdata->pagesz = PAGE_SIZE;
2864                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2865
2866                 rc = cifs_retry_async_readv(rdata);
2867 error:
2868                 if (rc) {
2869                         kref_put(&rdata->refcount,
2870                                  cifs_uncached_readdata_release);
2871                         break;
2872                 }
2873
2874                 list_add_tail(&rdata->list, &rdata_list);
2875                 offset += cur_len;
2876                 len -= cur_len;
2877         } while (len > 0);
2878
2879         /* if at least one read request send succeeded, then reset rc */
2880         if (!list_empty(&rdata_list))
2881                 rc = 0;
2882
2883         /* the loop below should proceed in the order of increasing offsets */
2884 restart_loop:
2885         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2886                 if (!rc) {
2887                         ssize_t copied;
2888
2889                         /* FIXME: freezable sleep too? */
2890                         rc = wait_for_completion_killable(&rdata->done);
2891                         if (rc)
2892                                 rc = -EINTR;
2893                         else if (rdata->result)
2894                                 rc = rdata->result;
2895                         else {
2896                                 rc = cifs_readdata_to_iov(rdata, iov,
2897                                                         nr_segs, *poffset,
2898                                                         &copied);
2899                                 total_read += copied;
2900                         }
2901
2902                         /* resend call if it's a retryable error */
2903                         if (rc == -EAGAIN) {
2904                                 rc = cifs_retry_async_readv(rdata);
2905                                 goto restart_loop;
2906                         }
2907                 }
2908                 list_del_init(&rdata->list);
2909                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2910         }
2911
2912         cifs_stats_bytes_read(tcon, total_read);
2913         *poffset += total_read;
2914
2915         /* mask nodata case */
2916         if (rc == -ENODATA)
2917                 rc = 0;
2918
2919         return total_read ? total_read : rc;
2920 }
2921
2922 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2923                                unsigned long nr_segs, loff_t pos)
2924 {
2925         ssize_t read;
2926
2927         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2928         if (read > 0)
2929                 iocb->ki_pos = pos;
2930
2931         return read;
2932 }
2933
/*
 * Strict-cache read entry point: read through the page cache only when we
 * hold at least a level II (read) oplock and no conflicting mandatory
 * brlock exists; otherwise read directly from the server.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
		  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!cinode->clientCanCacheRead)
		return cifs_user_readv(iocb, iov, nr_segs, pos);

	/* with POSIX (unix extension) brlocks no conflict check is needed */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_aio_read(iocb, iov, nr_segs, pos);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
	up_read(&cinode->lock_sem);
	return rc;
}
2974
/*
 * Synchronous (blocking) read used by cifs_readpage() and friends: pull
 * @read_size bytes at *@offset into @read_data in rsize-sized chunks,
 * reopening the file handle and retrying on -EAGAIN. Returns the number
 * of bytes read, or a negative errno if the first chunk fails.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		current_read_size = min_t(uint, read_size - total_read, rsize);
		/*
		 * For windows me and 9x we do not want to request more than it
		 * negotiated since it will refuse the read then.
		 */
		if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
			current_read_size = min_t(uint, current_read_size,
					CIFSMaxBufSize);
		}
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, open_file, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		}
		if (rc || (bytes_read == 0)) {
			/* partial success: return what we got so far */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			/*
			 * NOTE(review): this records the running total on
			 * every iteration, over-counting multi-chunk reads;
			 * bytes_read looks like the intended argument --
			 * confirm before changing.
			 */
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
3063
3064 /*
3065  * If the page is mmap'ed into a process' page tables, then we need to make
3066  * sure that it doesn't change while being written back.
3067  */
3068 static int
3069 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3070 {
3071         struct page *page = vmf->page;
3072
3073         lock_page(page);
3074         return VM_FAULT_LOCKED;
3075 }
3076
/* mmap handlers: generic fault-in, lock the page on first write (above) */
static struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = cifs_page_mkwrite,
	.remap_pages = generic_file_remap_pages,
};
3082
3083 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3084 {
3085         int rc, xid;
3086         struct inode *inode = file_inode(file);
3087
3088         xid = get_xid();
3089
3090         if (!CIFS_I(inode)->clientCanCacheRead) {
3091                 rc = cifs_invalidate_mapping(inode);
3092                 if (rc)
3093                         return rc;
3094         }
3095
3096         rc = generic_file_mmap(file, vma);
3097         if (rc == 0)
3098                 vma->vm_ops = &cifs_file_vm_ops;
3099         free_xid(xid);
3100         return rc;
3101 }
3102
3103 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3104 {
3105         int rc, xid;
3106
3107         xid = get_xid();
3108         rc = cifs_revalidate_file(file);
3109         if (rc) {
3110                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3111                          rc);
3112                 free_xid(xid);
3113                 return rc;
3114         }
3115         rc = generic_file_mmap(file, vma);
3116         if (rc == 0)
3117                 vma->vm_ops = &cifs_file_vm_ops;
3118         free_xid(xid);
3119         return rc;
3120 }
3121
/*
 * Workqueue completion for a readpages (page cache) read: put each page
 * on the file LRU, mark it uptodate on success, hand it to fscache, then
 * drop the readahead reference and finally the rdata reference.
 */
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add_file(page);

		if (rdata->result == 0) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0)
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		/* drop the reference taken for readahead */
		page_cache_release(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}
3149
/*
 * Receive the data portion of a read response directly into the pages
 * attached to @rdata.
 *
 * @server: connection the response is arriving on
 * @rdata:  read descriptor whose pages[] receive the data
 * @len:    number of data bytes the server actually returned
 *
 * Pages beyond @len are either zero-filled and marked uptodate (when they
 * lie past the server's EOF) or released back to the VM.  Returns the
 * total number of bytes read, or a negative error code if the socket
 * failed before any data arrived.
 */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int total_read = 0, result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->tailsz = PAGE_CACHE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_CACHE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_CACHE_SIZE;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			len -= PAGE_CACHE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len,
				'\0', PAGE_CACHE_SIZE - len);
			/* record the short tail so demultiplex reads only len bytes */
			rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_CACHE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		/* data was received straight into the page; drop the mapping */
		kunmap(page);
		if (result < 0)
			break;

		total_read += result;
	}

	/* report partial progress over a late socket error */
	return total_read > 0 ? total_read : result;
}
3225
/*
 * ->readpages() implementation: batch contiguous pages from @page_list
 * into rsize-bounded async read requests.
 *
 * @file:      the open file the readahead is for
 * @mapping:   address space the pages belong to
 * @page_list: pages supplied by the VFS, in order of declining index
 * @num_pages: number of pages on @page_list
 *
 * Returns 0 on success or a negative error code; pages left on
 * @page_list on error are cleaned up by the VFS.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	unsigned int rsize = cifs_sb->rsize;
	pid_t pid;

	/*
	 * Give up immediately if rsize is too small to read an entire page.
	 * The VFS will fall back to readpage. We should never reach this
	 * point however since we set ra_pages to 0 when the rsize is smaller
	 * than a cache page.
	 */
	if (unlikely(rsize < PAGE_CACHE_SIZE))
		return 0;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	/* forward the originating pid when the mount requests it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	INIT_LIST_HEAD(&tmplist);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i;
		unsigned int bytes = PAGE_CACHE_SIZE;
		unsigned int expected_index;
		unsigned int nr_pages = 1;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;

		page = list_entry(page_list->prev, struct page, lru);

		/*
		 * Lock the page and put it in the cache. Since no one else
		 * should have access to this page, we're safe to simply set
		 * PG_locked without checking it first.
		 */
		__set_page_locked(page);
		rc = add_to_page_cache_locked(page, mapping,
					      page->index, GFP_KERNEL);

		/* give up if we can't stick it in the cache */
		if (rc) {
			__clear_page_locked(page);
			break;
		}

		/* move first page to the tmplist */
		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
		list_move_tail(&page->lru, &tmplist);

		/* now try and add more pages onto the request */
		expected_index = page->index + 1;
		list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
			/* discontinuity ? */
			if (page->index != expected_index)
				break;

			/* would this page push the read over the rsize? */
			if (bytes + PAGE_CACHE_SIZE > rsize)
				break;

			__set_page_locked(page);
			if (add_to_page_cache_locked(page, mapping,
						page->index, GFP_KERNEL)) {
				__clear_page_locked(page);
				break;
			}
			list_move_tail(&page->lru, &tmplist);
			bytes += PAGE_CACHE_SIZE;
			expected_index++;
			nr_pages++;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			rc = -ENOMEM;
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_CACHE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;

		/* tmplist is in increasing index order; keep that in pages[] */
		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = cifs_retry_async_readv(rdata);
		if (rc != 0) {
			/* undo page-cache insertion; completion won't run */
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our ref; cifs_readv_complete drops the async one */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	return rc;
}
3372
/*
 * Fill a single locked page with data, first from fscache, then by a
 * synchronous read from the server.
 *
 * @file:     open file to read through
 * @page:     locked page to populate (caller unlocks it)
 * @poffset:  byte offset in the file to read from
 *
 * Returns 0 on success or a negative error code.
 *
 * NOTE(review): when cifs_readpage_from_fscache() succeeds, the fscache
 * read completion may have already unlocked the page; the caller
 * (cifs_readpage) still calls unlock_page() afterwards — verify against
 * the fscache completion path (upstream later moved the unlock into this
 * worker to avoid a double unlock).
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	/* extra ref/kmap held only for the duration of the sync read */
	page_cache_get(page);
	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	file_inode(file)->i_atime =
		current_fs_time(file_inode(file)->i_sb);

	/* zero the tail on a short read so no stale data is exposed */
	if (PAGE_CACHE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	page_cache_release(page);

read_complete:
	return rc;
}
3416
3417 static int cifs_readpage(struct file *file, struct page *page)
3418 {
3419         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3420         int rc = -EACCES;
3421         unsigned int xid;
3422
3423         xid = get_xid();
3424
3425         if (file->private_data == NULL) {
3426                 rc = -EBADF;
3427                 free_xid(xid);
3428                 return rc;
3429         }
3430
3431         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3432                  page, (int)offset, (int)offset);
3433
3434         rc = cifs_readpage_worker(file, page, &offset);
3435
3436         unlock_page(page);
3437
3438         free_xid(xid);
3439         return rc;
3440 }
3441
3442 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3443 {
3444         struct cifsFileInfo *open_file;
3445
3446         spin_lock(&cifs_file_list_lock);
3447         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3448                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3449                         spin_unlock(&cifs_file_list_lock);
3450                         return 1;
3451                 }
3452         }
3453         spin_unlock(&cifs_file_list_lock);
3454         return 0;
3455 }
3456
3457 /* We do not want to update the file size from server for inodes
3458    open for write - to avoid races with writepage extending
3459    the file - in the future we could consider allowing
3460    refreshing the inode only on increases in the file size
3461    but this is tricky to do without racing with writebehind
3462    page caching in the current Linux kernel design */
3463 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3464 {
3465         if (!cifsInode)
3466                 return true;
3467
3468         if (is_inode_writable(cifsInode)) {
3469                 /* This inode is open for write at least once */
3470                 struct cifs_sb_info *cifs_sb;
3471
3472                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3473                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3474                         /* since no page cache to corrupt on directio
3475                         we can change size safely */
3476                         return true;
3477                 }
3478
3479                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3480                         return true;
3481
3482                 return false;
3483         } else
3484                 return true;
3485 }
3486
/*
 * ->write_begin() implementation: locate/lock the page that will receive
 * the write and, where possible, avoid reading it from the server first.
 *
 * @file, @mapping: target file and its address space
 * @pos, @len:      byte range about to be written
 * @flags:          AOP_FLAG_* passed through to page allocation
 * @pagep:          out: the locked page (always set, even on error)
 * @fsdata:         unused
 *
 * Returns 0 on success or -ENOMEM if no page could be grabbed.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_CACHE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_I(mapping->host)->clientCanCacheRead) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			/* zero the parts of the page the write won't cover */
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_CACHE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
3558
3559 static int cifs_release_page(struct page *page, gfp_t gfp)
3560 {
3561         if (PagePrivate(page))
3562                 return 0;
3563
3564         return cifs_fscache_release_page(page, gfp);
3565 }
3566
3567 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3568                                  unsigned int length)
3569 {
3570         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3571
3572         if (offset == 0 && length == PAGE_CACHE_SIZE)
3573                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3574 }
3575
3576 static int cifs_launder_page(struct page *page)
3577 {
3578         int rc = 0;
3579         loff_t range_start = page_offset(page);
3580         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3581         struct writeback_control wbc = {
3582                 .sync_mode = WB_SYNC_ALL,
3583                 .nr_to_write = 0,
3584                 .range_start = range_start,
3585                 .range_end = range_end,
3586         };
3587
3588         cifs_dbg(FYI, "Launder page: %p\n", page);
3589
3590         if (clear_page_dirty_for_io(page))
3591                 rc = cifs_writepage_locked(page, &wbc);
3592
3593         cifs_fscache_invalidate_page(page, page->mapping->host);
3594         return rc;
3595 }
3596
/*
 * Work function run when the server breaks an oplock: downgrade any
 * local lease, flush (and possibly invalidate) cached data, re-push
 * byte-range locks, and acknowledge the break to the server.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = cfile->dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/*
	 * Mandatory locks can't coexist with read caching: drop the read
	 * oplock so cached reads don't bypass the locks.
	 */
	if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->clientCanCacheRead = false;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* tell the VFS lease layer what caching level remains */
		if (cinode->clientCanCacheRead)
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		/* losing read caching too: wait for writeback and drop pages */
		if (cinode->clientCanCacheRead == 0) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_invalidate_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	/* re-send cached byte-range locks now that caching was downgraded */
	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
}
3643
/*
 * Address space operations used when the server buffer is large enough
 * for the readpages (readahead) path; see cifs_addr_ops_smallbuf below
 * for the variant without ->readpages.
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
3656
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations, and the
 * VFS falls back to single-page reads via ->readpage.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};