// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

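/*
 * Map the O_ACCMODE portion of the VFS open flags to the NT
 * desired-access bits requested on open.
 */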
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause unnecessary access-denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

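/*
 * Translate VFS open flags into the SMB_O_* flags used by the legacy
 * CIFS POSIX extensions open call.
 */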
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

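/*
 * Pick the NT create disposition implied by the O_CREAT/O_EXCL/O_TRUNC
 * combination (see the flag mapping table in cifs_nt_open() below).
 */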
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

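/*
 * Open a file via the legacy CIFS POSIX extensions and, if the caller
 * passed @pinode, fetch or refresh the inode from the returned metadata.
 */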
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

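/*
 * Open a file through the traditional NT create path (no POSIX
 * extensions) and refresh the inode from the server's response.
 */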
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than creating a new one as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read/write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

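/*
 * Return true if any open instance of this inode currently holds
 * cached byte-range locks.
 */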
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

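/*
 * Acquire @sem for writing, polling with down_write_trylock() and
 * sleeping 10ms between attempts instead of blocking in down_write();
 * presumably this sidesteps lock-ordering issues on lock_sem.
 */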
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

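/*
 * Allocate the per-open cifsFileInfo, link it into the inode's and
 * tcon's open-file lists, and record the server handle and oplock
 * state returned by the open.
 */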
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

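/*
 * Take an extra reference on @cifs_file under file_info_lock; paired
 * with cifsFileInfo_put().
 */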
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

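/*
 * Final teardown once the refcount has dropped to zero: free cached
 * lock records and release the tlink, dentry and superblock references.
 */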
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        cifs_fscache_release_inode_cookie(inode);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from the oplock break handler
 * @offload:    if true, defer the final release to the fileinfo_put_wq
 *              work queue instead of doing it inline
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

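/*
 * VFS ->open for regular files: reuse a handle kept open by a deferred
 * close when possible, otherwise open on the server (POSIX path first
 * when available, then the NT path).
 */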
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto out;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through and retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

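/*
 * Reopen @cfile after its handle was invalidated (typically on
 * reconnect), optionally flushing dirty pages and refreshing the inode.
 */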
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here.
         * We can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size
         * can be stale. If we knew for sure that the inode was not dirty
         * locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data. And since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we can not go to the server to get the new
         * inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

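/*
 * Work handler for a deferred close: drop the deferred-close record and
 * release the reference that kept the handle open.
 */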
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

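/*
 * VFS ->release for regular files. With a suitable lease held, the
 * server close is deferred (for up to acregmax) so that a quickly
 * following reopen can reuse the handle; otherwise the reference is
 * dropped immediately.
 */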
int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                                cifs_fscache_update_inode_cookie(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work, so increase the ref count to
                                 * avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

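/*
 * After a reconnect, walk the tcon's open-file list and reopen any
 * handles that were invalidated.
 */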
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

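/*
 * VFS ->release for directories: close the search handle on the server
 * and free any remaining search buffer.
 */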
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

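/*
 * Allocate and initialize a byte-range lock record owned by the
 * current process.
 */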
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

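/*
 * Send all cached byte-range locks for @cfile to the server, batching
 * as many LOCKING_ANDX ranges per request as the server's buffer size
 * allows.
 */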
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

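/*
 * Derive an on-the-wire lock owner id from the local fl_owner_t by
 * mixing it with the random cifs_lock_secret; presumably this avoids
 * exposing raw kernel pointer values to the server.
 */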
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

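/*
 * Push all cached POSIX (fcntl) locks on the inode to the server. The
 * locks are snapshotted into a private list under flc_lock first, since
 * the network calls that push them can sleep.
 */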
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                el = el->next; /* advance to the next preallocated entry */
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
        }
1404         spin_unlock(&flctx->flc_lock);
1405
1406         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1407                 int stored_rc;
1408
1409                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1410                                              lck->offset, lck->length, NULL,
1411                                              lck->type, 0);
1412                 if (stored_rc)
1413                         rc = stored_rc;
1414                 list_del(&lck->llist);
1415                 kfree(lck);
1416         }
1417
1418 out:
1419         free_xid(xid);
1420         return rc;
1421 err_out:
1422         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1423                 list_del(&lck->llist);
1424                 kfree(lck);
1425         }
1426         goto out;
1427 }
1428
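/*
 * Push cached byte-range locks to the server and stop caching new ones:
 * POSIX-style if the server and mount allow it, otherwise via the
 * protocol's mandatory-lock push operation.
 */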
1429 static int
1430 cifs_push_locks(struct cifsFileInfo *cfile)
1431 {
1432         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1433         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1434         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1435         int rc = 0;
1436
1437         /* we are going to update can_cache_brlcks here - need write access */
1438         cifs_down_write(&cinode->lock_sem);
1439         if (!cinode->can_cache_brlcks) {
1440                 up_write(&cinode->lock_sem);
1441                 return rc;
1442         }
1443
1444         if (cap_unix(tcon->ses) &&
1445             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1446             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1447                 rc = cifs_push_posix_locks(cfile);
1448         else
1449                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1450
1451         cinode->can_cache_brlcks = false;
1452         up_write(&cinode->lock_sem);
1453         return rc;
1454 }
1455
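/*
 * Translate a VFS file_lock into the on-the-wire lock type and work out
 * whether this request locks, unlocks, and/or may block (FL_SLEEP).
 */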
1456 static void
1457 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1458                 bool *wait_flag, struct TCP_Server_Info *server)
1459 {
1460         if (flock->fl_flags & FL_POSIX)
1461                 cifs_dbg(FYI, "Posix\n");
1462         if (flock->fl_flags & FL_FLOCK)
1463                 cifs_dbg(FYI, "Flock\n");
1464         if (flock->fl_flags & FL_SLEEP) {
1465                 cifs_dbg(FYI, "Blocking lock\n");
1466                 *wait_flag = true;
1467         }
1468         if (flock->fl_flags & FL_ACCESS)
1469                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1470         if (flock->fl_flags & FL_LEASE)
1471                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1472         if (flock->fl_flags &
1473             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1474                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1475                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1476
1477         *type = server->vals->large_lock_type;
1478         if (flock->fl_type == F_WRLCK) {
1479                 cifs_dbg(FYI, "F_WRLCK\n");
1480                 *type |= server->vals->exclusive_lock_type;
1481                 *lock = 1;
1482         } else if (flock->fl_type == F_UNLCK) {
1483                 cifs_dbg(FYI, "F_UNLCK\n");
1484                 *type |= server->vals->unlock_lock_type;
1485                 *unlock = 1;
1486                 /* Check if unlock includes more than one lock range */
1487         } else if (flock->fl_type == F_RDLCK) {
1488                 cifs_dbg(FYI, "F_RDLCK\n");
1489                 *type |= server->vals->shared_lock_type;
1490                 *lock = 1;
1491         } else if (flock->fl_type == F_EXLCK) {
1492                 cifs_dbg(FYI, "F_EXLCK\n");
1493                 *type |= server->vals->exclusive_lock_type;
1494                 *lock = 1;
1495         } else if (flock->fl_type == F_SHLCK) {
1496                 cifs_dbg(FYI, "F_SHLCK\n");
1497                 *type |= server->vals->shared_lock_type;
1498                 *lock = 1;
1499         } else
1500                 cifs_dbg(FYI, "Unknown type of lock\n");
1501 }
1502
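/*
 * Handle F_GETLK. With POSIX lock support the server is asked directly;
 * otherwise we probe: try to take the range and, on success, release it
 * again and report no conflict. If an exclusive probe fails, a second
 * shared probe distinguishes a conflicting read lock (F_RDLCK) from a
 * conflicting write lock (F_WRLCK).
 */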
1503 static int
1504 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1505            bool wait_flag, bool posix_lck, unsigned int xid)
1506 {
1507         int rc = 0;
1508         __u64 length = 1 + flock->fl_end - flock->fl_start;
1509         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1510         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1511         struct TCP_Server_Info *server = tcon->ses->server;
1512         __u16 netfid = cfile->fid.netfid;
1513
1514         if (posix_lck) {
1515                 int posix_lock_type;
1516
1517                 rc = cifs_posix_lock_test(file, flock);
1518                 if (!rc)
1519                         return rc;
1520
1521                 if (type & server->vals->shared_lock_type)
1522                         posix_lock_type = CIFS_RDLCK;
1523                 else
1524                         posix_lock_type = CIFS_WRLCK;
1525                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1526                                       hash_lockowner(flock->fl_owner),
1527                                       flock->fl_start, length, flock,
1528                                       posix_lock_type, wait_flag);
1529                 return rc;
1530         }
1531
1532         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1533         if (!rc)
1534                 return rc;
1535
1536         /* BB we could chain these into one lock request BB */
1537         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1538                                     1, 0, false);
1539         if (rc == 0) {
1540                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1541                                             type, 0, 1, false);
1542                 flock->fl_type = F_UNLCK;
1543                 if (rc != 0)
1544                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1545                                  rc);
1546                 return 0;
1547         }
1548
1549         if (type & server->vals->shared_lock_type) {
1550                 flock->fl_type = F_WRLCK;
1551                 return 0;
1552         }
1553
1554         type &= ~server->vals->exclusive_lock_type;
1555
1556         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1557                                     type | server->vals->shared_lock_type,
1558                                     1, 0, false);
1559         if (rc == 0) {
1560                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1561                         type | server->vals->shared_lock_type, 0, 1, false);
1562                 flock->fl_type = F_RDLCK;
1563                 if (rc != 0)
1564                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1565                                  rc);
1566         } else
1567                 flock->fl_type = F_WRLCK;
1568
1569         return 0;
1570 }
1571
1572 void
1573 cifs_move_llist(struct list_head *source, struct list_head *dest)
1574 {
1575         struct list_head *li, *tmp;
1576         list_for_each_safe(li, tmp, source)
1577                 list_move(li, dest);
1578 }
1579
1580 void
1581 cifs_free_llist(struct list_head *llist)
1582 {
1583         struct cifsLockInfo *li, *tmp;
1584         list_for_each_entry_safe(li, tmp, llist, llist) {
1585                 cifs_del_lock_waiters(li);
1586                 list_del(&li->llist);
1587                 kfree(li);
1588         }
1589 }
1590
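/*
 * Unlock every cached lock that fits inside the given range. maxBuf bounds
 * how many LOCKING_ANDX_RANGE entries fit in one request (max_num), so
 * matching locks are batched and flushed whenever the buffer fills, with
 * one pass per lock type. Locks are parked on tmp_llist first so they can
 * be restored to the file's list if the server rejects the unlock.
 */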
1591 int
1592 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1593                   unsigned int xid)
1594 {
1595         int rc = 0, stored_rc;
1596         static const int types[] = {
1597                 LOCKING_ANDX_LARGE_FILES,
1598                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1599         };
1600         unsigned int i;
1601         unsigned int max_num, num, max_buf;
1602         LOCKING_ANDX_RANGE *buf, *cur;
1603         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1604         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1605         struct cifsLockInfo *li, *tmp;
1606         __u64 length = 1 + flock->fl_end - flock->fl_start;
1607         struct list_head tmp_llist;
1608
1609         INIT_LIST_HEAD(&tmp_llist);
1610
1611         /*
1612          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1613          * and check it before using.
1614          */
1615         max_buf = tcon->ses->server->maxBuf;
1616         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1617                 return -EINVAL;
1618
1619         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1620                      PAGE_SIZE);
1621         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1622                         PAGE_SIZE);
1623         max_num = (max_buf - sizeof(struct smb_hdr)) /
1624                                                 sizeof(LOCKING_ANDX_RANGE);
1625         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1626         if (!buf)
1627                 return -ENOMEM;
1628
1629         cifs_down_write(&cinode->lock_sem);
1630         for (i = 0; i < 2; i++) {
1631                 cur = buf;
1632                 num = 0;
1633                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1634                         if (flock->fl_start > li->offset ||
1635                             (flock->fl_start + length) <
1636                             (li->offset + li->length))
1637                                 continue;
1638                         if (current->tgid != li->pid)
1639                                 continue;
1640                         if (types[i] != li->type)
1641                                 continue;
1642                         if (cinode->can_cache_brlcks) {
1643                                 /*
1644                                  * We can cache brlock requests - simply remove
1645                                  * a lock from the file's list.
1646                                  */
1647                                 list_del(&li->llist);
1648                                 cifs_del_lock_waiters(li);
1649                                 kfree(li);
1650                                 continue;
1651                         }
1652                         cur->Pid = cpu_to_le16(li->pid);
1653                         cur->LengthLow = cpu_to_le32((u32)li->length);
1654                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1655                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1656                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1657                         /*
1658                          * We need to save a lock here to let us add it again to
1659                          * the file's list if the unlock range request fails on
1660                          * the server.
1661                          */
1662                         list_move(&li->llist, &tmp_llist);
1663                         if (++num == max_num) {
1664                                 stored_rc = cifs_lockv(xid, tcon,
1665                                                        cfile->fid.netfid,
1666                                                        li->type, num, 0, buf);
1667                                 if (stored_rc) {
1668                                         /*
1669                                          * We failed on the unlock range
1670                                          * request - add all locks from the tmp
1671                                          * list to the head of the file's list.
1672                                          */
1673                                         cifs_move_llist(&tmp_llist,
1674                                                         &cfile->llist->locks);
1675                                         rc = stored_rc;
1676                                 } else
1677                                         /*
1678                                          * The unlock range request succeeded -
1679                                          * free the tmp list.
1680                                          */
1681                                         cifs_free_llist(&tmp_llist);
1682                                 cur = buf;
1683                                 num = 0;
1684                         } else
1685                                 cur++;
1686                 }
1687                 if (num) {
1688                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1689                                                types[i], num, 0, buf);
1690                         if (stored_rc) {
1691                                 cifs_move_llist(&tmp_llist,
1692                                                 &cfile->llist->locks);
1693                                 rc = stored_rc;
1694                         } else
1695                                 cifs_free_llist(&tmp_llist);
1696                 }
1697         }
1698
1699         up_write(&cinode->lock_sem);
1700         kfree(buf);
1701         return rc;
1702 }
1703
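/*
 * Handle F_SETLK/F_SETLKW and flock requests. POSIX-capable mounts go
 * straight to CIFSSMBPosixLock; otherwise the lock is cached locally when
 * possible and sent to the server as a mandatory byte-range lock when not.
 */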
1704 static int
1705 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1706            bool wait_flag, bool posix_lck, int lock, int unlock,
1707            unsigned int xid)
1708 {
1709         int rc = 0;
1710         __u64 length = 1 + flock->fl_end - flock->fl_start;
1711         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1712         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1713         struct TCP_Server_Info *server = tcon->ses->server;
1714         struct inode *inode = d_inode(cfile->dentry);
1715
1716         if (posix_lck) {
1717                 int posix_lock_type;
1718
1719                 rc = cifs_posix_lock_set(file, flock);
1720                 if (rc <= FILE_LOCK_DEFERRED)
1721                         return rc;
1722
1723                 if (type & server->vals->shared_lock_type)
1724                         posix_lock_type = CIFS_RDLCK;
1725                 else
1726                         posix_lock_type = CIFS_WRLCK;
1727
1728                 if (unlock == 1)
1729                         posix_lock_type = CIFS_UNLCK;
1730
1731                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1732                                       hash_lockowner(flock->fl_owner),
1733                                       flock->fl_start, length,
1734                                       NULL, posix_lock_type, wait_flag);
1735                 goto out;
1736         }
1737
1738         if (lock) {
1739                 struct cifsLockInfo *lock;
1740
1741                 lock = cifs_lock_init(flock->fl_start, length, type,
1742                                       flock->fl_flags);
1743                 if (!lock)
1744                         return -ENOMEM;
1745
1746                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1747                 if (rc < 0) {
1748                         kfree(lock);
1749                         return rc;
1750                 }
1751                 if (!rc)
1752                         goto out;
1753
1754                 /*
1755                  * Windows 7 server can delay breaking lease from read to None
1756                  * if we set a byte-range lock on a file - break it explicitly
1757                  * before sending the lock to the server to be sure the next
1758                  * read won't conflict with non-overlapping locks due to
1759                  * page reads.
1760                  */
1761                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1762                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1763                         cifs_zap_mapping(inode);
1764                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1765                                  inode);
1766                         CIFS_I(inode)->oplock = 0;
1767                 }
1768
1769                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1770                                             type, 1, 0, wait_flag);
1771                 if (rc) {
1772                         kfree(lock);
1773                         return rc;
1774                 }
1775
1776                 cifs_lock_add(cfile, lock);
1777         } else if (unlock)
1778                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1779
1780 out:
1781         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1782                 /*
1783                  * If this is a request to remove all locks because we
1784                  * are closing the file, it doesn't matter if the
1785                  * unlocking failed as both cifs.ko and the SMB server
1786                  * remove the lock on file close
1787                  */
1788                 if (rc) {
1789                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1790                         if (!(flock->fl_flags & FL_CLOSE))
1791                                 return rc;
1792                 }
1793                 rc = locks_lock_file_wait(file, flock);
1794         }
1795         return rc;
1796 }
1797
1798 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1799 {
1800         int rc, xid;
1801         int lock = 0, unlock = 0;
1802         bool wait_flag = false;
1803         bool posix_lck = false;
1804         struct cifs_sb_info *cifs_sb;
1805         struct cifs_tcon *tcon;
1806         struct cifsFileInfo *cfile;
1807         __u32 type;
1808
1809         rc = -EACCES;
1810         xid = get_xid();
1811
1812         if (!(fl->fl_flags & FL_FLOCK))
1813                 return -ENOLCK;
1814
1815         cfile = (struct cifsFileInfo *)file->private_data;
1816         tcon = tlink_tcon(cfile->tlink);
1817
1818         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1819                         tcon->ses->server);
1820         cifs_sb = CIFS_FILE_SB(file);
1821
1822         if (cap_unix(tcon->ses) &&
1823             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1824             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1825                 posix_lck = true;
1826
1827         if (!lock && !unlock) {
1828                 /*
1829                  * if no lock or unlock then nothing to do since we do not
1830                  * know what it is
1831                  */
1832                 free_xid(xid);
1833                 return -EOPNOTSUPP;
1834         }
1835
1836         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1837                         xid);
1838         free_xid(xid);
1839         return rc;
1842 }
1843
1844 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1845 {
1846         int rc, xid;
1847         int lock = 0, unlock = 0;
1848         bool wait_flag = false;
1849         bool posix_lck = false;
1850         struct cifs_sb_info *cifs_sb;
1851         struct cifs_tcon *tcon;
1852         struct cifsFileInfo *cfile;
1853         __u32 type;
1854
1855         rc = -EACCES;
1856         xid = get_xid();
1857
1858         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1859                  cmd, flock->fl_flags, flock->fl_type,
1860                  flock->fl_start, flock->fl_end);
1861
1862         cfile = (struct cifsFileInfo *)file->private_data;
1863         tcon = tlink_tcon(cfile->tlink);
1864
1865         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1866                         tcon->ses->server);
1867         cifs_sb = CIFS_FILE_SB(file);
1868         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1869
1870         if (cap_unix(tcon->ses) &&
1871             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1872             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1873                 posix_lck = true;
1874         /*
1875          * BB add code here to normalize offset and length to account for
1876          * negative length, which we cannot accept over the wire.
1877          */
1878         if (IS_GETLK(cmd)) {
1879                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1880                 free_xid(xid);
1881                 return rc;
1882         }
1883
1884         if (!lock && !unlock) {
1885                 /*
1886                  * if no lock or unlock then nothing to do since we do not
1887                  * know what it is
1888                  */
1889                 free_xid(xid);
1890                 return -EOPNOTSUPP;
1891         }
1892
1893         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1894                         xid);
1895         free_xid(xid);
1896         return rc;
1897 }
1898
1899 /*
1900  * update the file size (if needed) after a write. Should be called with
1901  * the inode->i_lock held
1902  */
1903 void
1904 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1905                       unsigned int bytes_written)
1906 {
1907         loff_t end_of_write = offset + bytes_written;
1908
1909         if (end_of_write > cifsi->server_eof)
1910                 cifsi->server_eof = end_of_write;
1911 }
1912
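/*
 * Synchronously write write_size bytes from write_data at *offset. The
 * request is chunked to the server's retry size, stale handles are
 * reopened, -EAGAIN is retried, and *offset plus the cached end of file
 * advance as each chunk completes.
 */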
1913 static ssize_t
1914 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1915            size_t write_size, loff_t *offset)
1916 {
1917         int rc = 0;
1918         unsigned int bytes_written = 0;
1919         unsigned int total_written;
1920         struct cifs_tcon *tcon;
1921         struct TCP_Server_Info *server;
1922         unsigned int xid;
1923         struct dentry *dentry = open_file->dentry;
1924         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1925         struct cifs_io_parms io_parms = {0};
1926
1927         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1928                  write_size, *offset, dentry);
1929
1930         tcon = tlink_tcon(open_file->tlink);
1931         server = tcon->ses->server;
1932
1933         if (!server->ops->sync_write)
1934                 return -ENOSYS;
1935
1936         xid = get_xid();
1937
1938         for (total_written = 0; write_size > total_written;
1939              total_written += bytes_written) {
1940                 rc = -EAGAIN;
1941                 while (rc == -EAGAIN) {
1942                         struct kvec iov[2];
1943                         unsigned int len;
1944
1945                         if (open_file->invalidHandle) {
1946                                 /* we could deadlock if we called
1947                                    filemap_fdatawait from here, so tell
1948                                    reopen_file not to flush data to the
1949                                    server now */
1950                                 rc = cifs_reopen_file(open_file, false);
1951                                 if (rc != 0)
1952                                         break;
1953                         }
1954
1955                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1956                                   (unsigned int)write_size - total_written);
1957                         /* iov[0] is reserved for smb header */
1958                         iov[1].iov_base = (char *)write_data + total_written;
1959                         iov[1].iov_len = len;
1960                         io_parms.pid = pid;
1961                         io_parms.tcon = tcon;
1962                         io_parms.offset = *offset;
1963                         io_parms.length = len;
1964                         rc = server->ops->sync_write(xid, &open_file->fid,
1965                                         &io_parms, &bytes_written, iov, 1);
1966                 }
1967                 if (rc || (bytes_written == 0)) {
1968                         if (total_written)
1969                                 break;
1970                         else {
1971                                 free_xid(xid);
1972                                 return rc;
1973                         }
1974                 } else {
1975                         spin_lock(&d_inode(dentry)->i_lock);
1976                         cifs_update_eof(cifsi, *offset, bytes_written);
1977                         spin_unlock(&d_inode(dentry)->i_lock);
1978                         *offset += bytes_written;
1979                 }
1980         }
1981
1982         cifs_stats_bytes_written(tcon, total_written);
1983
1984         if (total_written > 0) {
1985                 spin_lock(&d_inode(dentry)->i_lock);
1986                 if (*offset > d_inode(dentry)->i_size) {
1987                         i_size_write(d_inode(dentry), *offset);
1988                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1989                 }
1990                 spin_unlock(&d_inode(dentry)->i_lock);
1991         }
1992         mark_inode_dirty_sync(d_inode(dentry));
1993         free_xid(xid);
1994         return total_written;
1995 }
1996
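/*
 * Find an open handle with read access and take a reference on it; the
 * caller must drop that reference with cifsFileInfo_put(). Returns NULL
 * if no usable handle is found.
 */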
1997 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1998                                         bool fsuid_only)
1999 {
2000         struct cifsFileInfo *open_file = NULL;
2001         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2002
2003         /* only filter by fsuid on multiuser mounts */
2004         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2005                 fsuid_only = false;
2006
2007         spin_lock(&cifs_inode->open_file_lock);
2008         /* we could simply get the first list entry since write-only entries
2009            are always at the end of the list, but since the first entry might
2010            have a close pending, we go through the whole list */
2011         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2012                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2013                         continue;
2014                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2015                         if ((!open_file->invalidHandle)) {
2016                                 /* found a good file */
2017                                 /* lock it so it will not be closed on us */
2018                                 cifsFileInfo_get(open_file);
2019                                 spin_unlock(&cifs_inode->open_file_lock);
2020                                 return open_file;
2021                         } /* else might as well continue, and look for
2022                              another, or simply have the caller reopen it
2023                              again rather than trying to fix this handle */
2024                 } else /* write only file */
2025                         break; /* write only files are last so must be done */
2026         }
2027         spin_unlock(&cifs_inode->open_file_lock);
2028         return NULL;
2029 }
2030
2031 /* Return -EBADF if no handle is found and general rc otherwise */
2032 int
2033 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2034                        struct cifsFileInfo **ret_file)
2035 {
2036         struct cifsFileInfo *open_file, *inv_file = NULL;
2037         struct cifs_sb_info *cifs_sb;
2038         bool any_available = false;
2039         int rc = -EBADF;
2040         unsigned int refind = 0;
2041         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2042         bool with_delete = flags & FIND_WR_WITH_DELETE;
2043         *ret_file = NULL;
2044
2045         /*
2046          * Having a null inode here (because mapping->host was set to zero by
2047          * the VFS or MM) should not happen, but we had reports of an oops (due
2048          * to it being zero) during stress test cases, so we need to check for it
2049          */
2050
2051         if (cifs_inode == NULL) {
2052                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2053                 dump_stack();
2054                 return rc;
2055         }
2056
2057         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2058
2059         /* only filter by fsuid on multiuser mounts */
2060         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2061                 fsuid_only = false;
2062
2063         spin_lock(&cifs_inode->open_file_lock);
2064 refind_writable:
2065         if (refind > MAX_REOPEN_ATT) {
2066                 spin_unlock(&cifs_inode->open_file_lock);
2067                 return rc;
2068         }
2069         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2070                 if (!any_available && open_file->pid != current->tgid)
2071                         continue;
2072                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2073                         continue;
2074                 if (with_delete && !(open_file->fid.access & DELETE))
2075                         continue;
2076                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2077                         if (!open_file->invalidHandle) {
2078                                 /* found a good writable file */
2079                                 cifsFileInfo_get(open_file);
2080                                 spin_unlock(&cifs_inode->open_file_lock);
2081                                 *ret_file = open_file;
2082                                 return 0;
2083                         } else {
2084                                 if (!inv_file)
2085                                         inv_file = open_file;
2086                         }
2087                 }
2088         }
2089         /* couldn't find usable FH with same pid, try any available */
2090         if (!any_available) {
2091                 any_available = true;
2092                 goto refind_writable;
2093         }
2094
2095         if (inv_file) {
2096                 any_available = false;
2097                 cifsFileInfo_get(inv_file);
2098         }
2099
2100         spin_unlock(&cifs_inode->open_file_lock);
2101
2102         if (inv_file) {
2103                 rc = cifs_reopen_file(inv_file, false);
2104                 if (!rc) {
2105                         *ret_file = inv_file;
2106                         return 0;
2107                 }
2108
2109                 spin_lock(&cifs_inode->open_file_lock);
2110                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2111                 spin_unlock(&cifs_inode->open_file_lock);
2112                 cifsFileInfo_put(inv_file);
2113                 ++refind;
2114                 inv_file = NULL;
2115                 spin_lock(&cifs_inode->open_file_lock);
2116                 goto refind_writable;
2117         }
2118
2119         return rc;
2120 }
2121
2122 struct cifsFileInfo *
2123 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2124 {
2125         struct cifsFileInfo *cfile;
2126         int rc;
2127
2128         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2129         if (rc)
2130                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2131
2132         return cfile;
2133 }
2134
2135 int
2136 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2137                        int flags,
2138                        struct cifsFileInfo **ret_file)
2139 {
2140         struct cifsFileInfo *cfile;
2141         void *page = alloc_dentry_path();
2142
2143         *ret_file = NULL;
2144
2145         spin_lock(&tcon->open_file_lock);
2146         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2147                 struct cifsInodeInfo *cinode;
2148                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2149                 if (IS_ERR(full_path)) {
2150                         spin_unlock(&tcon->open_file_lock);
2151                         free_dentry_path(page);
2152                         return PTR_ERR(full_path);
2153                 }
2154                 if (strcmp(full_path, name))
2155                         continue;
2156
2157                 cinode = CIFS_I(d_inode(cfile->dentry));
2158                 spin_unlock(&tcon->open_file_lock);
2159                 free_dentry_path(page);
2160                 return cifs_get_writable_file(cinode, flags, ret_file);
2161         }
2162
2163         spin_unlock(&tcon->open_file_lock);
2164         free_dentry_path(page);
2165         return -ENOENT;
2166 }
2167
2168 int
2169 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2170                        struct cifsFileInfo **ret_file)
2171 {
2172         struct cifsFileInfo *cfile;
2173         void *page = alloc_dentry_path();
2174
2175         *ret_file = NULL;
2176
2177         spin_lock(&tcon->open_file_lock);
2178         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2179                 struct cifsInodeInfo *cinode;
2180                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2181                 if (IS_ERR(full_path)) {
2182                         spin_unlock(&tcon->open_file_lock);
2183                         free_dentry_path(page);
2184                         return PTR_ERR(full_path);
2185                 }
2186                 if (strcmp(full_path, name))
2187                         continue;
2188
2189                 cinode = CIFS_I(d_inode(cfile->dentry));
2190                 spin_unlock(&tcon->open_file_lock);
2191                 free_dentry_path(page);
2192                 *ret_file = find_readable_file(cinode, 0);
2193                 return *ret_file ? 0 : -ENOENT;
2194         }
2195
2196         spin_unlock(&tcon->open_file_lock);
2197         free_dentry_path(page);
2198         return -ENOENT;
2199 }
2200
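/*
 * Write bytes [from, to) of a cached page back to the server through any
 * writable handle on the inode. The range is clamped so the write never
 * extends the file, and a page that now lies entirely past i_size (a race
 * with truncate) is silently skipped.
 */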
2201 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2202 {
2203         struct address_space *mapping = page->mapping;
2204         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2205         char *write_data;
2206         int rc = -EFAULT;
2207         int bytes_written = 0;
2208         struct inode *inode;
2209         struct cifsFileInfo *open_file;
2210
2211         if (!mapping || !mapping->host)
2212                 return -EFAULT;
2213
2214         inode = page->mapping->host;
2215
2216         offset += (loff_t)from;
2217         write_data = kmap(page);
2218         write_data += from;
2219
2220         if ((to > PAGE_SIZE) || (from > to)) {
2221                 kunmap(page);
2222                 return -EIO;
2223         }
2224
2225         /* racing with truncate? */
2226         if (offset > mapping->host->i_size) {
2227                 kunmap(page);
2228                 return 0; /* don't care */
2229         }
2230
2231         /* check to make sure that we are not extending the file */
2232         if (mapping->host->i_size - offset < (loff_t)to)
2233                 to = (unsigned)(mapping->host->i_size - offset);
2234
2235         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2236                                     &open_file);
2237         if (!rc) {
2238                 bytes_written = cifs_write(open_file, open_file->pid,
2239                                            write_data, to - from, &offset);
2240                 cifsFileInfo_put(open_file);
2241                 /* Does mm or vfs already set times? */
2242                 inode->i_atime = inode->i_mtime = current_time(inode);
2243                 if ((bytes_written > 0) && (offset))
2244                         rc = 0;
2245                 else if (bytes_written < 0)
2246                         rc = bytes_written;
2247                 else
2248                         rc = -EFAULT;
2249         } else {
2250                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2251                 if (!is_retryable_error(rc))
2252                         rc = -EIO;
2253         }
2254
2255         kunmap(page);
2256         return rc;
2257 }
2258
2259 static struct cifs_writedata *
2260 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2261                           pgoff_t end, pgoff_t *index,
2262                           unsigned int *found_pages)
2263 {
2264         struct cifs_writedata *wdata;
2265
2266         wdata = cifs_writedata_alloc((unsigned int)tofind,
2267                                      cifs_writev_complete);
2268         if (!wdata)
2269                 return NULL;
2270
2271         *found_pages = find_get_pages_range_tag(mapping, index, end,
2272                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2273         return wdata;
2274 }
2275
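/*
 * Lock a run of consecutive dirty pages out of the found_pages candidates
 * and mark them writeback. The first page is locked unconditionally,
 * later ones with trylock so a busy page simply ends the run; pages that
 * were truncated, reused, out of range, or past EOF also stop the batch.
 * Candidates that don't make it into the run are released.
 */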
2276 static unsigned int
2277 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2278                     struct address_space *mapping,
2279                     struct writeback_control *wbc,
2280                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2281 {
2282         unsigned int nr_pages = 0, i;
2283         struct page *page;
2284
2285         for (i = 0; i < found_pages; i++) {
2286                 page = wdata->pages[i];
2287                 /*
2288                  * At this point we hold neither the i_pages lock nor the
2289                  * page lock: the page may be truncated or invalidated
2290                  * (changing page->mapping to NULL), or even swizzled
2291                  * back from swapper_space to tmpfs file mapping
2292                  */
2293
2294                 if (nr_pages == 0)
2295                         lock_page(page);
2296                 else if (!trylock_page(page))
2297                         break;
2298
2299                 if (unlikely(page->mapping != mapping)) {
2300                         unlock_page(page);
2301                         break;
2302                 }
2303
2304                 if (!wbc->range_cyclic && page->index > end) {
2305                         *done = true;
2306                         unlock_page(page);
2307                         break;
2308                 }
2309
2310                 if (*next && (page->index != *next)) {
2311                         /* Not next consecutive page */
2312                         unlock_page(page);
2313                         break;
2314                 }
2315
2316                 if (wbc->sync_mode != WB_SYNC_NONE)
2317                         wait_on_page_writeback(page);
2318
2319                 if (PageWriteback(page) ||
2320                                 !clear_page_dirty_for_io(page)) {
2321                         unlock_page(page);
2322                         break;
2323                 }
2324
2325                 /*
2326                  * This actually clears the dirty bit in the radix tree.
2327                  * See cifs_writepage() for more commentary.
2328                  */
2329                 set_page_writeback(page);
2330                 if (page_offset(page) >= i_size_read(mapping->host)) {
2331                         *done = true;
2332                         unlock_page(page);
2333                         end_page_writeback(page);
2334                         break;
2335                 }
2336
2337                 wdata->pages[i] = page;
2338                 *next = page->index + 1;
2339                 ++nr_pages;
2340         }
2341
2342         /* reset index to refind any pages skipped */
2343         if (nr_pages == 0)
2344                 *index = wdata->pages[0]->index + 1;
2345
2346         /* put any pages we aren't going to use */
2347         for (i = nr_pages; i < found_pages; i++) {
2348                 put_page(wdata->pages[i]);
2349                 wdata->pages[i] = NULL;
2350         }
2351
2352         return nr_pages;
2353 }
2354
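/*
 * Finish filling in wdata (offset, page count, tail size, total bytes),
 * re-check the credits against the final byte count, and hand the batch
 * to the transport's async_writev op.
 */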
2355 static int
2356 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2357                  struct address_space *mapping, struct writeback_control *wbc)
2358 {
2359         int rc;
2360
2361         wdata->sync_mode = wbc->sync_mode;
2362         wdata->nr_pages = nr_pages;
2363         wdata->offset = page_offset(wdata->pages[0]);
2364         wdata->pagesz = PAGE_SIZE;
2365         wdata->tailsz = min(i_size_read(mapping->host) -
2366                         page_offset(wdata->pages[nr_pages - 1]),
2367                         (loff_t)PAGE_SIZE);
2368         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2369         wdata->pid = wdata->cfile->pid;
2370
2371         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2372         if (rc)
2373                 return rc;
2374
2375         if (wdata->cfile->invalidHandle)
2376                 rc = -EAGAIN;
2377         else
2378                 rc = wdata->server->ops->async_writev(wdata,
2379                                                       cifs_writedata_release);
2380
2381         return rc;
2382 }
2383
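/*
 * Writeback for the whole mapping: repeatedly take write credits sized to
 * the negotiated wsize, gather a batch of consecutive dirty pages into a
 * cifs_writedata, and send it as one async write. Retryable send errors
 * redirty the pages, and WB_SYNC_ALL restarts an -EAGAIN batch from the
 * saved index.
 */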
2384 static int cifs_writepages(struct address_space *mapping,
2385                            struct writeback_control *wbc)
2386 {
2387         struct inode *inode = mapping->host;
2388         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2389         struct TCP_Server_Info *server;
2390         bool done = false, scanned = false, range_whole = false;
2391         pgoff_t end, index;
2392         struct cifs_writedata *wdata;
2393         struct cifsFileInfo *cfile = NULL;
2394         int rc = 0;
2395         int saved_rc = 0;
2396         unsigned int xid;
2397
2398         /*
2399          * If wsize is smaller than the page cache size, default to writing
2400          * one page at a time via cifs_writepage
2401          */
2402         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2403                 return generic_writepages(mapping, wbc);
2404
2405         xid = get_xid();
2406         if (wbc->range_cyclic) {
2407                 index = mapping->writeback_index; /* Start from prev offset */
2408                 end = -1;
2409         } else {
2410                 index = wbc->range_start >> PAGE_SHIFT;
2411                 end = wbc->range_end >> PAGE_SHIFT;
2412                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2413                         range_whole = true;
2414                 scanned = true;
2415         }
2416         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2417
2418 retry:
2419         while (!done && index <= end) {
2420                 unsigned int i, nr_pages, found_pages, wsize;
2421                 pgoff_t next = 0, tofind, saved_index = index;
2422                 struct cifs_credits credits_on_stack;
2423                 struct cifs_credits *credits = &credits_on_stack;
2424                 int get_file_rc = 0;
2425
2426                 if (cfile)
2427                         cifsFileInfo_put(cfile);
2428
2429                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2430
2431                 /* in case of an error store it to return later */
2432                 if (rc)
2433                         get_file_rc = rc;
2434
2435                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2436                                                    &wsize, credits);
2437                 if (rc != 0) {
2438                         done = true;
2439                         break;
2440                 }
2441
2442                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2443
2444                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2445                                                   &found_pages);
2446                 if (!wdata) {
2447                         rc = -ENOMEM;
2448                         done = true;
2449                         add_credits_and_wake_if(server, credits, 0);
2450                         break;
2451                 }
2452
2453                 if (found_pages == 0) {
2454                         kref_put(&wdata->refcount, cifs_writedata_release);
2455                         add_credits_and_wake_if(server, credits, 0);
2456                         break;
2457                 }
2458
2459                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2460                                                end, &index, &next, &done);
2461
2462                 /* nothing to write? */
2463                 if (nr_pages == 0) {
2464                         kref_put(&wdata->refcount, cifs_writedata_release);
2465                         add_credits_and_wake_if(server, credits, 0);
2466                         continue;
2467                 }
2468
2469                 wdata->credits = credits_on_stack;
2470                 wdata->cfile = cfile;
2471                 wdata->server = server;
2472                 cfile = NULL;
2473
2474                 if (!wdata->cfile) {
2475                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2476                                  get_file_rc);
2477                         if (is_retryable_error(get_file_rc))
2478                                 rc = get_file_rc;
2479                         else
2480                                 rc = -EBADF;
2481                 } else
2482                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2483
2484                 for (i = 0; i < nr_pages; ++i)
2485                         unlock_page(wdata->pages[i]);
2486
2487                 /* send failure -- clean up the mess */
2488                 if (rc != 0) {
2489                         add_credits_and_wake_if(server, &wdata->credits, 0);
2490                         for (i = 0; i < nr_pages; ++i) {
2491                                 if (is_retryable_error(rc))
2492                                         redirty_page_for_writepage(wbc,
2493                                                            wdata->pages[i]);
2494                                 else
2495                                         SetPageError(wdata->pages[i]);
2496                                 end_page_writeback(wdata->pages[i]);
2497                                 put_page(wdata->pages[i]);
2498                         }
2499                         if (!is_retryable_error(rc))
2500                                 mapping_set_error(mapping, rc);
2501                 }
2502                 kref_put(&wdata->refcount, cifs_writedata_release);
2503
2504                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2505                         index = saved_index;
2506                         continue;
2507                 }
2508
2509                 /* Return immediately if we received a signal during writing */
2510                 if (is_interrupt_error(rc)) {
2511                         done = true;
2512                         break;
2513                 }
2514
2515                 if (rc != 0 && saved_rc == 0)
2516                         saved_rc = rc;
2517
2518                 wbc->nr_to_write -= nr_pages;
2519                 if (wbc->nr_to_write <= 0)
2520                         done = true;
2521
2522                 index = next;
2523         }
2524
2525         if (!scanned && !done) {
2526                 /*
2527                  * We hit the last page and there is more work to be done: wrap
2528                  * back to the start of the file
2529                  */
2530                 scanned = true;
2531                 index = 0;
2532                 goto retry;
2533         }
2534
2535         if (saved_rc != 0)
2536                 rc = saved_rc;
2537
2538         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2539                 mapping->writeback_index = index;
2540
2541         if (cfile)
2542                 cifsFileInfo_put(cfile);
2543         free_xid(xid);
2544         /* Indication to update ctime and mtime as close is deferred */
2545         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2546         return rc;
2547 }
2548
2549 static int
2550 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2551 {
2552         int rc;
2553         unsigned int xid;
2554
2555         xid = get_xid();
2556 /* BB add check for wbc flags */
2557         get_page(page);
2558         if (!PageUptodate(page))
2559                 cifs_dbg(FYI, "ppw - page not up to date\n");
2560
2561         /*
2562          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2563          *
2564          * A writepage() implementation always needs to do either this,
2565          * or re-dirty the page with "redirty_page_for_writepage()" in
2566          * the case of a failure.
2567          *
2568          * Just unlocking the page will cause the radix tree tag-bits
2569          * to fail to update with the state of the page correctly.
2570          */
2571         set_page_writeback(page);
2572 retry_write:
2573         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2574         if (is_retryable_error(rc)) {
2575                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2576                         goto retry_write;
2577                 redirty_page_for_writepage(wbc, page);
2578         } else if (rc != 0) {
2579                 SetPageError(page);
2580                 mapping_set_error(page->mapping, rc);
2581         } else {
2582                 SetPageUptodate(page);
2583         }
2584         end_page_writeback(page);
2585         put_page(page);
2586         free_xid(xid);
2587         return rc;
2588 }
2589
2590 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2591 {
2592         int rc = cifs_writepage_locked(page, wbc);
2593         unlock_page(page);
2594         return rc;
2595 }
2596
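/*
 * ->write_end: when a short copy lands in a page that never became
 * uptodate, the copied bytes are pushed to the server synchronously via
 * cifs_write() rather than left dirty, since the rest of the page is not
 * known to be valid.
 */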
2597 static int cifs_write_end(struct file *file, struct address_space *mapping,
2598                         loff_t pos, unsigned len, unsigned copied,
2599                         struct page *page, void *fsdata)
2600 {
2601         int rc;
2602         struct inode *inode = mapping->host;
2603         struct cifsFileInfo *cfile = file->private_data;
2604         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2605         __u32 pid;
2606
2607         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2608                 pid = cfile->pid;
2609         else
2610                 pid = current->tgid;
2611
2612         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2613                  page, pos, copied);
2614
2615         if (PageChecked(page)) {
2616                 if (copied == len)
2617                         SetPageUptodate(page);
2618                 ClearPageChecked(page);
2619         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2620                 SetPageUptodate(page);
2621
2622         if (!PageUptodate(page)) {
2623                 char *page_data;
2624                 unsigned offset = pos & (PAGE_SIZE - 1);
2625                 unsigned int xid;
2626
2627                 xid = get_xid();
2628                 /* this is probably better than directly calling
2629                    partialpage_write since in this function the file handle is
2630                    known, which we might as well leverage */
2631                 /* BB check if anything else missing out of ppw
2632                    such as updating last write time */
2633                 page_data = kmap(page);
2634                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2635                 /* if (rc < 0) should we set writebehind rc? */
2636                 kunmap(page);
2637
2638                 free_xid(xid);
2639         } else {
2640                 rc = copied;
2641                 pos += copied;
2642                 set_page_dirty(page);
2643         }
2644
2645         if (rc > 0) {
2646                 spin_lock(&inode->i_lock);
2647                 if (pos > inode->i_size) {
2648                         i_size_write(inode, pos);
2649                         inode->i_blocks = (512 - 1 + pos) >> 9;
2650                 }
2651                 spin_unlock(&inode->i_lock);
2652         }
2653
2654         unlock_page(page);
2655         put_page(page);
2656         /* Indication to update ctime and mtime as close is deferred */
2657         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2658
2659         return rc;
2660 }
2661
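/*
 * Strict fsync: flush and wait on dirty pages, invalidate the page cache
 * if we no longer have read caching (oplock/lease) on the inode so the
 * next read revalidates, then ask the server to flush the handle unless
 * server-side sync was disabled at mount time.
 */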
2662 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2663                       int datasync)
2664 {
2665         unsigned int xid;
2666         int rc = 0;
2667         struct cifs_tcon *tcon;
2668         struct TCP_Server_Info *server;
2669         struct cifsFileInfo *smbfile = file->private_data;
2670         struct inode *inode = file_inode(file);
2671         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2672
2673         rc = file_write_and_wait_range(file, start, end);
2674         if (rc) {
2675                 trace_cifs_fsync_err(inode->i_ino, rc);
2676                 return rc;
2677         }
2678
2679         xid = get_xid();
2680
2681         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2682                  file, datasync);
2683
2684         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2685                 rc = cifs_zap_mapping(inode);
2686                 if (rc) {
2687                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2688                         rc = 0; /* don't care about it in fsync */
2689                 }
2690         }
2691
2692         tcon = tlink_tcon(smbfile->tlink);
2693         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2694                 server = tcon->ses->server;
2695                 if (server->ops->flush)
2696                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2697                 else
2698                         rc = -ENOSYS;
2699         }
2700
2701         free_xid(xid);
2702         return rc;
2703 }
2704
2705 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2706 {
2707         unsigned int xid;
2708         int rc = 0;
2709         struct cifs_tcon *tcon;
2710         struct TCP_Server_Info *server;
2711         struct cifsFileInfo *smbfile = file->private_data;
2712         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2713
2714         rc = file_write_and_wait_range(file, start, end);
2715         if (rc) {
2716                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2717                 return rc;
2718         }
2719
2720         xid = get_xid();
2721
2722         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2723                  file, datasync);
2724
2725         tcon = tlink_tcon(smbfile->tlink);
2726         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2727                 server = tcon->ses->server;
2728                 if (server->ops->flush)
2729                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2730                 else
2731                         rc = -ENOSYS;
2732         }
2733
2734         free_xid(xid);
2735         return rc;
2736 }
2737
2738 /*
2739  * As file closes, flush all cached write data for this inode checking
2740  * for write behind errors.
2741  */
2742 int cifs_flush(struct file *file, fl_owner_t id)
2743 {
2744         struct inode *inode = file_inode(file);
2745         int rc = 0;
2746
2747         if (file->f_mode & FMODE_WRITE)
2748                 rc = filemap_write_and_wait(inode->i_mapping);
2749
2750         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2751         if (rc)
2752                 trace_cifs_flush_err(inode->i_ino, rc);
2753         return rc;
2754 }
2755
2756 static int
2757 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2758 {
2759         int rc = 0;
2760         unsigned long i;
2761
2762         for (i = 0; i < num_pages; i++) {
2763                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2764                 if (!pages[i]) {
2765                         /*
2766                          * save number of pages we have already allocated and
2767                          * return with ENOMEM error
2768                          */
2769                         num_pages = i;
2770                         rc = -ENOMEM;
2771                         break;
2772                 }
2773         }
2774
2775         if (rc) {
2776                 for (i = 0; i < num_pages; i++)
2777                         put_page(pages[i]);
2778         }
2779         return rc;
2780 }
2781
2782 static inline
2783 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2784 {
2785         size_t num_pages;
2786         size_t clen;
2787
2788         clen = min_t(const size_t, len, wsize);
2789         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2790
2791         if (cur_len)
2792                 *cur_len = clen;
2793
2794         return num_pages;
2795 }
2796
2797 static void
2798 cifs_uncached_writedata_release(struct kref *refcount)
2799 {
2800         int i;
2801         struct cifs_writedata *wdata = container_of(refcount,
2802                                         struct cifs_writedata, refcount);
2803
2804         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2805         for (i = 0; i < wdata->nr_pages; i++)
2806                 put_page(wdata->pages[i]);
2807         cifs_writedata_release(refcount);
2808 }
2809
2810 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2811
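/*
 * Completion work for an uncached async write: push the server EOF and
 * i_size forward if the write extended the file, wake any waiter, and let
 * collect_uncached_write_data() reap the result.
 */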
2812 static void
2813 cifs_uncached_writev_complete(struct work_struct *work)
2814 {
2815         struct cifs_writedata *wdata = container_of(work,
2816                                         struct cifs_writedata, work);
2817         struct inode *inode = d_inode(wdata->cfile->dentry);
2818         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2819
2820         spin_lock(&inode->i_lock);
2821         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2822         if (cifsi->server_eof > inode->i_size)
2823                 i_size_write(inode, cifsi->server_eof);
2824         spin_unlock(&inode->i_lock);
2825
2826         complete(&wdata->done);
2827         collect_uncached_write_data(wdata->ctx);
2828         /* the call below can possibly free the last ref to the aio ctx */
2829         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2830 }
2831
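/*
 * Copy up to *len bytes from the source iterator into the wdata pages.
 * On return, *len holds the number of bytes actually copied and
 * *num_pages the number of pages used; -EFAULT means nothing was copied.
 */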
2832 static int
2833 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2834                       size_t *len, unsigned long *num_pages)
2835 {
2836         size_t save_len, copied, bytes, cur_len = *len;
2837         unsigned long i, nr_pages = *num_pages;
2838
2839         save_len = cur_len;
2840         for (i = 0; i < nr_pages; i++) {
2841                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2842                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2843                 cur_len -= copied;
2844                 /*
2845                  * If we didn't copy as much as we expected, then that
2846                  * may mean we trod into an unmapped area. Stop copying
2847                  * at that point. On the next pass through the big
2848                  * loop, we'll likely end up getting a zero-length
2849                  * write and bailing out of it.
2850                  */
2851                 if (copied < bytes)
2852                         break;
2853         }
2854         cur_len = save_len - cur_len;
2855         *len = cur_len;
2856
2857         /*
2858          * If we have no data to send, then that probably means that
2859          * the copy above failed altogether. That's most likely because
2860          * the address in the iovec was bogus. Return -EFAULT and let
2861          * the caller free anything we allocated and bail out.
2862          */
2863         if (!cur_len)
2864                 return -EFAULT;
2865
2866         /*
2867          * i + 1 now represents the number of pages we actually used in
2868          * the copy phase above.
2869          */
2870         *num_pages = i + 1;
2871         return 0;
2872 }
2873
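/*
 * Resend a wdata whose write failed with -EAGAIN: reopen the handle if it
 * went stale, wait until the granted credits cover the whole wdata, then
 * reissue the async write.
 */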
2874 static int
2875 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2876         struct cifs_aio_ctx *ctx)
2877 {
2878         unsigned int wsize;
2879         struct cifs_credits credits;
2880         int rc;
2881         struct TCP_Server_Info *server = wdata->server;
2882
2883         do {
2884                 if (wdata->cfile->invalidHandle) {
2885                         rc = cifs_reopen_file(wdata->cfile, false);
2886                         if (rc == -EAGAIN)
2887                                 continue;
2888                         else if (rc)
2889                                 break;
2890                 }
2891
2893                 /*
2894                  * Wait for credits to resend this wdata.
2895                  * Note: we are attempting to resend the whole wdata,
2896                  * not in segments.
2897                  */
2898                 do {
2899                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2900                                                 &wsize, &credits);
2901                         if (rc)
2902                                 goto fail;
2903
2904                         if (wsize < wdata->bytes) {
2905                                 add_credits_and_wake_if(server, &credits, 0);
2906                                 msleep(1000);
2907                         }
2908                 } while (wsize < wdata->bytes);
2909                 wdata->credits = credits;
2910
2911                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2912
2913                 if (!rc) {
2914                         if (wdata->cfile->invalidHandle)
2915                                 rc = -EAGAIN;
2916                         else {
2917 #ifdef CONFIG_CIFS_SMB_DIRECT
2918                                 if (wdata->mr) {
2919                                         wdata->mr->need_invalidate = true;
2920                                         smbd_deregister_mr(wdata->mr);
2921                                         wdata->mr = NULL;
2922                                 }
2923 #endif
2924                                 rc = server->ops->async_writev(wdata,
2925                                         cifs_uncached_writedata_release);
2926                         }
2927                 }
2928
2929                 /* If the write was successfully sent, we are done */
2930                 if (!rc) {
2931                         list_add_tail(&wdata->list, wdata_list);
2932                         return 0;
2933                 }
2934
2935                 /* Roll back credits and retry if needed */
2936                 add_credits_and_wake_if(server, &wdata->credits, 0);
2937         } while (rc == -EAGAIN);
2938
2939 fail:
2940         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2941         return rc;
2942 }
2943
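/*
 * Break an uncached write into chunks of at most wsize bytes and issue an
 * async write for each. For direct I/O the user pages are pinned with
 * iov_iter_get_pages_alloc(); otherwise the data is copied into freshly
 * allocated pages. Every request issued is queued on wdata_list.
 */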
2944 static int
2945 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2946                      struct cifsFileInfo *open_file,
2947                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2948                      struct cifs_aio_ctx *ctx)
2949 {
2950         int rc = 0;
2951         size_t cur_len;
2952         unsigned long nr_pages, num_pages, i;
2953         struct cifs_writedata *wdata;
2954         struct iov_iter saved_from = *from;
2955         loff_t saved_offset = offset;
2956         pid_t pid;
2957         struct TCP_Server_Info *server;
2958         struct page **pagevec;
2959         size_t start;
2960         unsigned int xid;
2961
2962         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2963                 pid = open_file->pid;
2964         else
2965                 pid = current->tgid;
2966
2967         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2968         xid = get_xid();
2969
2970         do {
2971                 unsigned int wsize;
2972                 struct cifs_credits credits_on_stack;
2973                 struct cifs_credits *credits = &credits_on_stack;
2974
2975                 if (open_file->invalidHandle) {
2976                         rc = cifs_reopen_file(open_file, false);
2977                         if (rc == -EAGAIN)
2978                                 continue;
2979                         else if (rc)
2980                                 break;
2981                 }
2982
2983                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2984                                                    &wsize, credits);
2985                 if (rc)
2986                         break;
2987
2988                 cur_len = min_t(const size_t, len, wsize);
2989
2990                 if (ctx->direct_io) {
2991                         ssize_t result;
2992
2993                         result = iov_iter_get_pages_alloc(
2994                                 from, &pagevec, cur_len, &start);
2995                         if (result < 0) {
2996                                 cifs_dbg(VFS,
2997                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2998                                          result, iov_iter_type(from),
2999                                          from->iov_offset, from->count);
3000                                 dump_stack();
3001
3002                                 rc = result;
3003                                 add_credits_and_wake_if(server, credits, 0);
3004                                 break;
3005                         }
3006                         cur_len = (size_t)result;
3007                         iov_iter_advance(from, cur_len);
3008
3009                         nr_pages =
3010                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3011
3012                         wdata = cifs_writedata_direct_alloc(pagevec,
3013                                              cifs_uncached_writev_complete);
3014                         if (!wdata) {
3015                                 rc = -ENOMEM;
3016                                 add_credits_and_wake_if(server, credits, 0);
3017                                 break;
3018                         }
3019
3021                         wdata->page_offset = start;
3022                         wdata->tailsz =
3023                                 nr_pages > 1 ?
3024                                         cur_len - (PAGE_SIZE - start) -
3025                                         (nr_pages - 2) * PAGE_SIZE :
3026                                         cur_len;
3027                 } else {
3028                         nr_pages = get_numpages(wsize, len, &cur_len);
3029                         wdata = cifs_writedata_alloc(nr_pages,
3030                                              cifs_uncached_writev_complete);
3031                         if (!wdata) {
3032                                 rc = -ENOMEM;
3033                                 add_credits_and_wake_if(server, credits, 0);
3034                                 break;
3035                         }
3036
3037                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3038                         if (rc) {
3039                                 kvfree(wdata->pages);
3040                                 kfree(wdata);
3041                                 add_credits_and_wake_if(server, credits, 0);
3042                                 break;
3043                         }
3044
3045                         num_pages = nr_pages;
3046                         rc = wdata_fill_from_iovec(
3047                                 wdata, from, &cur_len, &num_pages);
3048                         if (rc) {
3049                                 for (i = 0; i < nr_pages; i++)
3050                                         put_page(wdata->pages[i]);
3051                                 kvfree(wdata->pages);
3052                                 kfree(wdata);
3053                                 add_credits_and_wake_if(server, credits, 0);
3054                                 break;
3055                         }
3056
3057                         /*
3058                          * Bring nr_pages down to the number of pages we
3059                          * actually used, and free any pages that we didn't use.
3060                          */
3061                         for ( ; nr_pages > num_pages; nr_pages--)
3062                                 put_page(wdata->pages[nr_pages - 1]);
3063
3064                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3065                 }
3066
3067                 wdata->sync_mode = WB_SYNC_ALL;
3068                 wdata->nr_pages = nr_pages;
3069                 wdata->offset = (__u64)offset;
3070                 wdata->cfile = cifsFileInfo_get(open_file);
3071                 wdata->server = server;
3072                 wdata->pid = pid;
3073                 wdata->bytes = cur_len;
3074                 wdata->pagesz = PAGE_SIZE;
3075                 wdata->credits = credits_on_stack;
3076                 wdata->ctx = ctx;
3077                 kref_get(&ctx->refcount);
3078
3079                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3080
3081                 if (!rc) {
3082                         if (wdata->cfile->invalidHandle)
3083                                 rc = -EAGAIN;
3084                         else
3085                                 rc = server->ops->async_writev(wdata,
3086                                         cifs_uncached_writedata_release);
3087                 }
3088
3089                 if (rc) {
3090                         add_credits_and_wake_if(server, &wdata->credits, 0);
3091                         kref_put(&wdata->refcount,
3092                                  cifs_uncached_writedata_release);
3093                         if (rc == -EAGAIN) {
3094                                 *from = saved_from;
3095                                 iov_iter_advance(from, offset - saved_offset);
3096                                 continue;
3097                         }
3098                         break;
3099                 }
3100
3101                 list_add_tail(&wdata->list, wdata_list);
3102                 offset += cur_len;
3103                 len -= cur_len;
3104         } while (len > 0);
3105
3106         free_xid(xid);
3107         return rc;
3108 }
3109
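/*
 * Reap the wdatas of an uncached write: accumulate the byte counts of the
 * completed ones, resend any that failed with -EAGAIN, and once everything
 * is accounted for, post the final result to the aio context.
 */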
3110 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3111 {
3112         struct cifs_writedata *wdata, *tmp;
3113         struct cifs_tcon *tcon;
3114         struct cifs_sb_info *cifs_sb;
3115         struct dentry *dentry = ctx->cfile->dentry;
3116         ssize_t rc;
3117
3118         tcon = tlink_tcon(ctx->cfile->tlink);
3119         cifs_sb = CIFS_SB(dentry->d_sb);
3120
3121         mutex_lock(&ctx->aio_mutex);
3122
3123         if (list_empty(&ctx->list)) {
3124                 mutex_unlock(&ctx->aio_mutex);
3125                 return;
3126         }
3127
3128         rc = ctx->rc;
3129         /*
3130          * Wait for and collect replies for any successful sends in order of
3131          * increasing offset. Once an error is hit, return without waiting
3132          * for any more replies.
3133          */
3134 restart_loop:
3135         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3136                 if (!rc) {
3137                         if (!try_wait_for_completion(&wdata->done)) {
3138                                 mutex_unlock(&ctx->aio_mutex);
3139                                 return;
3140                         }
3141
3142                         if (wdata->result)
3143                                 rc = wdata->result;
3144                         else
3145                                 ctx->total_len += wdata->bytes;
3146
3147                         /* resend call if it's a retryable error */
3148                         if (rc == -EAGAIN) {
3149                                 struct list_head tmp_list;
3150                                 struct iov_iter tmp_from = ctx->iter;
3151
3152                                 INIT_LIST_HEAD(&tmp_list);
3153                                 list_del_init(&wdata->list);
3154
3155                                 if (ctx->direct_io)
3156                                         rc = cifs_resend_wdata(
3157                                                 wdata, &tmp_list, ctx);
3158                                 else {
3159                                         iov_iter_advance(&tmp_from,
3160                                                  wdata->offset - ctx->pos);
3161
3162                                         rc = cifs_write_from_iter(wdata->offset,
3163                                                 wdata->bytes, &tmp_from,
3164                                                 ctx->cfile, cifs_sb, &tmp_list,
3165                                                 ctx);
3166
3167                                         kref_put(&wdata->refcount,
3168                                                 cifs_uncached_writedata_release);
3169                                 }
3170
3171                                 list_splice(&tmp_list, &ctx->list);
3172                                 goto restart_loop;
3173                         }
3174                 }
3175                 list_del_init(&wdata->list);
3176                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3177         }
3178
3179         cifs_stats_bytes_written(tcon, ctx->total_len);
3180         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3181
3182         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3183
3184         mutex_unlock(&ctx->aio_mutex);
3185
3186         if (ctx->iocb && ctx->iocb->ki_complete)
3187                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3188         else
3189                 complete(&ctx->done);
3190 }
3191
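/*
 * Common back end for cifs_user_writev() and cifs_direct_writev(): set up
 * a cifs_aio_ctx, issue the writes via cifs_write_from_iter() and, for a
 * synchronous kiocb, wait for collect_uncached_write_data() to finish.
 */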
3192 static ssize_t __cifs_writev(
3193         struct kiocb *iocb, struct iov_iter *from, bool direct)
3194 {
3195         struct file *file = iocb->ki_filp;
3196         ssize_t total_written = 0;
3197         struct cifsFileInfo *cfile;
3198         struct cifs_tcon *tcon;
3199         struct cifs_sb_info *cifs_sb;
3200         struct cifs_aio_ctx *ctx;
3201         struct iov_iter saved_from = *from;
3202         size_t len = iov_iter_count(from);
3203         int rc;
3204
3205         /*
3206          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3207          * In this case, fall back to the non-direct write function.
3208          * This could be improved by getting pages directly in ITER_KVEC.
3209          */
3210         if (direct && iov_iter_is_kvec(from)) {
3211                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3212                 direct = false;
3213         }
3214
3215         rc = generic_write_checks(iocb, from);
3216         if (rc <= 0)
3217                 return rc;
3218
3219         cifs_sb = CIFS_FILE_SB(file);
3220         cfile = file->private_data;
3221         tcon = tlink_tcon(cfile->tlink);
3222
3223         if (!tcon->ses->server->ops->async_writev)
3224                 return -ENOSYS;
3225
3226         ctx = cifs_aio_ctx_alloc();
3227         if (!ctx)
3228                 return -ENOMEM;
3229
3230         ctx->cfile = cifsFileInfo_get(cfile);
3231
3232         if (!is_sync_kiocb(iocb))
3233                 ctx->iocb = iocb;
3234
3235         ctx->pos = iocb->ki_pos;
3236
3237         if (direct) {
3238                 ctx->direct_io = true;
3239                 ctx->iter = *from;
3240                 ctx->len = len;
3241         } else {
3242                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3243                 if (rc) {
3244                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3245                         return rc;
3246                 }
3247         }
3248
3249         /* grab a lock here because write response handlers can access ctx */
3250         mutex_lock(&ctx->aio_mutex);
3251
3252         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3253                                   cfile, cifs_sb, &ctx->list, ctx);
3254
3255         /*
3256          * If at least one write was successfully sent, then discard any rc
3257          * value from the later writes. If the other writes succeed, we'll
3258          * end up returning whatever was written. If they fail, we'll get
3259          * a new rc value from that.
3260          */
3261         if (!list_empty(&ctx->list))
3262                 rc = 0;
3263
3264         mutex_unlock(&ctx->aio_mutex);
3265
3266         if (rc) {
3267                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3268                 return rc;
3269         }
3270
3271         if (!is_sync_kiocb(iocb)) {
3272                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3273                 return -EIOCBQUEUED;
3274         }
3275
3276         rc = wait_for_completion_killable(&ctx->done);
3277         if (rc) {
3278                 mutex_lock(&ctx->aio_mutex);
3279                 ctx->rc = rc = -EINTR;
3280                 total_written = ctx->total_len;
3281                 mutex_unlock(&ctx->aio_mutex);
3282         } else {
3283                 rc = ctx->rc;
3284                 total_written = ctx->total_len;
3285         }
3286
3287         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3288
3289         if (unlikely(!total_written))
3290                 return rc;
3291
3292         iocb->ki_pos += total_written;
3293         return total_written;
3294 }
3295
3296 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3297 {
3298         return __cifs_writev(iocb, from, true);
3299 }
3300
3301 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3302 {
3303         return __cifs_writev(iocb, from, false);
3304 }
3305
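/*
 * Cached write path: hold lock_sem so the brlock list cannot change under
 * us, fail with -EACCES if the range conflicts with an exclusive lock,
 * otherwise write through the page cache and sync if required.
 */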
3306 static ssize_t
3307 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3308 {
3309         struct file *file = iocb->ki_filp;
3310         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3311         struct inode *inode = file->f_mapping->host;
3312         struct cifsInodeInfo *cinode = CIFS_I(inode);
3313         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3314         ssize_t rc;
3315
3316         inode_lock(inode);
3317         /*
3318          * We need to hold the sem to be sure nobody modifies the lock list
3319          * with a brlock that prevents writing.
3320          */
3321         down_read(&cinode->lock_sem);
3322
3323         rc = generic_write_checks(iocb, from);
3324         if (rc <= 0)
3325                 goto out;
3326
3327         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3328                                      server->vals->exclusive_lock_type, 0,
3329                                      NULL, CIFS_WRITE_OP))
3330                 rc = __generic_file_write_iter(iocb, from);
3331         else
3332                 rc = -EACCES;
3333 out:
3334         up_read(&cinode->lock_sem);
3335         inode_unlock(inode);
3336
3337         if (rc > 0)
3338                 rc = generic_write_sync(iocb, rc);
3339         return rc;
3340 }
3341
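/*
 * Strict cache mode write: with a write oplock/lease the data can go
 * through the page cache (using the POSIX byte-range lock shortcut when
 * the server supports it); without one, send the data straight to the
 * server and zap any read-cached pages that just went stale.
 */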
3342 ssize_t
3343 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3344 {
3345         struct inode *inode = file_inode(iocb->ki_filp);
3346         struct cifsInodeInfo *cinode = CIFS_I(inode);
3347         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3348         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3349                                                 iocb->ki_filp->private_data;
3350         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3351         ssize_t written;
3352
3353         written = cifs_get_writer(cinode);
3354         if (written)
3355                 return written;
3356
3357         if (CIFS_CACHE_WRITE(cinode)) {
3358                 if (cap_unix(tcon->ses) &&
3359                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3360                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3361                         written = generic_file_write_iter(iocb, from);
3362                         goto out;
3363                 }
3364                 written = cifs_writev(iocb, from);
3365                 goto out;
3366         }
3367         /*
3368          * For non-oplocked files in strict cache mode we need to write the data
3369          * to the server exactly from the pos to pos+len-1 rather than flush all
3370          * affected pages because it may cause an error with mandatory locks on
3371          * these pages but not on the region from pos to pos+len-1.
3372          */
3373         written = cifs_user_writev(iocb, from);
3374         if (CIFS_CACHE_READ(cinode)) {
3375                 /*
3376                  * We have read level caching and we have just sent a write
3377                  * request to the server thus making data in the cache stale.
3378                  * Zap the cache and set oplock/lease level to NONE to avoid
3379                  * reading stale data from the cache. All subsequent read
3380                  * operations will read new data from the server.
3381                  */
3382                 cifs_zap_mapping(inode);
3383                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3384                          inode);
3385                 cinode->oplock = 0;
3386         }
3387 out:
3388         cifs_put_writer(cinode);
3389         return written;
3390 }
3391
3392 static struct cifs_readdata *
3393 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3394 {
3395         struct cifs_readdata *rdata;
3396
3397         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3398         if (rdata != NULL) {
3399                 rdata->pages = pages;
3400                 kref_init(&rdata->refcount);
3401                 INIT_LIST_HEAD(&rdata->list);
3402                 init_completion(&rdata->done);
3403                 INIT_WORK(&rdata->work, complete);
3404         }
3405
3406         return rdata;
3407 }
3408
3409 static struct cifs_readdata *
3410 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3411 {
3412         struct page **pages =
3413                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3414         struct cifs_readdata *ret = NULL;
3415
3416         if (pages) {
3417                 ret = cifs_readdata_direct_alloc(pages, complete);
3418                 if (!ret)
3419                         kfree(pages);
3420         }
3421
3422         return ret;
3423 }
3424
3425 void
3426 cifs_readdata_release(struct kref *refcount)
3427 {
3428         struct cifs_readdata *rdata = container_of(refcount,
3429                                         struct cifs_readdata, refcount);
3430 #ifdef CONFIG_CIFS_SMB_DIRECT
3431         if (rdata->mr) {
3432                 smbd_deregister_mr(rdata->mr);
3433                 rdata->mr = NULL;
3434         }
3435 #endif
3436         if (rdata->cfile)
3437                 cifsFileInfo_put(rdata->cfile);
3438
3439         kvfree(rdata->pages);
3440         kfree(rdata);
3441 }
3442
3443 static int
3444 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3445 {
3446         int rc = 0;
3447         struct page *page;
3448         unsigned int i;
3449
3450         for (i = 0; i < nr_pages; i++) {
3451                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3452                 if (!page) {
3453                         rc = -ENOMEM;
3454                         break;
3455                 }
3456                 rdata->pages[i] = page;
3457         }
3458
3459         if (rc) {
3460                 unsigned int nr_page_failed = i;
3461
3462                 for (i = 0; i < nr_page_failed; i++) {
3463                         put_page(rdata->pages[i]);
3464                         rdata->pages[i] = NULL;
3465                 }
3466         }
3467         return rc;
3468 }
3469
3470 static void
3471 cifs_uncached_readdata_release(struct kref *refcount)
3472 {
3473         struct cifs_readdata *rdata = container_of(refcount,
3474                                         struct cifs_readdata, refcount);
3475         unsigned int i;
3476
3477         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3478         for (i = 0; i < rdata->nr_pages; i++) {
3479                 put_page(rdata->pages[i]);
3480         }
3481         cifs_readdata_release(refcount);
3482 }
3483
3484 /**
3485  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3486  * @rdata:      the readdata response with list of pages holding data
3487  * @iter:       destination for our data
3488  *
3489  * This function copies data from a list of pages in a readdata response into
3490  * an array of iovecs. It will first calculate where the data should go
3491  * based on the info in the readdata and then copy the data into that spot.
3492  */
3493 static int
3494 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3495 {
3496         size_t remaining = rdata->got_bytes;
3497         unsigned int i;
3498
3499         for (i = 0; i < rdata->nr_pages; i++) {
3500                 struct page *page = rdata->pages[i];
3501                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3502                 size_t written;
3503
3504                 if (unlikely(iov_iter_is_pipe(iter))) {
3505                         void *addr = kmap_atomic(page);
3506
3507                         written = copy_to_iter(addr, copy, iter);
3508                         kunmap_atomic(addr);
3509                 } else
3510                         written = copy_page_to_iter(page, 0, copy, iter);
3511                 remaining -= written;
3512                 if (written < copy && iov_iter_count(iter) > 0)
3513                         break;
3514         }
3515         return remaining ? -EFAULT : 0;
3516 }
3517
3518 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3519
3520 static void
3521 cifs_uncached_readv_complete(struct work_struct *work)
3522 {
3523         struct cifs_readdata *rdata = container_of(work,
3524                                                 struct cifs_readdata, work);
3525
3526         complete(&rdata->done);
3527         collect_uncached_read_data(rdata->ctx);
3528         /* the call below can possibly free the last ref to the aio ctx */
3529         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3530 }
3531
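/*
 * Fill the rdata pages with up to len bytes of response data: copy from
 * the supplied iterator if there is one, leave the pages alone when an
 * smbdirect MR has already placed the data, or else read straight from
 * the socket. Pages beyond len are released rather than held hostage.
 */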
3532 static int
3533 uncached_fill_pages(struct TCP_Server_Info *server,
3534                     struct cifs_readdata *rdata, struct iov_iter *iter,
3535                     unsigned int len)
3536 {
3537         int result = 0;
3538         unsigned int i;
3539         unsigned int nr_pages = rdata->nr_pages;
3540         unsigned int page_offset = rdata->page_offset;
3541
3542         rdata->got_bytes = 0;
3543         rdata->tailsz = PAGE_SIZE;
3544         for (i = 0; i < nr_pages; i++) {
3545                 struct page *page = rdata->pages[i];
3546                 size_t n;
3547                 unsigned int segment_size = rdata->pagesz;
3548
3549                 if (i == 0)
3550                         segment_size -= page_offset;
3551                 else
3552                         page_offset = 0;
3553
3555                 if (len <= 0) {
3556                         /* no need to hold page hostage */
3557                         rdata->pages[i] = NULL;
3558                         rdata->nr_pages--;
3559                         put_page(page);
3560                         continue;
3561                 }
3562
3563                 n = len;
3564                 if (len >= segment_size)
3565                         /* enough data to fill the page */
3566                         n = segment_size;
3567                 else
3568                         rdata->tailsz = len;
3569                 len -= n;
3570
3571                 if (iter)
3572                         result = copy_page_from_iter(
3573                                         page, page_offset, n, iter);
3574 #ifdef CONFIG_CIFS_SMB_DIRECT
3575                 else if (rdata->mr)
3576                         result = n;
3577 #endif
3578                 else
3579                         result = cifs_read_page_from_socket(
3580                                         server, page, page_offset, n);
3581                 if (result < 0)
3582                         break;
3583
3584                 rdata->got_bytes += result;
3585         }
3586
3587         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3588                                                 rdata->got_bytes : result;
3589 }
3590
3591 static int
3592 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3593                               struct cifs_readdata *rdata, unsigned int len)
3594 {
3595         return uncached_fill_pages(server, rdata, NULL, len);
3596 }
3597
3598 static int
3599 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3600                               struct cifs_readdata *rdata,
3601                               struct iov_iter *iter)
3602 {
3603         return uncached_fill_pages(server, rdata, iter, iter->count);
3604 }
3605
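/*
 * Resend an rdata whose read failed with -EAGAIN: reopen the handle if it
 * went stale, wait until the granted credits cover the whole rdata, then
 * reissue the async read and requeue it on the aio pending list.
 */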
3606 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3607                         struct list_head *rdata_list,
3608                         struct cifs_aio_ctx *ctx)
3609 {
3610         unsigned int rsize;
3611         struct cifs_credits credits;
3612         int rc;
3613         struct TCP_Server_Info *server;
3614
3615         /* XXX: should we pick a new channel here? */
3616         server = rdata->server;
3617
3618         do {
3619                 if (rdata->cfile->invalidHandle) {
3620                         rc = cifs_reopen_file(rdata->cfile, true);
3621                         if (rc == -EAGAIN)
3622                                 continue;
3623                         else if (rc)
3624                                 break;
3625                 }
3626
3627                 /*
3628                  * Wait for credits to resend this rdata.
3629                  * Note: we are attempting to resend the whole rdata,
3630                  * not in segments.
3631                  */
3632                 do {
3633                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3634                                                 &rsize, &credits);
3635
3636                         if (rc)
3637                                 goto fail;
3638
3639                         if (rsize < rdata->bytes) {
3640                                 add_credits_and_wake_if(server, &credits, 0);
3641                                 msleep(1000);
3642                         }
3643                 } while (rsize < rdata->bytes);
3644                 rdata->credits = credits;
3645
3646                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3647                 if (!rc) {
3648                         if (rdata->cfile->invalidHandle)
3649                                 rc = -EAGAIN;
3650                         else {
3651 #ifdef CONFIG_CIFS_SMB_DIRECT
3652                                 if (rdata->mr) {
3653                                         rdata->mr->need_invalidate = true;
3654                                         smbd_deregister_mr(rdata->mr);
3655                                         rdata->mr = NULL;
3656                                 }
3657 #endif
3658                                 rc = server->ops->async_readv(rdata);
3659                         }
3660                 }
3661
3662                 /* If the read was successfully sent, we are done */
3663                 if (!rc) {
3664                         /* Add to aio pending list */
3665                         list_add_tail(&rdata->list, rdata_list);
3666                         return 0;
3667                 }
3668
3669                 /* Roll back credits and retry if needed */
3670                 add_credits_and_wake_if(server, &rdata->credits, 0);
3671         } while (rc == -EAGAIN);
3672
3673 fail:
3674         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3675         return rc;
3676 }
3677
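/*
 * Break an uncached read into chunks of at most rsize bytes and issue an
 * async read for each, pinning the user pages directly for direct I/O and
 * allocating bounce pages otherwise. Every request issued is queued on
 * rdata_list.
 */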
3678 static int
3679 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3680                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3681                      struct cifs_aio_ctx *ctx)
3682 {
3683         struct cifs_readdata *rdata;
3684         unsigned int npages, rsize;
3685         struct cifs_credits credits_on_stack;
3686         struct cifs_credits *credits = &credits_on_stack;
3687         size_t cur_len;
3688         int rc;
3689         pid_t pid;
3690         struct TCP_Server_Info *server;
3691         struct page **pagevec;
3692         size_t start;
3693         struct iov_iter direct_iov = ctx->iter;
3694
3695         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3696
3697         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3698                 pid = open_file->pid;
3699         else
3700                 pid = current->tgid;
3701
3702         if (ctx->direct_io)
3703                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3704
3705         do {
3706                 if (open_file->invalidHandle) {
3707                         rc = cifs_reopen_file(open_file, true);
3708                         if (rc == -EAGAIN)
3709                                 continue;
3710                         else if (rc)
3711                                 break;
3712                 }
3713
3714                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3715                                                    &rsize, credits);
3716                 if (rc)
3717                         break;
3718
3719                 cur_len = min_t(const size_t, len, rsize);
3720
3721                 if (ctx->direct_io) {
3722                         ssize_t result;
3723
3724                         result = iov_iter_get_pages_alloc(
3725                                         &direct_iov, &pagevec,
3726                                         cur_len, &start);
3727                         if (result < 0) {
3728                                 cifs_dbg(VFS,
3729                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3730                                          result, iov_iter_type(&direct_iov),
3731                                          direct_iov.iov_offset,
3732                                          direct_iov.count);
3733                                 dump_stack();
3734
3735                                 rc = result;
3736                                 add_credits_and_wake_if(server, credits, 0);
3737                                 break;
3738                         }
3739                         cur_len = (size_t)result;
3740                         iov_iter_advance(&direct_iov, cur_len);
3741
3742                         rdata = cifs_readdata_direct_alloc(
3743                                         pagevec, cifs_uncached_readv_complete);
3744                         if (!rdata) {
3745                                 add_credits_and_wake_if(server, credits, 0);
3746                                 rc = -ENOMEM;
3747                                 break;
3748                         }
3749
3750                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3751                         rdata->page_offset = start;
3752                         rdata->tailsz = npages > 1 ?
3753                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3754                                 cur_len;
3755
3756                 } else {
3757
3758                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3759                         /* allocate a readdata struct */
3760                         rdata = cifs_readdata_alloc(npages,
3761                                             cifs_uncached_readv_complete);
3762                         if (!rdata) {
3763                                 add_credits_and_wake_if(server, credits, 0);
3764                                 rc = -ENOMEM;
3765                                 break;
3766                         }
3767
3768                         rc = cifs_read_allocate_pages(rdata, npages);
3769                         if (rc) {
3770                                 kvfree(rdata->pages);
3771                                 kfree(rdata);
3772                                 add_credits_and_wake_if(server, credits, 0);
3773                                 break;
3774                         }
3775
3776                         rdata->tailsz = PAGE_SIZE;
3777                 }
3778
3779                 rdata->server = server;
3780                 rdata->cfile = cifsFileInfo_get(open_file);
3781                 rdata->nr_pages = npages;
3782                 rdata->offset = offset;
3783                 rdata->bytes = cur_len;
3784                 rdata->pid = pid;
3785                 rdata->pagesz = PAGE_SIZE;
3786                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3787                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3788                 rdata->credits = credits_on_stack;
3789                 rdata->ctx = ctx;
3790                 kref_get(&ctx->refcount);
3791
3792                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3793
3794                 if (!rc) {
3795                         if (rdata->cfile->invalidHandle)
3796                                 rc = -EAGAIN;
3797                         else
3798                                 rc = server->ops->async_readv(rdata);
3799                 }
3800
3801                 if (rc) {
3802                         add_credits_and_wake_if(server, &rdata->credits, 0);
3803                         kref_put(&rdata->refcount,
3804                                 cifs_uncached_readdata_release);
3805                         if (rc == -EAGAIN) {
3806                                 iov_iter_revert(&direct_iov, cur_len);
3807                                 continue;
3808                         }
3809                         break;
3810                 }
3811
3812                 list_add_tail(&rdata->list, rdata_list);
3813                 offset += cur_len;
3814                 len -= cur_len;
3815         } while (len > 0);
3816
3817         return rc;
3818 }
3819
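/*
 * Reap the rdatas of an uncached read: copy the data out to the caller in
 * the non-direct case, resend anything that hit -EAGAIN (picking up after
 * any bytes already received), and post the final result to the aio
 * context.
 */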
3820 static void
3821 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3822 {
3823         struct cifs_readdata *rdata, *tmp;
3824         struct iov_iter *to = &ctx->iter;
3825         struct cifs_sb_info *cifs_sb;
3826         int rc;
3827
3828         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3829
3830         mutex_lock(&ctx->aio_mutex);
3831
3832         if (list_empty(&ctx->list)) {
3833                 mutex_unlock(&ctx->aio_mutex);
3834                 return;
3835         }
3836
3837         rc = ctx->rc;
3838         /* the loop below should proceed in the order of increasing offsets */
3839 again:
3840         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3841                 if (!rc) {
3842                         if (!try_wait_for_completion(&rdata->done)) {
3843                                 mutex_unlock(&ctx->aio_mutex);
3844                                 return;
3845                         }
3846
3847                         if (rdata->result == -EAGAIN) {
3848                                 /* resend call if it's a retryable error */
3849                                 struct list_head tmp_list;
3850                                 unsigned int got_bytes = rdata->got_bytes;
3851
3852                                 list_del_init(&rdata->list);
3853                                 INIT_LIST_HEAD(&tmp_list);
3854
3855                                 /*
3856                                  * Got part of the data and then a reconnect
3857                                  * happened -- fill the buffer and continue
3858                                  * reading.
3859                                  */
3860                                 if (got_bytes && got_bytes < rdata->bytes) {
3861                                         rc = 0;
3862                                         if (!ctx->direct_io)
3863                                                 rc = cifs_readdata_to_iov(rdata, to);
3864                                         if (rc) {
3865                                                 kref_put(&rdata->refcount,
3866                                                         cifs_uncached_readdata_release);
3867                                                 continue;
3868                                         }
3869                                 }
3870
3871                                 if (ctx->direct_io) {
3872                                         /*
3873                                          * Re-use rdata as this is a
3874                                          * direct I/O
3875                                          */
3876                                         rc = cifs_resend_rdata(
3877                                                 rdata,
3878                                                 &tmp_list, ctx);
3879                                 } else {
3880                                         rc = cifs_send_async_read(
3881                                                 rdata->offset + got_bytes,
3882                                                 rdata->bytes - got_bytes,
3883                                                 rdata->cfile, cifs_sb,
3884                                                 &tmp_list, ctx);
3885
3886                                         kref_put(&rdata->refcount,
3887                                                 cifs_uncached_readdata_release);
3888                                 }
3889
3890                                 list_splice(&tmp_list, &ctx->list);
3891
3892                                 goto again;
3893                         } else if (rdata->result)
3894                                 rc = rdata->result;
3895                         else if (!ctx->direct_io)
3896                                 rc = cifs_readdata_to_iov(rdata, to);
3897
3898                         /* if there was a short read -- discard anything left */
3899                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3900                                 rc = -ENODATA;
3901
3902                         ctx->total_len += rdata->got_bytes;
3903                 }
3904                 list_del_init(&rdata->list);
3905                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3906         }
3907
3908         if (!ctx->direct_io)
3909                 ctx->total_len = ctx->len - iov_iter_count(to);
3910
3911         /* mask nodata case */
3912         if (rc == -ENODATA)
3913                 rc = 0;
3914
3915         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3916
3917         mutex_unlock(&ctx->aio_mutex);
3918
3919         if (ctx->iocb && ctx->iocb->ki_complete)
3920                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3921         else
3922                 complete(&ctx->done);
3923 }
3924
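/*
 * Common back end for cifs_user_readv() and cifs_direct_readv(): set up a
 * cifs_aio_ctx, issue the reads via cifs_send_async_read() and, for a
 * synchronous kiocb, wait for collect_uncached_read_data() to finish.
 */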
3925 static ssize_t __cifs_readv(
3926         struct kiocb *iocb, struct iov_iter *to, bool direct)
3927 {
3928         size_t len;
3929         struct file *file = iocb->ki_filp;
3930         struct cifs_sb_info *cifs_sb;
3931         struct cifsFileInfo *cfile;
3932         struct cifs_tcon *tcon;
3933         ssize_t rc, total_read = 0;
3934         loff_t offset = iocb->ki_pos;
3935         struct cifs_aio_ctx *ctx;
3936
3937         /*
3938          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3939          * so fall back to the data copy read path.
3940          * This could be improved by getting pages directly in ITER_KVEC.
3941          */
3942         if (direct && iov_iter_is_kvec(to)) {
3943                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3944                 direct = false;
3945         }
3946
3947         len = iov_iter_count(to);
3948         if (!len)
3949                 return 0;
3950
3951         cifs_sb = CIFS_FILE_SB(file);
3952         cfile = file->private_data;
3953         tcon = tlink_tcon(cfile->tlink);
3954
3955         if (!tcon->ses->server->ops->async_readv)
3956                 return -ENOSYS;
3957
3958         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3959                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3960
3961         ctx = cifs_aio_ctx_alloc();
3962         if (!ctx)
3963                 return -ENOMEM;
3964
3965         ctx->cfile = cifsFileInfo_get(cfile);
3966
3967         if (!is_sync_kiocb(iocb))
3968                 ctx->iocb = iocb;
3969
3970         if (iter_is_iovec(to))
3971                 ctx->should_dirty = true;
3972
3973         if (direct) {
3974                 ctx->pos = offset;
3975                 ctx->direct_io = true;
3976                 ctx->iter = *to;
3977                 ctx->len = len;
3978         } else {
3979                 rc = setup_aio_ctx_iter(ctx, to, READ);
3980                 if (rc) {
3981                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3982                         return rc;
3983                 }
3984                 len = ctx->len;
3985         }
3986
3987         /* grab a lock here because read response handlers can access ctx */
3988         mutex_lock(&ctx->aio_mutex);
3989
3990         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3991
3992         /* if at least one read request was sent successfully, then reset rc */
3993         if (!list_empty(&ctx->list))
3994                 rc = 0;
3995
3996         mutex_unlock(&ctx->aio_mutex);
3997
3998         if (rc) {
3999                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4000                 return rc;
4001         }
4002
4003         if (!is_sync_kiocb(iocb)) {
4004                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4005                 return -EIOCBQUEUED;
4006         }
4007
4008         rc = wait_for_completion_killable(&ctx->done);
4009         if (rc) {
4010                 mutex_lock(&ctx->aio_mutex);
4011                 ctx->rc = rc = -EINTR;
4012                 total_read = ctx->total_len;
4013                 mutex_unlock(&ctx->aio_mutex);
4014         } else {
4015                 rc = ctx->rc;
4016                 total_read = ctx->total_len;
4017         }
4018
4019         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4020
4021         if (total_read) {
4022                 iocb->ki_pos += total_read;
4023                 return total_read;
4024         }
4025         return rc;
4026 }
4027
4028 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4029 {
4030         return __cifs_readv(iocb, to, true);
4031 }
4032
4033 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4034 {
4035         return __cifs_readv(iocb, to, false);
4036 }
4037
4038 ssize_t
4039 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4040 {
4041         struct inode *inode = file_inode(iocb->ki_filp);
4042         struct cifsInodeInfo *cinode = CIFS_I(inode);
4043         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4044         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4045                                                 iocb->ki_filp->private_data;
4046         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4047         int rc = -EACCES;
4048
4049         /*
4050          * In strict cache mode we need to read from the server all the time
4051          * if we don't have level II oplock because the server can delay mtime
4052          * change - so we can't make a decision about inode invalidation.
4053          * And we can also fail with page reading if there are mandatory locks
4054          * on pages affected by this read but not on the region from pos to
4055          * pos+len-1.
4056          */
4057         if (!CIFS_CACHE_READ(cinode))
4058                 return cifs_user_readv(iocb, to);
4059
4060         if (cap_unix(tcon->ses) &&
4061             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4062             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4063                 return generic_file_read_iter(iocb, to);
4064
4065         /*
4066          * We need to hold the sem to be sure nobody modifies the lock list
4067          * with a brlock that prevents reading.
4068          */
4069         down_read(&cinode->lock_sem);
4070         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4071                                      tcon->ses->server->vals->shared_lock_type,
4072                                      0, NULL, CIFS_READ_OP))
4073                 rc = generic_file_read_iter(iocb, to);
4074         up_read(&cinode->lock_sem);
4075         return rc;
4076 }
4077
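/*
 * Legacy synchronous read path: issue blocking reads of at most rsize
 * bytes each until the request is satisfied, an error is returned, or the
 * server sends back zero bytes.
 */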
4078 static ssize_t
4079 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4080 {
4081         int rc = -EACCES;
4082         unsigned int bytes_read = 0;
4083         unsigned int total_read;
4084         unsigned int current_read_size;
4085         unsigned int rsize;
4086         struct cifs_sb_info *cifs_sb;
4087         struct cifs_tcon *tcon;
4088         struct TCP_Server_Info *server;
4089         unsigned int xid;
4090         char *cur_offset;
4091         struct cifsFileInfo *open_file;
4092         struct cifs_io_parms io_parms = {0};
4093         int buf_type = CIFS_NO_BUFFER;
4094         __u32 pid;
4095
4096         xid = get_xid();
4097         cifs_sb = CIFS_FILE_SB(file);
4098
4099         /* FIXME: set up handlers for larger reads and/or convert to async */
4100         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4101
4102         if (file->private_data == NULL) {
4103                 rc = -EBADF;
4104                 free_xid(xid);
4105                 return rc;
4106         }
4107         open_file = file->private_data;
4108         tcon = tlink_tcon(open_file->tlink);
4109         server = cifs_pick_channel(tcon->ses);
4110
4111         if (!server->ops->sync_read) {
4112                 free_xid(xid);
4113                 return -ENOSYS;
4114         }
4115
4116         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4117                 pid = open_file->pid;
4118         else
4119                 pid = current->tgid;
4120
4121         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4122                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4123
4124         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4125              total_read += bytes_read, cur_offset += bytes_read) {
4126                 do {
4127                         current_read_size = min_t(uint, read_size - total_read,
4128                                                   rsize);
4129                         /*
4130                          * For Windows ME and 9x we do not want to request more
4131                          * than it negotiated since it will refuse the read
4132                          * then.
4133                          */
4134                         if (!(tcon->ses->capabilities &
4135                                 tcon->ses->server->vals->cap_large_files)) {
4136                                 current_read_size = min_t(uint,
4137                                         current_read_size, CIFSMaxBufSize);
4138                         }
4139                         if (open_file->invalidHandle) {
4140                                 rc = cifs_reopen_file(open_file, true);
4141                                 if (rc != 0)
4142                                         break;
4143                         }
4144                         io_parms.pid = pid;
4145                         io_parms.tcon = tcon;
4146                         io_parms.offset = *offset;
4147                         io_parms.length = current_read_size;
4148                         io_parms.server = server;
4149                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4150                                                     &bytes_read, &cur_offset,
4151                                                     &buf_type);
4152                 } while (rc == -EAGAIN);
4153
4154                 if (rc || (bytes_read == 0)) {
4155                         if (total_read) {
4156                                 break;
4157                         } else {
4158                                 free_xid(xid);
4159                                 return rc;
4160                         }
4161                 } else {
4162                         cifs_stats_bytes_read(tcon, total_read);
4163                         *offset += bytes_read;
4164                 }
4165         }
4166         free_xid(xid);
4167         return total_read;
4168 }
4169
4170 /*
4171  * If the page is mmap'ed into a process' page tables, then we need to make
4172  * sure that it doesn't change while being written back.
4173  */
4174 static vm_fault_t
4175 cifs_page_mkwrite(struct vm_fault *vmf)
4176 {
4177         struct page *page = vmf->page;
4178         struct file *file = vmf->vma->vm_file;
4179         struct inode *inode = file_inode(file);
4180
4181         cifs_fscache_wait_on_page_write(inode, page);
4182
4183         lock_page(page);
4184         return VM_FAULT_LOCKED;
4185 }
4186
4187 static const struct vm_operations_struct cifs_file_vm_ops = {
4188         .fault = filemap_fault,
4189         .map_pages = filemap_map_pages,
4190         .page_mkwrite = cifs_page_mkwrite,
4191 };
4192
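     /*
      * With cache=strict, only trust the local page cache over an mmap if
      * we hold a read oplock/lease (CIFS_CACHE_READ); otherwise zap the
      * mapping first so the fault handlers above start from fresh server
      * data.
      */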
4193 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4194 {
4195         int xid, rc = 0;
4196         struct inode *inode = file_inode(file);
4197
4198         xid = get_xid();
4199
4200         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4201                 rc = cifs_zap_mapping(inode);
4202         if (!rc)
4203                 rc = generic_file_mmap(file, vma);
4204         if (!rc)
4205                 vma->vm_ops = &cifs_file_vm_ops;
4206
4207         free_xid(xid);
4208         return rc;
4209 }
4210
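     /*
      * The non-strict variant revalidates the file's cached metadata
      * first; a revalidation failure is logged and aborts the mmap()
      * with that error.
      */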
4211 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4212 {
4213         int rc, xid;
4214
4215         xid = get_xid();
4216
4217         rc = cifs_revalidate_file(file);
4218         if (rc)
4219                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4220                          rc);
4221         if (!rc)
4222                 rc = generic_file_mmap(file, vma);
4223         if (!rc)
4224                 vma->vm_ops = &cifs_file_vm_ops;
4225
4226         free_xid(xid);
4227         return rc;
4228 }
4229
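     /*
      * Completion work for an async ->readpages read: add each page to the
      * LRU and mark it uptodate (or errored), hand good pages to fscache,
      * and drop the page and rdata references taken when the request was
      * issued.
      */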
4230 static void
4231 cifs_readv_complete(struct work_struct *work)
4232 {
4233         unsigned int i, got_bytes;
4234         struct cifs_readdata *rdata = container_of(work,
4235                                                 struct cifs_readdata, work);
4236
4237         got_bytes = rdata->got_bytes;
4238         for (i = 0; i < rdata->nr_pages; i++) {
4239                 struct page *page = rdata->pages[i];
4240
4241                 lru_cache_add(page);
4242
4243                 if (rdata->result == 0 ||
4244                     (rdata->result == -EAGAIN && got_bytes)) {
4245                         flush_dcache_page(page);
4246                         SetPageUptodate(page);
4247                 } else
4248                         SetPageError(page);
4249
4250                 unlock_page(page);
4251
4252                 if (rdata->result == 0 ||
4253                     (rdata->result == -EAGAIN && got_bytes))
4254                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4255                 else
4256                         cifs_fscache_uncache_page(rdata->mapping->host, page);
4257
4258                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4259
4260                 put_page(page);
4261                 rdata->pages[i] = NULL;
4262         }
4263         kref_put(&rdata->refcount, cifs_readdata_release);
4264 }
4265
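     /*
      * Fill the rdata pages from the transport.  @len is the number of
      * bytes the server actually returned in this response: a page that is
      * only partially covered is zero-filled past @len, pages beyond the
      * server's EOF are zeroed and marked uptodate so the VFS stops
      * re-requesting them, and any remaining pages are simply released.
      */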
4266 static int
4267 readpages_fill_pages(struct TCP_Server_Info *server,
4268                      struct cifs_readdata *rdata, struct iov_iter *iter,
4269                      unsigned int len)
4270 {
4271         int result = 0;
4272         unsigned int i;
4273         u64 eof;
4274         pgoff_t eof_index;
4275         unsigned int nr_pages = rdata->nr_pages;
4276         unsigned int page_offset = rdata->page_offset;
4277
4278         /* determine the eof that the server (probably) has */
4279         eof = CIFS_I(rdata->mapping->host)->server_eof;
4280         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4281         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4282
4283         rdata->got_bytes = 0;
4284         rdata->tailsz = PAGE_SIZE;
4285         for (i = 0; i < nr_pages; i++) {
4286                 struct page *page = rdata->pages[i];
4287                 unsigned int to_read = rdata->pagesz;
4288                 size_t n;
4289
4290                 if (i == 0)
4291                         to_read -= page_offset;
4292                 else
4293                         page_offset = 0;
4294
4295                 n = to_read;
4296
4297                 if (len >= to_read) {
4298                         len -= to_read;
4299                 } else if (len > 0) {
4300                         /* enough for partial page, fill and zero the rest */
4301                         zero_user(page, len + page_offset, to_read - len);
4302                         n = rdata->tailsz = len;
4303                         len = 0;
4304                 } else if (page->index > eof_index) {
4305                         /*
4306                          * The VFS will not try to do readahead past the
4307                          * i_size, but it's possible that we have outstanding
4308                          * writes with gaps in the middle and the i_size hasn't
4309                          * caught up yet. Populate those with zeroed out pages
4310                          * to prevent the VFS from repeatedly attempting to
4311                          * fill them until the writes are flushed.
4312                          */
4313                         zero_user(page, 0, PAGE_SIZE);
4314                         lru_cache_add(page);
4315                         flush_dcache_page(page);
4316                         SetPageUptodate(page);
4317                         unlock_page(page);
4318                         put_page(page);
4319                         rdata->pages[i] = NULL;
4320                         rdata->nr_pages--;
4321                         continue;
4322                 } else {
4323                         /* no need to hold page hostage */
4324                         lru_cache_add(page);
4325                         unlock_page(page);
4326                         put_page(page);
4327                         rdata->pages[i] = NULL;
4328                         rdata->nr_pages--;
4329                         continue;
4330                 }
4331
4332                 if (iter)
4333                         result = copy_page_from_iter(
4334                                         page, page_offset, n, iter);
4335 #ifdef CONFIG_CIFS_SMB_DIRECT
4336                 else if (rdata->mr)
4337                         result = n;
4338 #endif
4339                 else
4340                         result = cifs_read_page_from_socket(
4341                                         server, page, page_offset, n);
4342                 if (result < 0)
4343                         break;
4344
4345                 rdata->got_bytes += result;
4346         }
4347
4348         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4349                                                 rdata->got_bytes : result;
4350 }
4351
4352 static int
4353 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4354                                struct cifs_readdata *rdata, unsigned int len)
4355 {
4356         return readpages_fill_pages(server, rdata, NULL, len);
4357 }
4358
4359 static int
4360 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4361                                struct cifs_readdata *rdata,
4362                                struct iov_iter *iter)
4363 {
4364         return readpages_fill_pages(server, rdata, iter, iter->count);
4365 }
4366
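     /*
      * Peel a run of contiguous pages off @page_list (which is ordered by
      * descending index) and add them to the page cache and to @tmplist,
      * stopping at an index discontinuity, at @rsize bytes, or on an
      * add_to_page_cache_locked() failure.  Note that a nonzero return can
      * still leave *nr_pages pages queued for the caller to read.
      */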
4367 static int
4368 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4369                     unsigned int rsize, struct list_head *tmplist,
4370                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4371 {
4372         struct page *page, *tpage;
4373         unsigned int expected_index;
4374         int rc;
4375         gfp_t gfp = readahead_gfp_mask(mapping);
4376
4377         INIT_LIST_HEAD(tmplist);
4378
4379         page = lru_to_page(page_list);
4380
4381         /*
4382          * Lock the page and put it in the cache. Since no one else
4383          * should have access to this page, we're safe to simply set
4384          * PG_locked without checking it first.
4385          */
4386         __SetPageLocked(page);
4387         rc = add_to_page_cache_locked(page, mapping,
4388                                       page->index, gfp);
4389
4390         /* give up if we can't stick it in the cache */
4391         if (rc) {
4392                 __ClearPageLocked(page);
4393                 return rc;
4394         }
4395
4396         /* move first page to the tmplist */
4397         *offset = (loff_t)page->index << PAGE_SHIFT;
4398         *bytes = PAGE_SIZE;
4399         *nr_pages = 1;
4400         list_move_tail(&page->lru, tmplist);
4401
4402         /* now try and add more pages onto the request */
4403         expected_index = page->index + 1;
4404         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4405                 /* discontinuity? */
4406                 if (page->index != expected_index)
4407                         break;
4408
4409                 /* would this page push the read over the rsize? */
4410                 if (*bytes + PAGE_SIZE > rsize)
4411                         break;
4412
4413                 __SetPageLocked(page);
4414                 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4415                 if (rc) {
4416                         __ClearPageLocked(page);
4417                         break;
4418                 }
4419                 list_move_tail(&page->lru, tmplist);
4420                 (*bytes) += PAGE_SIZE;
4421                 expected_index++;
4422                 (*nr_pages)++;
4423         }
4424         return rc;
4425 }
4426
4427 static int cifs_readpages(struct file *file, struct address_space *mapping,
4428         struct list_head *page_list, unsigned num_pages)
4429 {
4430         int rc;
4431         int err = 0;
4432         struct list_head tmplist;
4433         struct cifsFileInfo *open_file = file->private_data;
4434         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4435         struct TCP_Server_Info *server;
4436         pid_t pid;
4437         unsigned int xid;
4438
4439         xid = get_xid();
4440         /*
4441          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4442          * immediately if the cookie is negative.
4443          *
4444          * After this point, every page in the list might have PG_fscache set,
4445          * so we will need to clean that up off of every page we don't use.
4446          */
4447         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4448                                          &num_pages);
4449         if (rc == 0) {
4450                 free_xid(xid);
4451                 return rc;
4452         }
4453
4454         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4455                 pid = open_file->pid;
4456         else
4457                 pid = current->tgid;
4458
4459         rc = 0;
4460         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4461
4462         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4463                  __func__, file, mapping, num_pages);
4464
4465         /*
4466          * Start with the page at end of list and move it to private
4467          * list. Do the same with any following pages until we hit
4468          * the rsize limit, hit an index discontinuity, or run out of
4469          * pages. Issue the async read and then start the loop again
4470          * until the list is empty.
4471          *
4472          * Note that list order is important. The page_list is in
4473          * the order of declining indexes. When we put the pages in
4474          * the rdata->pages, then we want them in increasing order.
4475          */
4476         while (!list_empty(page_list) && !err) {
4477                 unsigned int i, nr_pages, bytes, rsize;
4478                 loff_t offset;
4479                 struct page *page, *tpage;
4480                 struct cifs_readdata *rdata;
4481                 struct cifs_credits credits_on_stack;
4482                 struct cifs_credits *credits = &credits_on_stack;
4483
4484                 if (open_file->invalidHandle) {
4485                         rc = cifs_reopen_file(open_file, true);
4486                         if (rc == -EAGAIN)
4487                                 continue;
4488                         else if (rc)
4489                                 break;
4490                 }
4491
4492                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4493                                                    &rsize, credits);
4494                 if (rc)
4495                         break;
4496
4497                 /*
4498                  * Give up immediately if rsize is too small to read an entire
4499                  * page. The VFS will fall back to readpage. We should never
4500                  * reach this point, however, since we set ra_pages to 0 when
4501                  * the rsize is smaller than a cache page.
4502                  */
4503                 if (unlikely(rsize < PAGE_SIZE)) {
4504                         add_credits_and_wake_if(server, credits, 0);
4505                         free_xid(xid);
4506                         return 0;
4507                 }
4508
4509                 nr_pages = 0;
4510                 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4511                                          &nr_pages, &offset, &bytes);
4512                 if (!nr_pages) {
4513                         add_credits_and_wake_if(server, credits, 0);
4514                         break;
4515                 }
4516
4517                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4518                 if (!rdata) {
4519                         /* best to give up if we're out of mem */
4520                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4521                                 list_del(&page->lru);
4522                                 lru_cache_add(page);
4523                                 unlock_page(page);
4524                                 put_page(page);
4525                         }
4526                         rc = -ENOMEM;
4527                         add_credits_and_wake_if(server, credits, 0);
4528                         break;
4529                 }
4530
4531                 rdata->cfile = cifsFileInfo_get(open_file);
4532                 rdata->server = server;
4533                 rdata->mapping = mapping;
4534                 rdata->offset = offset;
4535                 rdata->bytes = bytes;
4536                 rdata->pid = pid;
4537                 rdata->pagesz = PAGE_SIZE;
4538                 rdata->tailsz = PAGE_SIZE;
4539                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4540                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4541                 rdata->credits = credits_on_stack;
4542
4543                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4544                         list_del(&page->lru);
4545                         rdata->pages[rdata->nr_pages++] = page;
4546                 }
4547
4548                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4549
4550                 if (!rc) {
4551                         if (rdata->cfile->invalidHandle)
4552                                 rc = -EAGAIN;
4553                         else
4554                                 rc = server->ops->async_readv(rdata);
4555                 }
4556
4557                 if (rc) {
4558                         add_credits_and_wake_if(server, &rdata->credits, 0);
4559                         for (i = 0; i < rdata->nr_pages; i++) {
4560                                 page = rdata->pages[i];
4561                                 lru_cache_add(page);
4562                                 unlock_page(page);
4563                                 put_page(page);
4564                         }
4565                         /* Fall back to readpage in error/reconnect cases */
4566                         kref_put(&rdata->refcount, cifs_readdata_release);
4567                         break;
4568                 }
4569
4570                 kref_put(&rdata->refcount, cifs_readdata_release);
4571         }
4572
4573         /* Any pages that have been shown to fscache but didn't get added to
4574          * the pagecache must be uncached before they get returned to the
4575          * allocator.
4576          */
4577         cifs_fscache_readpages_cancel(mapping->host, page_list);
4578         free_xid(xid);
4579         return rc;
4580 }
4581
4582 /*
4583  * cifs_readpage_worker must be called with the page pinned
4584  */
4585 static int cifs_readpage_worker(struct file *file, struct page *page,
4586         loff_t *poffset)
4587 {
4588         char *read_data;
4589         int rc;
4590
4591         /* Is the page cached? */
4592         rc = cifs_readpage_from_fscache(file_inode(file), page);
4593         if (rc == 0)
4594                 goto read_complete;
4595
4596         read_data = kmap(page);
4597         /* for reads over a certain size we could initiate async read ahead */
4598
4599         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4600
4601         if (rc < 0)
4602                 goto io_error;
4603         else
4604                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4605
4606         /* we do not want atime to be less than mtime; it broke some apps */
4607         file_inode(file)->i_atime = current_time(file_inode(file));
4608         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4609                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4610         else
4611                 file_inode(file)->i_atime = current_time(file_inode(file));
4612
4613         if (PAGE_SIZE > rc)
4614                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4615
4616         flush_dcache_page(page);
4617         SetPageUptodate(page);
4618
4619         /* send this page to the cache */
4620         cifs_readpage_to_fscache(file_inode(file), page);
4621
4622         rc = 0;
4623
4624 io_error:
4625         kunmap(page);
4626         unlock_page(page);
4627
4628 read_complete:
4629         return rc;
4630 }
4631
4632 static int cifs_readpage(struct file *file, struct page *page)
4633 {
4634         loff_t offset = page_file_offset(page);
4635         int rc = -EACCES;
4636         unsigned int xid;
4637
4638         xid = get_xid();
4639
4640         if (file->private_data == NULL) {
4641                 rc = -EBADF;
4642                 free_xid(xid);
4643                 return rc;
4644         }
4645
4646         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4647                  page, (int)offset, (int)offset);
4648
4649         rc = cifs_readpage_worker(file, page, &offset);
4650
4651         free_xid(xid);
4652         return rc;
4653 }
4654
4655 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4656 {
4657         struct cifsFileInfo *open_file;
4658
4659         spin_lock(&cifs_inode->open_file_lock);
4660         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4661                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4662                         spin_unlock(&cifs_inode->open_file_lock);
4663                         return 1;
4664                 }
4665         }
4666         spin_unlock(&cifs_inode->open_file_lock);
4667         return 0;
4668 }
4669
4670 /* We do not want to update the file size from the server for inodes
4671    open for write, to avoid races with writepage extending the
4672    file.  In the future we could consider allowing refreshing the
4673    inode only on increases in the file size, but this is tricky to
4674    do without racing with writebehind page caching in the current
4675    Linux kernel design. */
4676 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4677 {
4678         if (!cifsInode)
4679                 return true;
4680
4681         if (is_inode_writable(cifsInode)) {
4682                 /* This inode is open for write at least once */
4683                 struct cifs_sb_info *cifs_sb;
4684
4685                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4686                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4687                         /* since there is no page cache to corrupt on
4688                            direct I/O we can change size safely */
4689                         return true;
4690                 }
4691
4692                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4693                         return true;
4694
4695                 return false;
4696         } else
4697                 return true;
4698 }
4699
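     /*
      * ->write_begin(): return a locked, pinned page in *pagep.  If the
      * page could not be brought uptodate here, cifs_write_end() copes by
      * doing a synchronous write of the copied range instead.
      */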
4700 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4701                         loff_t pos, unsigned len, unsigned flags,
4702                         struct page **pagep, void **fsdata)
4703 {
4704         int oncethru = 0;
4705         pgoff_t index = pos >> PAGE_SHIFT;
4706         loff_t offset = pos & (PAGE_SIZE - 1);
4707         loff_t page_start = pos & PAGE_MASK;
4708         loff_t i_size;
4709         struct page *page;
4710         int rc = 0;
4711
4712         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4713
4714 start:
4715         page = grab_cache_page_write_begin(mapping, index, flags);
4716         if (!page) {
4717                 rc = -ENOMEM;
4718                 goto out;
4719         }
4720
4721         if (PageUptodate(page))
4722                 goto out;
4723
4724         /*
4725          * If we write a full page it will be up to date, no need to read from
4726          * the server. If the write is short, we'll end up doing a sync write
4727          * instead.
4728          */
4729         if (len == PAGE_SIZE)
4730                 goto out;
4731
4732         /*
4733          * optimize away the read when we have an oplock, and we're not
4734          * expecting to use any of the data we'd be reading in. That
4735          * is, when the page lies beyond the EOF, or straddles the EOF
4736          * and the write will cover all of the existing data.
4737          */
4738         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4739                 i_size = i_size_read(mapping->host);
4740                 if (page_start >= i_size ||
4741                     (offset == 0 && (pos + len) >= i_size)) {
4742                         zero_user_segments(page, 0, offset,
4743                                            offset + len,
4744                                            PAGE_SIZE);
4745                         /*
4746                          * PageChecked means that the parts of the page
4747                          * to which we're not writing are considered up
4748                          * to date. Once the data is copied to the
4749                          * page, it can be set uptodate.
4750                          */
4751                         SetPageChecked(page);
4752                         goto out;
4753                 }
4754         }
4755
4756         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4757                 /*
4758                  * might as well read a page, it is fast enough. If we get
4759                  * an error, we don't need to return it. cifs_write_end will
4760                  * do a sync write instead since PG_uptodate isn't set.
4761                  */
4762                 cifs_readpage_worker(file, page, &page_start);
4763                 put_page(page);
4764                 oncethru = 1;
4765                 goto start;
4766         } else {
4767                 /* we could try using another file handle if there is one -
4768                    but how would we lock it to prevent a close of that
4769                    handle racing with this read? In any case this will be
4770                    written out by write_end, so it is fine */
4771         }
4772 out:
4773         *pagep = page;
4774         return rc;
4775 }
4776
4777 static int cifs_release_page(struct page *page, gfp_t gfp)
4778 {
4779         if (PagePrivate(page))
4780                 return 0;
4781
4782         return cifs_fscache_release_page(page, gfp);
4783 }
4784
4785 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4786                                  unsigned int length)
4787 {
4788         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4789
4790         if (offset == 0 && length == PAGE_SIZE)
4791                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4792 }
4793
4794 static int cifs_launder_page(struct page *page)
4795 {
4796         int rc = 0;
4797         loff_t range_start = page_offset(page);
4798         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4799         struct writeback_control wbc = {
4800                 .sync_mode = WB_SYNC_ALL,
4801                 .nr_to_write = 0,
4802                 .range_start = range_start,
4803                 .range_end = range_end,
4804         };
4805
4806         cifs_dbg(FYI, "Launder page: %p\n", page);
4807
4808         if (clear_page_dirty_for_io(page))
4809                 rc = cifs_writepage_locked(page, &wbc);
4810
4811         cifs_fscache_invalidate_page(page, page->mapping->host);
4812         return rc;
4813 }
4814
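     /*
      * Worker for handling an oplock/lease break: wait out any pending
      * writers, downgrade the cached oplock state, flush (and if required
      * invalidate) the page cache, push cached byte-range locks back to
      * the server, close any cached deferred handle, and finally
      * acknowledge the break unless it was cancelled.
      */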
4815 void cifs_oplock_break(struct work_struct *work)
4816 {
4817         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4818                                                   oplock_break);
4819         struct inode *inode = d_inode(cfile->dentry);
4820         struct cifsInodeInfo *cinode = CIFS_I(inode);
4821         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4822         struct TCP_Server_Info *server = tcon->ses->server;
4823         int rc = 0;
4824         bool purge_cache = false;
4825         bool is_deferred = false;
4826         struct cifs_deferred_close *dclose;
4827
4828         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4829                         TASK_UNINTERRUPTIBLE);
4830
4831         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4832                                       cfile->oplock_epoch, &purge_cache);
4833
4834         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4835                                                 cifs_has_mand_locks(cinode)) {
4836                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4837                          inode);
4838                 cinode->oplock = 0;
4839         }
4840
4841         if (inode && S_ISREG(inode->i_mode)) {
4842                 if (CIFS_CACHE_READ(cinode))
4843                         break_lease(inode, O_RDONLY);
4844                 else
4845                         break_lease(inode, O_WRONLY);
4846                 rc = filemap_fdatawrite(inode->i_mapping);
4847                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4848                         rc = filemap_fdatawait(inode->i_mapping);
4849                         mapping_set_error(inode->i_mapping, rc);
4850                         cifs_zap_mapping(inode);
4851                 }
4852                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4853                 if (CIFS_CACHE_WRITE(cinode))
4854                         goto oplock_break_ack;
4855         }
4856
4857         rc = cifs_push_locks(cfile);
4858         if (rc)
4859                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4860
4861 oplock_break_ack:
4862         /*
4863          * When an oplock break is received and there are no active file
4864          * handles, only cached ones, schedule the deferred close
4865          * immediately so that a new open will not use the cached handle.
4866          */
4867         spin_lock(&CIFS_I(inode)->deferred_lock);
4868         is_deferred = cifs_is_deferred_close(cfile, &dclose);
4869         spin_unlock(&CIFS_I(inode)->deferred_lock);
4870         if (is_deferred &&
4871             cfile->deferred_close_scheduled &&
4872             delayed_work_pending(&cfile->deferred)) {
4873                 if (cancel_delayed_work(&cfile->deferred)) {
4874                         _cifsFileInfo_put(cfile, false, false);
4875                         goto oplock_break_done;
4876                 }
4877         }
4878         /*
4879          * Releasing a stale oplock after a recent reconnect of the SMB
4880          * session, using a now incorrect file handle, is not a data
4881          * integrity issue, but don't send an oplock release if the session
4882          * is still disconnected - the server has already released it.
4883          */
4884         if (!cfile->oplock_break_cancelled) {
4885                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4886                                                              cinode);
4887                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4888         }
4889 oplock_break_done:
4890         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4891         cifs_done_oplock_break(cinode);
4892 }
4893
4894 /*
4895  * The presence of cifs_direct_io() in the address space ops vector
4896  * allows open() O_DIRECT flags which would have failed otherwise.
4897  *
4898  * In the non-cached mode (mount with cache=none), we shunt off direct
4899  * read and write requests so this method should never be called.
4900  *
4901  * Direct IO is not yet supported in the cached mode.
4902  */
4903 static ssize_t
4904 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4905 {
4906         /*
4907          * FIXME
4908          * Eventually need to support direct IO for non forcedirectio mounts
4909          */
4910         return -EINVAL;
4911 }
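     /*
      * With cache=none the read/write entry points (cifs_direct_readv()
      * and cifs_direct_writev(), earlier in this file) bypass the page
      * cache entirely, so nothing in that mode ever reaches ->direct_IO.
      */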
4912
4913 static int cifs_swap_activate(struct swap_info_struct *sis,
4914                               struct file *swap_file, sector_t *span)
4915 {
4916         struct cifsFileInfo *cfile = swap_file->private_data;
4917         struct inode *inode = swap_file->f_mapping->host;
4918         unsigned long blocks;
4919         long long isize;
4920
4921         cifs_dbg(FYI, "swap activate\n");
4922
4923         spin_lock(&inode->i_lock);
4924         blocks = inode->i_blocks;
4925         isize = inode->i_size;
4926         spin_unlock(&inode->i_lock);
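             /*
              * i_blocks counts 512-byte units, so a fully instantiated
              * file satisfies blocks * 512 >= i_size; anything less means
              * the file has holes and is unusable as a swapfile.
              */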
4927         if (blocks * 512 < isize) {
4928                 pr_warn("swap activate: swapfile has holes\n");
4929                 return -EINVAL;
4930         }
4931         *span = sis->pages;
4932
4933         pr_warn_once("Swap support over SMB3 is experimental\n");
4934
4935         /*
4936          * TODO: consider adding ACL (or documenting how) to prevent other
4937          * users (on this or other systems) from reading it
4938          */
4939
4940
4941         /* TODO: add sk_set_memalloc(inet) or similar */
4942
4943         if (cfile)
4944                 cfile->swapfile = true;
4945         /*
4946          * TODO: Since file already open, we can't open with DENY_ALL here
4947          * but we could add call to grab a byte range lock to prevent others
4948          * from reading or writing the file
4949          */
4950
4951         return 0;
4952 }
4953
4954 static void cifs_swap_deactivate(struct file *file)
4955 {
4956         struct cifsFileInfo *cfile = file->private_data;
4957
4958         cifs_dbg(FYI, "swap deactivate\n");
4959
4960         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4961
4962         if (cfile)
4963                 cfile->swapfile = false;
4964
4965         /* do we need to unpin (or unlock) the file? */
4966 }
4967
4968 const struct address_space_operations cifs_addr_ops = {
4969         .readpage = cifs_readpage,
4970         .readpages = cifs_readpages,
4971         .writepage = cifs_writepage,
4972         .writepages = cifs_writepages,
4973         .write_begin = cifs_write_begin,
4974         .write_end = cifs_write_end,
4975         .set_page_dirty = __set_page_dirty_nobuffers,
4976         .releasepage = cifs_release_page,
4977         .direct_IO = cifs_direct_io,
4978         .invalidatepage = cifs_invalidate_page,
4979         .launder_page = cifs_launder_page,
4980         /*
4981          * TODO: investigate and, if useful, add a cifs_migratePage
4982          * helper (under CONFIG_MIGRATION) in the future, and also
4983          * investigate and add an is_dirty_writeback helper if needed
4984          */
4985         .swap_activate = cifs_swap_activate,
4986         .swap_deactivate = cifs_swap_deactivate,
4987 };
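     /*
      * Note: the choice between this table and cifs_addr_ops_smallbuf
      * below is made at inode initialization time (cifs_set_ops() in
      * inode.c, in this tree), based on whether the server's negotiated
      * buffer size can hold a header plus one full page.
      */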
4988
4989 /*
4990  * cifs_readpages requires the server to support a buffer large enough to
4991  * contain the header plus one complete page of data.  Otherwise, we need
4992  * to leave cifs_readpages out of the address space operations.
4993  */
4994 const struct address_space_operations cifs_addr_ops_smallbuf = {
4995         .readpage = cifs_readpage,
4996         .writepage = cifs_writepage,
4997         .writepages = cifs_writepages,
4998         .write_begin = cifs_write_begin,
4999         .write_end = cifs_write_end,
5000         .set_page_dirty = __set_page_dirty_nobuffers,
5001         .releasepage = cifs_release_page,
5002         .invalidatepage = cifs_invalidate_page,
5003         .launder_page = cifs_launder_page,
5004 };