/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

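/*
 * Map the POSIX O_ACCMODE bits of an open to the SMB desired-access
 * bits requested at open time. The trailing return is a fallback for
 * the (normally unreachable) case where none of the three access modes
 * matched.
 */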
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

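/*
 * Translate the POSIX create/truncate flags of an open into the single
 * SMB create disposition sent to the server; see the mapping table in
 * cifs_nt_open() below for the full correspondence.
 */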
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

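/*
 * Open a file via the SMB1 unix/posix extensions (CIFSPOSIXCreate) and,
 * when @pinode is supplied, instantiate or refresh the matching inode
 * from the FILE_UNIX_BASIC_INFO the server returns.
 */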
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the FILE_SUPERSEDE
 *      disposition (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

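/*
 * Check whether any open handle on this inode still holds cached
 * mandatory byte-range locks; used to decide whether a read oplock
 * must be downgraded to None.
 */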
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

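/*
 * Write-lock @sem by polling with down_write_trylock() instead of
 * sleeping in the rwsem wait queue; callers trade up to 10ms of retry
 * latency for never blocking inside the semaphore's writer queue.
 */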
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

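/*
 * Allocate and initialize the per-open-file bookkeeping (cifsFileInfo),
 * link it into the inode and tcon open-file lists, and apply any oplock
 * that arrived for the pending open.
 */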
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->oplock_break_received = false;
        cfile->deferred_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from the oplock break handler
 * @offload:    whether the final release may be offloaded to a workqueue;
 *              the close and oplock break paths pass false
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

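/*
 * Open entry point for the VFS: reuse a deferred-close handle when the
 * open flags match, try an SMB1 posix open when the unix extensions
 * allow it, and otherwise fall back to a regular NT-style create.
 */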
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        spin_lock(&CIFS_I(inode)->deferred_lock);
        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto out;
                } else {
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                }
        } else {
                spin_unlock(&CIFS_I(inode)->deferred_lock);
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

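/*
 * Reopen a file whose handle was invalidated, typically after a
 * reconnect. @can_flush indicates it is safe to flush dirty pages and
 * refresh the inode before the caller resumes I/O on the new handle.
 */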
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab rename sem here because various ops, including those
         * that already have the rename sem, can end up causing writepage to
         * get called, and if the server was down that means we end up here.
         * We can never tell if the caller already has the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * fallthrough to retry open the old way on errors, especially
                 * in the reconnect path it is important to retry hard
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data. And since we do not know if we
         * have data that would invalidate the current end of file on the
         * server, we can not go to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

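/*
 * Delayed-work handler for deferred close: if the deferred close is
 * still scheduled, drop the reference kept by cifs_close(), which sends
 * the actual SMB close to the server.
 */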
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        if (!cfile->deferred_scheduled) {
                spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
                return;
        }
        cifs_del_deferred_close(cfile);
        cfile->deferred_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    dclose) {
                        if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_scheduled) {
                                mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax);
                        } else {
                                /* defer the close; the SMB close is sent
                                   later from the delayed work */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax);
                                cfile->deferred_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen persistent handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

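/*
 * Allocate and initialize a cached byte-range lock record; the caller
 * links it onto the owning fid's lock list.
 */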
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurred while setting the lock;
 * 2) 0, if we set the lock and don't need to send a request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to send a request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

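/*
 * Push all cached mandatory byte-range locks for this fid to the
 * server, batching as many LOCKING_ANDX ranges per request as the
 * negotiated maxBuf allows, one pass per lock type (exclusive, then
 * shared).
 */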
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

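/*
 * Mix the posix lock owner (a pointer) with a boot-time secret to form
 * the pid value sent on the wire, presumably so raw kernel pointer
 * values are never disclosed to the server.
 */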
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

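/*
 * Snapshot every cached posix (FL_POSIX) lock on the inode into
 * preallocated lock_to_push entries, then replay them to the server
 * via CIFSSMBPosixLock once the flc_lock spinlock is dropped.
 */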
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated entry; without this the
                   loop would keep overwriting the first entry */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

1438 static int
1439 cifs_push_locks(struct cifsFileInfo *cfile)
1440 {
1441         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1442         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1443         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1444         int rc = 0;
1445
1446         /* we are going to update can_cache_brlcks here - need write access */
1447         cifs_down_write(&cinode->lock_sem);
1448         if (!cinode->can_cache_brlcks) {
1449                 up_write(&cinode->lock_sem);
1450                 return rc;
1451         }
1452
1453         if (cap_unix(tcon->ses) &&
1454             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1455             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1456                 rc = cifs_push_posix_locks(cfile);
1457         else
1458                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1459
1460         cinode->can_cache_brlcks = false;
1461         up_write(&cinode->lock_sem);
1462         return rc;
1463 }
1464
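/*
 * Translate the VFS file_lock flags and type into the lock type used on
 * the wire plus the lock/unlock/wait indicators consumed by the callers
 * below.
 */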
1465 static void
1466 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1467                 bool *wait_flag, struct TCP_Server_Info *server)
1468 {
1469         if (flock->fl_flags & FL_POSIX)
1470                 cifs_dbg(FYI, "Posix\n");
1471         if (flock->fl_flags & FL_FLOCK)
1472                 cifs_dbg(FYI, "Flock\n");
1473         if (flock->fl_flags & FL_SLEEP) {
1474                 cifs_dbg(FYI, "Blocking lock\n");
1475                 *wait_flag = true;
1476         }
1477         if (flock->fl_flags & FL_ACCESS)
1478                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1479         if (flock->fl_flags & FL_LEASE)
1480                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1481         if (flock->fl_flags &
1482             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1483                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1484                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1485
1486         *type = server->vals->large_lock_type;
1487         if (flock->fl_type == F_WRLCK) {
1488                 cifs_dbg(FYI, "F_WRLCK\n");
1489                 *type |= server->vals->exclusive_lock_type;
1490                 *lock = 1;
1491         } else if (flock->fl_type == F_UNLCK) {
1492                 cifs_dbg(FYI, "F_UNLCK\n");
1493                 *type |= server->vals->unlock_lock_type;
1494                 *unlock = 1;
1495                 /* Check if unlock includes more than one lock range */
1496         } else if (flock->fl_type == F_RDLCK) {
1497                 cifs_dbg(FYI, "F_RDLCK\n");
1498                 *type |= server->vals->shared_lock_type;
1499                 *lock = 1;
1500         } else if (flock->fl_type == F_EXLCK) {
1501                 cifs_dbg(FYI, "F_EXLCK\n");
1502                 *type |= server->vals->exclusive_lock_type;
1503                 *lock = 1;
1504         } else if (flock->fl_type == F_SHLCK) {
1505                 cifs_dbg(FYI, "F_SHLCK\n");
1506                 *type |= server->vals->shared_lock_type;
1507                 *lock = 1;
1508         } else
1509                 cifs_dbg(FYI, "Unknown type of lock\n");
1510 }
1511
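/*
 * Handle a lock query (F_GETLK). The mandatory-lock path below probes by
 * actually taking the lock: on success it is unlocked again and F_UNLCK
 * is reported; otherwise a shared-lock attempt narrows the conflicting
 * lock down to F_RDLCK or F_WRLCK.
 */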
1512 static int
1513 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1514            bool wait_flag, bool posix_lck, unsigned int xid)
1515 {
1516         int rc = 0;
1517         __u64 length = 1 + flock->fl_end - flock->fl_start;
1518         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1519         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1520         struct TCP_Server_Info *server = tcon->ses->server;
1521         __u16 netfid = cfile->fid.netfid;
1522
1523         if (posix_lck) {
1524                 int posix_lock_type;
1525
1526                 rc = cifs_posix_lock_test(file, flock);
1527                 if (!rc)
1528                         return rc;
1529
1530                 if (type & server->vals->shared_lock_type)
1531                         posix_lock_type = CIFS_RDLCK;
1532                 else
1533                         posix_lock_type = CIFS_WRLCK;
1534                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1535                                       hash_lockowner(flock->fl_owner),
1536                                       flock->fl_start, length, flock,
1537                                       posix_lock_type, wait_flag);
1538                 return rc;
1539         }
1540
1541         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1542         if (!rc)
1543                 return rc;
1544
1545         /* BB we could chain these into one lock request BB */
1546         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1547                                     1, 0, false);
1548         if (rc == 0) {
1549                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1550                                             type, 0, 1, false);
1551                 flock->fl_type = F_UNLCK;
1552                 if (rc != 0)
1553                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1554                                  rc);
1555                 return 0;
1556         }
1557
1558         if (type & server->vals->shared_lock_type) {
1559                 flock->fl_type = F_WRLCK;
1560                 return 0;
1561         }
1562
1563         type &= ~server->vals->exclusive_lock_type;
1564
1565         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1566                                     type | server->vals->shared_lock_type,
1567                                     1, 0, false);
1568         if (rc == 0) {
1569                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1570                         type | server->vals->shared_lock_type, 0, 1, false);
1571                 flock->fl_type = F_RDLCK;
1572                 if (rc != 0)
1573                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1574                                  rc);
1575         } else
1576                 flock->fl_type = F_WRLCK;
1577
1578         return 0;
1579 }
1580
1581 void
1582 cifs_move_llist(struct list_head *source, struct list_head *dest)
1583 {
1584         struct list_head *li, *tmp;
1585         list_for_each_safe(li, tmp, source)
1586                 list_move(li, dest);
1587 }
1588
1589 void
1590 cifs_free_llist(struct list_head *llist)
1591 {
1592         struct cifsLockInfo *li, *tmp;
1593         list_for_each_entry_safe(li, tmp, llist, llist) {
1594                 cifs_del_lock_waiters(li);
1595                 list_del(&li->llist);
1596                 kfree(li);
1597         }
1598 }
1599
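/*
 * Unlock every cached lock that falls inside the flock range, batching
 * up to max_num ranges into a single LOCKING_ANDX request per lock type.
 * Unlocked entries are parked on tmp_llist so they can be reinserted
 * into the file's list if the server rejects the request.
 */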
1600 int
1601 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1602                   unsigned int xid)
1603 {
1604         int rc = 0, stored_rc;
1605         static const int types[] = {
1606                 LOCKING_ANDX_LARGE_FILES,
1607                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1608         };
1609         unsigned int i;
1610         unsigned int max_num, num, max_buf;
1611         LOCKING_ANDX_RANGE *buf, *cur;
1612         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1613         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1614         struct cifsLockInfo *li, *tmp;
1615         __u64 length = 1 + flock->fl_end - flock->fl_start;
1616         struct list_head tmp_llist;
1617
1618         INIT_LIST_HEAD(&tmp_llist);
1619
1620         /*
1621          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1622          * and check it before using.
1623          */
1624         max_buf = tcon->ses->server->maxBuf;
1625         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1626                 return -EINVAL;
1627
1628         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1629                      PAGE_SIZE);
1630         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1631                         PAGE_SIZE);
1632         max_num = (max_buf - sizeof(struct smb_hdr)) /
1633                                                 sizeof(LOCKING_ANDX_RANGE);
1634         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1635         if (!buf)
1636                 return -ENOMEM;
1637
1638         cifs_down_write(&cinode->lock_sem);
1639         for (i = 0; i < 2; i++) {
1640                 cur = buf;
1641                 num = 0;
1642                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1643                         if (flock->fl_start > li->offset ||
1644                             (flock->fl_start + length) <
1645                             (li->offset + li->length))
1646                                 continue;
1647                         if (current->tgid != li->pid)
1648                                 continue;
1649                         if (types[i] != li->type)
1650                                 continue;
1651                         if (cinode->can_cache_brlcks) {
1652                                 /*
1653                                  * We can cache brlock requests - simply remove
1654                                  * a lock from the file's list.
1655                                  */
1656                                 list_del(&li->llist);
1657                                 cifs_del_lock_waiters(li);
1658                                 kfree(li);
1659                                 continue;
1660                         }
1661                         cur->Pid = cpu_to_le16(li->pid);
1662                         cur->LengthLow = cpu_to_le32((u32)li->length);
1663                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1664                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1665                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1666                         /*
1667                          * We need to save a lock here to let us add it again to
1668                          * the file's list if the unlock range request fails on
1669                          * the server.
1670                          */
1671                         list_move(&li->llist, &tmp_llist);
1672                         if (++num == max_num) {
1673                                 stored_rc = cifs_lockv(xid, tcon,
1674                                                        cfile->fid.netfid,
1675                                                        li->type, num, 0, buf);
1676                                 if (stored_rc) {
1677                                         /*
1678                                          * We failed on the unlock range
1679                                          * request - add all locks from the tmp
1680                                          * list to the head of the file's list.
1681                                          */
1682                                         cifs_move_llist(&tmp_llist,
1683                                                         &cfile->llist->locks);
1684                                         rc = stored_rc;
1685                                 } else
1686                                         /*
1687                                          * The unlock range request succeed -
1688                                          * The unlock range request succeeded -
1689                                          */
1690                                         cifs_free_llist(&tmp_llist);
1691                                 cur = buf;
1692                                 num = 0;
1693                         } else
1694                                 cur++;
1695                 }
1696                 if (num) {
1697                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1698                                                types[i], num, 0, buf);
1699                         if (stored_rc) {
1700                                 cifs_move_llist(&tmp_llist,
1701                                                 &cfile->llist->locks);
1702                                 rc = stored_rc;
1703                         } else
1704                                 cifs_free_llist(&tmp_llist);
1705                 }
1706         }
1707
1708         up_write(&cinode->lock_sem);
1709         kfree(buf);
1710         return rc;
1711 }
1712
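/*
 * Set or clear a byte-range lock: the POSIX path is taken when the
 * caller has determined that the server supports POSIX semantics,
 * otherwise the lock is recorded locally and sent as a mandatory-lock
 * SMB.
 */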
1713 static int
1714 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1715            bool wait_flag, bool posix_lck, int lock, int unlock,
1716            unsigned int xid)
1717 {
1718         int rc = 0;
1719         __u64 length = 1 + flock->fl_end - flock->fl_start;
1720         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1721         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1722         struct TCP_Server_Info *server = tcon->ses->server;
1723         struct inode *inode = d_inode(cfile->dentry);
1724
1725         if (posix_lck) {
1726                 int posix_lock_type;
1727
1728                 rc = cifs_posix_lock_set(file, flock);
1729                 if (rc <= FILE_LOCK_DEFERRED)
1730                         return rc;
1731
1732                 if (type & server->vals->shared_lock_type)
1733                         posix_lock_type = CIFS_RDLCK;
1734                 else
1735                         posix_lock_type = CIFS_WRLCK;
1736
1737                 if (unlock == 1)
1738                         posix_lock_type = CIFS_UNLCK;
1739
1740                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1741                                       hash_lockowner(flock->fl_owner),
1742                                       flock->fl_start, length,
1743                                       NULL, posix_lock_type, wait_flag);
1744                 goto out;
1745         }
1746
1747         if (lock) {
1748                 struct cifsLockInfo *lock;
1749
1750                 lock = cifs_lock_init(flock->fl_start, length, type,
1751                                       flock->fl_flags);
1752                 if (!lock)
1753                         return -ENOMEM;
1754
1755                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1756                 if (rc < 0) {
1757                         kfree(lock);
1758                         return rc;
1759                 }
1760                 if (!rc)
1761                         goto out;
1762
1763                 /*
1764                  * Windows 7 server can delay breaking lease from read to None
1765                  * if we set a byte-range lock on a file - break it explicitly
1766                  * before sending the lock to the server to be sure the next
1767                  * read won't conflict with non-overlapping locks due to
1768                  * page reading.
1769                  */
1770                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1771                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1772                         cifs_zap_mapping(inode);
1773                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1774                                  inode);
1775                         CIFS_I(inode)->oplock = 0;
1776                 }
1777
1778                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1779                                             type, 1, 0, wait_flag);
1780                 if (rc) {
1781                         kfree(lock);
1782                         return rc;
1783                 }
1784
1785                 cifs_lock_add(cfile, lock);
1786         } else if (unlock)
1787                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1788
1789 out:
1790         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1791                 /*
1792                  * If this is a request to remove all locks because we
1793                  * are closing the file, it doesn't matter if the
1794                  * unlocking failed as both cifs.ko and the SMB server
1795                  * remove the lock on file close
1796                  */
1797                 if (rc) {
1798                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1799                         if (!(flock->fl_flags & FL_CLOSE))
1800                                 return rc;
1801                 }
1802                 rc = locks_lock_file_wait(file, flock);
1803         }
1804         return rc;
1805 }
1806
1807 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1808 {
1809         int rc, xid;
1810         int lock = 0, unlock = 0;
1811         bool wait_flag = false;
1812         bool posix_lck = false;
1813         struct cifs_sb_info *cifs_sb;
1814         struct cifs_tcon *tcon;
1815         struct cifsFileInfo *cfile;
1816         __u32 type;
1817
1818         if (!(fl->fl_flags & FL_FLOCK))
1819                 return -ENOLCK;
1820 
1821         rc = -EACCES;
1822         xid = get_xid();
1823 
1824         cfile = (struct cifsFileInfo *)file->private_data;
1825         tcon = tlink_tcon(cfile->tlink);
1826
1827         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1828                         tcon->ses->server);
1829         cifs_sb = CIFS_FILE_SB(file);
1830
1831         if (cap_unix(tcon->ses) &&
1832             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1833             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1834                 posix_lck = true;
1835
1836         if (!lock && !unlock) {
1837                 /*
1838                  * if no lock or unlock then nothing to do since we do not
1839                  * know what it is
1840                  */
1841                 free_xid(xid);
1842                 return -EOPNOTSUPP;
1843         }
1844
1845         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1846                         xid);
1847         free_xid(xid);
1848         return rc;
1851 }
1852
1853 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1854 {
1855         int rc, xid;
1856         int lock = 0, unlock = 0;
1857         bool wait_flag = false;
1858         bool posix_lck = false;
1859         struct cifs_sb_info *cifs_sb;
1860         struct cifs_tcon *tcon;
1861         struct cifsFileInfo *cfile;
1862         __u32 type;
1863
1864         rc = -EACCES;
1865         xid = get_xid();
1866
1867         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1868                  cmd, flock->fl_flags, flock->fl_type,
1869                  flock->fl_start, flock->fl_end);
1870
1871         cfile = (struct cifsFileInfo *)file->private_data;
1872         tcon = tlink_tcon(cfile->tlink);
1873
1874         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1875                         tcon->ses->server);
1876         cifs_sb = CIFS_FILE_SB(file);
1877
1878         if (cap_unix(tcon->ses) &&
1879             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1880             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1881                 posix_lck = true;
1882         /*
1883          * BB add code here to normalize offset and length to account for
1884          * negative length, which we cannot accept over the wire.
1885          */
1886         if (IS_GETLK(cmd)) {
1887                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1888                 free_xid(xid);
1889                 return rc;
1890         }
1891
1892         if (!lock && !unlock) {
1893                 /*
1894                  * if no lock or unlock then nothing to do since we do not
1895                  * know what it is
1896                  */
1897                 free_xid(xid);
1898                 return -EOPNOTSUPP;
1899         }
1900
1901         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1902                         xid);
1903         free_xid(xid);
1904         return rc;
1905 }
1906
1907 /*
1908  * update the file size (if needed) after a write. Should be called with
1909  * the inode->i_lock held
1910  */
1911 void
1912 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1913                       unsigned int bytes_written)
1914 {
1915         loff_t end_of_write = offset + bytes_written;
1916
1917         if (end_of_write > cifsi->server_eof)
1918                 cifsi->server_eof = end_of_write;
1919 }
1920
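/*
 * Synchronous write helper: loops until write_size bytes are written,
 * splitting the buffer into chunks no larger than wp_retry_size and
 * reopening an invalidated handle before retrying on -EAGAIN.
 */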
1921 static ssize_t
1922 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1923            size_t write_size, loff_t *offset)
1924 {
1925         int rc = 0;
1926         unsigned int bytes_written = 0;
1927         unsigned int total_written;
1928         struct cifs_tcon *tcon;
1929         struct TCP_Server_Info *server;
1930         unsigned int xid;
1931         struct dentry *dentry = open_file->dentry;
1932         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1933         struct cifs_io_parms io_parms = {0};
1934
1935         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1936                  write_size, *offset, dentry);
1937
1938         tcon = tlink_tcon(open_file->tlink);
1939         server = tcon->ses->server;
1940
1941         if (!server->ops->sync_write)
1942                 return -ENOSYS;
1943
1944         xid = get_xid();
1945
1946         for (total_written = 0; write_size > total_written;
1947              total_written += bytes_written) {
1948                 rc = -EAGAIN;
1949                 while (rc == -EAGAIN) {
1950                         struct kvec iov[2];
1951                         unsigned int len;
1952
1953                         if (open_file->invalidHandle) {
1954                                 /* we could deadlock if we called
1955                                    filemap_fdatawait from here so tell
1956                                    reopen_file not to flush data to
1957                                    server now */
1958                                 rc = cifs_reopen_file(open_file, false);
1959                                 if (rc != 0)
1960                                         break;
1961                         }
1962
1963                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1964                                   (unsigned int)write_size - total_written);
1965                         /* iov[0] is reserved for smb header */
1966                         iov[1].iov_base = (char *)write_data + total_written;
1967                         iov[1].iov_len = len;
1968                         io_parms.pid = pid;
1969                         io_parms.tcon = tcon;
1970                         io_parms.offset = *offset;
1971                         io_parms.length = len;
1972                         rc = server->ops->sync_write(xid, &open_file->fid,
1973                                         &io_parms, &bytes_written, iov, 1);
1974                 }
1975                 if (rc || (bytes_written == 0)) {
1976                         if (total_written)
1977                                 break;
1978                         else {
1979                                 free_xid(xid);
1980                                 return rc;
1981                         }
1982                 } else {
1983                         spin_lock(&d_inode(dentry)->i_lock);
1984                         cifs_update_eof(cifsi, *offset, bytes_written);
1985                         spin_unlock(&d_inode(dentry)->i_lock);
1986                         *offset += bytes_written;
1987                 }
1988         }
1989
1990         cifs_stats_bytes_written(tcon, total_written);
1991
1992         if (total_written > 0) {
1993                 spin_lock(&d_inode(dentry)->i_lock);
1994                 if (*offset > d_inode(dentry)->i_size) {
1995                         i_size_write(d_inode(dentry), *offset);
1996                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1997                 }
1998                 spin_unlock(&d_inode(dentry)->i_lock);
1999         }
2000         mark_inode_dirty_sync(d_inode(dentry));
2001         free_xid(xid);
2002         return total_written;
2003 }
2004
2005 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2006                                         bool fsuid_only)
2007 {
2008         struct cifsFileInfo *open_file = NULL;
2009         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2010
2011         /* only filter by fsuid on multiuser mounts */
2012         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2013                 fsuid_only = false;
2014
2015         spin_lock(&cifs_inode->open_file_lock);
2016         /* we could simply get the first list entry since write-only entries
2017            are always at the end of the list but since the first entry might
2018            have a close pending, we go through the whole list */
2019         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2020                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2021                         continue;
2022                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2023                         if ((!open_file->invalidHandle) &&
2024                                 (!open_file->oplock_break_received)) {
2025                                 /* found a good file */
2026                                 /* lock it so it will not be closed on us */
2027                                 cifsFileInfo_get(open_file);
2028                                 spin_unlock(&cifs_inode->open_file_lock);
2029                                 return open_file;
2030                         } /* else might as well continue, and look for
2031                              another, or simply have the caller reopen it
2032                              again rather than trying to fix this handle */
2033                 } else /* write only file */
2034                         break; /* write only files are last so must be done */
2035         }
2036         spin_unlock(&cifs_inode->open_file_lock);
2037         return NULL;
2038 }
2039
2040 /* Return -EBADF if no handle is found and general rc otherwise */
2041 int
2042 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2043                        struct cifsFileInfo **ret_file)
2044 {
2045         struct cifsFileInfo *open_file, *inv_file = NULL;
2046         struct cifs_sb_info *cifs_sb;
2047         bool any_available = false;
2048         int rc = -EBADF;
2049         unsigned int refind = 0;
2050         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2051         bool with_delete = flags & FIND_WR_WITH_DELETE;
2052         *ret_file = NULL;
2053
2054         /*
2055          * Having a null inode here (because mapping->host was set to zero by
2056          * the VFS or MM) should not happen, but we had reports of an oops (due
2057          * to it being zero) during stress test cases, so we need to check for it.
2058          */
2059
2060         if (cifs_inode == NULL) {
2061                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2062                 dump_stack();
2063                 return rc;
2064         }
2065
2066         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2067
2068         /* only filter by fsuid on multiuser mounts */
2069         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2070                 fsuid_only = false;
2071
2072         spin_lock(&cifs_inode->open_file_lock);
2073 refind_writable:
2074         if (refind > MAX_REOPEN_ATT) {
2075                 spin_unlock(&cifs_inode->open_file_lock);
2076                 return rc;
2077         }
2078         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2079                 if (!any_available && open_file->pid != current->tgid)
2080                         continue;
2081                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2082                         continue;
2083                 if (with_delete && !(open_file->fid.access & DELETE))
2084                         continue;
2085                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2086                         if (!open_file->invalidHandle) {
2087                                 /* found a good writable file */
2088                                 cifsFileInfo_get(open_file);
2089                                 spin_unlock(&cifs_inode->open_file_lock);
2090                                 *ret_file = open_file;
2091                                 return 0;
2092                         } else {
2093                                 if (!inv_file)
2094                                         inv_file = open_file;
2095                         }
2096                 }
2097         }
2098         /* couldn't find usable FH with same pid, try any available */
2099         if (!any_available) {
2100                 any_available = true;
2101                 goto refind_writable;
2102         }
2103
2104         if (inv_file) {
2105                 any_available = false;
2106                 cifsFileInfo_get(inv_file);
2107         }
2108
2109         spin_unlock(&cifs_inode->open_file_lock);
2110
2111         if (inv_file) {
2112                 rc = cifs_reopen_file(inv_file, false);
2113                 if (!rc) {
2114                         *ret_file = inv_file;
2115                         return 0;
2116                 }
2117
2118                 spin_lock(&cifs_inode->open_file_lock);
2119                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2120                 spin_unlock(&cifs_inode->open_file_lock);
2121                 cifsFileInfo_put(inv_file);
2122                 ++refind;
2123                 inv_file = NULL;
2124                 spin_lock(&cifs_inode->open_file_lock);
2125                 goto refind_writable;
2126         }
2127
2128         return rc;
2129 }
2130
2131 struct cifsFileInfo *
2132 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2133 {
2134         struct cifsFileInfo *cfile;
2135         int rc;
2136
2137         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2138         if (rc)
2139                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2140
2141         return cfile;
2142 }
2143
2144 int
2145 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2146                        int flags,
2147                        struct cifsFileInfo **ret_file)
2148 {
2149         struct cifsFileInfo *cfile;
2150         void *page = alloc_dentry_path();
2151
2152         *ret_file = NULL;
2153
2154         spin_lock(&tcon->open_file_lock);
2155         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2156                 struct cifsInodeInfo *cinode;
2157                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2158                 if (IS_ERR(full_path)) {
2159                         spin_unlock(&tcon->open_file_lock);
2160                         free_dentry_path(page);
2161                         return PTR_ERR(full_path);
2162                 }
2163                 if (strcmp(full_path, name))
2164                         continue;
2165
2166                 cinode = CIFS_I(d_inode(cfile->dentry));
2167                 spin_unlock(&tcon->open_file_lock);
2168                 free_dentry_path(page);
2169                 return cifs_get_writable_file(cinode, flags, ret_file);
2170         }
2171
2172         spin_unlock(&tcon->open_file_lock);
2173         free_dentry_path(page);
2174         return -ENOENT;
2175 }
2176
2177 int
2178 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2179                        struct cifsFileInfo **ret_file)
2180 {
2181         struct cifsFileInfo *cfile;
2182         void *page = alloc_dentry_path();
2183
2184         *ret_file = NULL;
2185
2186         spin_lock(&tcon->open_file_lock);
2187         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2188                 struct cifsInodeInfo *cinode;
2189                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2190                 if (IS_ERR(full_path)) {
2191                         spin_unlock(&tcon->open_file_lock);
2192                         free_dentry_path(page);
2193                         return PTR_ERR(full_path);
2194                 }
2195                 if (strcmp(full_path, name))
2196                         continue;
2197
2198                 cinode = CIFS_I(d_inode(cfile->dentry));
2199                 spin_unlock(&tcon->open_file_lock);
2200                 free_dentry_path(page);
2201                 *ret_file = find_readable_file(cinode, 0);
2202                 return *ret_file ? 0 : -ENOENT;
2203         }
2204
2205         spin_unlock(&tcon->open_file_lock);
2206         free_dentry_path(page);
2207         return -ENOENT;
2208 }
2209
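/*
 * Synchronously write the [from, to) byte range of a page through any
 * writable handle for the inode; the range is clamped so that the file
 * is never extended past i_size.
 */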
2210 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2211 {
2212         struct address_space *mapping = page->mapping;
2213         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2214         char *write_data;
2215         int rc = -EFAULT;
2216         int bytes_written = 0;
2217         struct inode *inode;
2218         struct cifsFileInfo *open_file;
2219
2220         if (!mapping || !mapping->host)
2221                 return -EFAULT;
2222
2223         inode = page->mapping->host;
2224
2225         offset += (loff_t)from;
2226         write_data = kmap(page);
2227         write_data += from;
2228
2229         if ((to > PAGE_SIZE) || (from > to)) {
2230                 kunmap(page);
2231                 return -EIO;
2232         }
2233
2234         /* racing with truncate? */
2235         if (offset > mapping->host->i_size) {
2236                 kunmap(page);
2237                 return 0; /* don't care */
2238         }
2239
2240         /* check to make sure that we are not extending the file */
2241         if (mapping->host->i_size - offset < (loff_t)to)
2242                 to = (unsigned)(mapping->host->i_size - offset);
2243
2244         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2245                                     &open_file);
2246         if (!rc) {
2247                 bytes_written = cifs_write(open_file, open_file->pid,
2248                                            write_data, to - from, &offset);
2249                 cifsFileInfo_put(open_file);
2250                 /* Does mm or vfs already set times? */
2251                 inode->i_atime = inode->i_mtime = current_time(inode);
2252                 if ((bytes_written > 0) && (offset))
2253                         rc = 0;
2254                 else if (bytes_written < 0)
2255                         rc = bytes_written;
2256                 else
2257                         rc = -EFAULT;
2258         } else {
2259                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2260                 if (!is_retryable_error(rc))
2261                         rc = -EIO;
2262         }
2263
2264         kunmap(page);
2265         return rc;
2266 }
2267
2268 static struct cifs_writedata *
2269 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2270                           pgoff_t end, pgoff_t *index,
2271                           unsigned int *found_pages)
2272 {
2273         struct cifs_writedata *wdata;
2274
2275         wdata = cifs_writedata_alloc((unsigned int)tofind,
2276                                      cifs_writev_complete);
2277         if (!wdata)
2278                 return NULL;
2279
2280         *found_pages = find_get_pages_range_tag(mapping, index, end,
2281                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2282         return wdata;
2283 }
2284
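/*
 * Take as many of the found pages as possible for a single write: pages
 * must be consecutive, still dirty and still belong to the mapping.
 * Each accepted page is locked and marked for writeback; the remainder
 * are released.
 */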
2285 static unsigned int
2286 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2287                     struct address_space *mapping,
2288                     struct writeback_control *wbc,
2289                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2290 {
2291         unsigned int nr_pages = 0, i;
2292         struct page *page;
2293
2294         for (i = 0; i < found_pages; i++) {
2295                 page = wdata->pages[i];
2296                 /*
2297                  * At this point we hold neither the i_pages lock nor the
2298                  * page lock: the page may be truncated or invalidated
2299                  * (changing page->mapping to NULL), or even swizzled
2300                  * back from swapper_space to tmpfs file mapping
2301                  */
2302
2303                 if (nr_pages == 0)
2304                         lock_page(page);
2305                 else if (!trylock_page(page))
2306                         break;
2307
2308                 if (unlikely(page->mapping != mapping)) {
2309                         unlock_page(page);
2310                         break;
2311                 }
2312
2313                 if (!wbc->range_cyclic && page->index > end) {
2314                         *done = true;
2315                         unlock_page(page);
2316                         break;
2317                 }
2318
2319                 if (*next && (page->index != *next)) {
2320                         /* Not next consecutive page */
2321                         unlock_page(page);
2322                         break;
2323                 }
2324
2325                 if (wbc->sync_mode != WB_SYNC_NONE)
2326                         wait_on_page_writeback(page);
2327
2328                 if (PageWriteback(page) ||
2329                                 !clear_page_dirty_for_io(page)) {
2330                         unlock_page(page);
2331                         break;
2332                 }
2333
2334                 /*
2335                  * This actually clears the dirty bit in the radix tree.
2336                  * See cifs_writepage() for more commentary.
2337                  */
2338                 set_page_writeback(page);
2339                 if (page_offset(page) >= i_size_read(mapping->host)) {
2340                         *done = true;
2341                         unlock_page(page);
2342                         end_page_writeback(page);
2343                         break;
2344                 }
2345
2346                 wdata->pages[i] = page;
2347                 *next = page->index + 1;
2348                 ++nr_pages;
2349         }
2350
2351         /* reset index to refind any pages skipped */
2352         if (nr_pages == 0)
2353                 *index = wdata->pages[0]->index + 1;
2354
2355         /* put any pages we aren't going to use */
2356         for (i = nr_pages; i < found_pages; i++) {
2357                 put_page(wdata->pages[i]);
2358                 wdata->pages[i] = NULL;
2359         }
2360
2361         return nr_pages;
2362 }
2363
2364 static int
2365 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2366                  struct address_space *mapping, struct writeback_control *wbc)
2367 {
2368         int rc;
2369
2370         wdata->sync_mode = wbc->sync_mode;
2371         wdata->nr_pages = nr_pages;
2372         wdata->offset = page_offset(wdata->pages[0]);
2373         wdata->pagesz = PAGE_SIZE;
2374         wdata->tailsz = min(i_size_read(mapping->host) -
2375                         page_offset(wdata->pages[nr_pages - 1]),
2376                         (loff_t)PAGE_SIZE);
2377         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2378         wdata->pid = wdata->cfile->pid;
2379
2380         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2381         if (rc)
2382                 return rc;
2383
2384         if (wdata->cfile->invalidHandle)
2385                 rc = -EAGAIN;
2386         else
2387                 rc = wdata->server->ops->async_writev(wdata,
2388                                                       cifs_writedata_release);
2389
2390         return rc;
2391 }
2392
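/*
 * Writeback address_space op: repeatedly grab a writable handle and MTU
 * credits, gather a run of dirty pages into a cifs_writedata, and send
 * it through the async write path, redirtying the pages on retryable
 * errors.
 */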
2393 static int cifs_writepages(struct address_space *mapping,
2394                            struct writeback_control *wbc)
2395 {
2396         struct inode *inode = mapping->host;
2397         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2398         struct TCP_Server_Info *server;
2399         bool done = false, scanned = false, range_whole = false;
2400         pgoff_t end, index;
2401         struct cifs_writedata *wdata;
2402         struct cifsFileInfo *cfile = NULL;
2403         int rc = 0;
2404         int saved_rc = 0;
2405         unsigned int xid;
2406
2407         /*
2408          * If wsize is smaller than the page cache size, default to writing
2409          * one page at a time via cifs_writepage
2410          */
2411         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2412                 return generic_writepages(mapping, wbc);
2413
2414         xid = get_xid();
2415         if (wbc->range_cyclic) {
2416                 index = mapping->writeback_index; /* Start from prev offset */
2417                 end = -1;
2418         } else {
2419                 index = wbc->range_start >> PAGE_SHIFT;
2420                 end = wbc->range_end >> PAGE_SHIFT;
2421                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2422                         range_whole = true;
2423                 scanned = true;
2424         }
2425         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2426
2427 retry:
2428         while (!done && index <= end) {
2429                 unsigned int i, nr_pages, found_pages, wsize;
2430                 pgoff_t next = 0, tofind, saved_index = index;
2431                 struct cifs_credits credits_on_stack;
2432                 struct cifs_credits *credits = &credits_on_stack;
2433                 int get_file_rc = 0;
2434
2435                 if (cfile)
2436                         cifsFileInfo_put(cfile);
2437
2438                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2439
2440                 /* in case of an error store it to return later */
2441                 if (rc)
2442                         get_file_rc = rc;
2443
2444                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2445                                                    &wsize, credits);
2446                 if (rc != 0) {
2447                         done = true;
2448                         break;
2449                 }
2450
2451                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2452
2453                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2454                                                   &found_pages);
2455                 if (!wdata) {
2456                         rc = -ENOMEM;
2457                         done = true;
2458                         add_credits_and_wake_if(server, credits, 0);
2459                         break;
2460                 }
2461
2462                 if (found_pages == 0) {
2463                         kref_put(&wdata->refcount, cifs_writedata_release);
2464                         add_credits_and_wake_if(server, credits, 0);
2465                         break;
2466                 }
2467
2468                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2469                                                end, &index, &next, &done);
2470
2471                 /* nothing to write? */
2472                 if (nr_pages == 0) {
2473                         kref_put(&wdata->refcount, cifs_writedata_release);
2474                         add_credits_and_wake_if(server, credits, 0);
2475                         continue;
2476                 }
2477
2478                 wdata->credits = credits_on_stack;
2479                 wdata->cfile = cfile;
2480                 wdata->server = server;
2481                 cfile = NULL;
2482
2483                 if (!wdata->cfile) {
2484                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2485                                  get_file_rc);
2486                         if (is_retryable_error(get_file_rc))
2487                                 rc = get_file_rc;
2488                         else
2489                                 rc = -EBADF;
2490                 } else
2491                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2492
2493                 for (i = 0; i < nr_pages; ++i)
2494                         unlock_page(wdata->pages[i]);
2495
2496                 /* send failure -- clean up the mess */
2497                 if (rc != 0) {
2498                         add_credits_and_wake_if(server, &wdata->credits, 0);
2499                         for (i = 0; i < nr_pages; ++i) {
2500                                 if (is_retryable_error(rc))
2501                                         redirty_page_for_writepage(wbc,
2502                                                            wdata->pages[i]);
2503                                 else
2504                                         SetPageError(wdata->pages[i]);
2505                                 end_page_writeback(wdata->pages[i]);
2506                                 put_page(wdata->pages[i]);
2507                         }
2508                         if (!is_retryable_error(rc))
2509                                 mapping_set_error(mapping, rc);
2510                 }
2511                 kref_put(&wdata->refcount, cifs_writedata_release);
2512
2513                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2514                         index = saved_index;
2515                         continue;
2516                 }
2517
2518                 /* Return immediately if we received a signal during writing */
2519                 if (is_interrupt_error(rc)) {
2520                         done = true;
2521                         break;
2522                 }
2523
2524                 if (rc != 0 && saved_rc == 0)
2525                         saved_rc = rc;
2526
2527                 wbc->nr_to_write -= nr_pages;
2528                 if (wbc->nr_to_write <= 0)
2529                         done = true;
2530
2531                 index = next;
2532         }
2533
2534         if (!scanned && !done) {
2535                 /*
2536                  * We hit the last page and there is more work to be done: wrap
2537                  * back to the start of the file
2538                  */
2539                 scanned = true;
2540                 index = 0;
2541                 goto retry;
2542         }
2543
2544         if (saved_rc != 0)
2545                 rc = saved_rc;
2546
2547         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2548                 mapping->writeback_index = index;
2549
2550         if (cfile)
2551                 cifsFileInfo_put(cfile);
2552         free_xid(xid);
2553         /* Indication to update ctime and mtime as close is deferred */
2554         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2555         return rc;
2556 }
2557
2558 static int
2559 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2560 {
2561         int rc;
2562         unsigned int xid;
2563
2564         xid = get_xid();
2565         /* BB add check for wbc flags */
2566         get_page(page);
2567         if (!PageUptodate(page))
2568                 cifs_dbg(FYI, "ppw - page not up to date\n");
2569
2570         /*
2571          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2572          *
2573          * A writepage() implementation always needs to do either this,
2574          * or re-dirty the page with "redirty_page_for_writepage()" in
2575          * the case of a failure.
2576          *
2577          * Just unlocking the page will cause the radix tree tag-bits
2578          * to fail to update with the state of the page correctly.
2579          */
2580         set_page_writeback(page);
2581 retry_write:
2582         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2583         if (is_retryable_error(rc)) {
2584                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2585                         goto retry_write;
2586                 redirty_page_for_writepage(wbc, page);
2587         } else if (rc != 0) {
2588                 SetPageError(page);
2589                 mapping_set_error(page->mapping, rc);
2590         } else {
2591                 SetPageUptodate(page);
2592         }
2593         end_page_writeback(page);
2594         put_page(page);
2595         free_xid(xid);
2596         return rc;
2597 }
2598
2599 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2600 {
2601         int rc = cifs_writepage_locked(page, wbc);
2602         unlock_page(page);
2603         return rc;
2604 }
2605
2606 static int cifs_write_end(struct file *file, struct address_space *mapping,
2607                         loff_t pos, unsigned len, unsigned copied,
2608                         struct page *page, void *fsdata)
2609 {
2610         int rc;
2611         struct inode *inode = mapping->host;
2612         struct cifsFileInfo *cfile = file->private_data;
2613         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2614         __u32 pid;
2615
2616         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2617                 pid = cfile->pid;
2618         else
2619                 pid = current->tgid;
2620
2621         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2622                  page, pos, copied);
2623
2624         if (PageChecked(page)) {
2625                 if (copied == len)
2626                         SetPageUptodate(page);
2627                 ClearPageChecked(page);
2628         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2629                 SetPageUptodate(page);
2630
2631         if (!PageUptodate(page)) {
2632                 char *page_data;
2633                 unsigned offset = pos & (PAGE_SIZE - 1);
2634                 unsigned int xid;
2635
2636                 xid = get_xid();
2637                 /* this is probably better than calling
2638                    cifs_partialpagewrite directly, since here the file
2639                    handle is already known and we might as well leverage it */
2640                 /* BB check if anything else is missing from ppw,
2641                    such as updating the last write time */
2642                 page_data = kmap(page);
2643                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2644                 /* if (rc < 0) should we set writebehind rc? */
2645                 kunmap(page);
2646
2647                 free_xid(xid);
2648         } else {
2649                 rc = copied;
2650                 pos += copied;
2651                 set_page_dirty(page);
2652         }
2653
2654         if (rc > 0) {
2655                 spin_lock(&inode->i_lock);
2656                 if (pos > inode->i_size) {
2657                         i_size_write(inode, pos);
2658                         inode->i_blocks = (512 - 1 + pos) >> 9;
2659                 }
2660                 spin_unlock(&inode->i_lock);
2661         }
2662
2663         unlock_page(page);
2664         put_page(page);
2665         /* Indication to update ctime and mtime as close is deferred */
2666         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2667
2668         return rc;
2669 }
2670
2671 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2672                       int datasync)
2673 {
2674         unsigned int xid;
2675         int rc = 0;
2676         struct cifs_tcon *tcon;
2677         struct TCP_Server_Info *server;
2678         struct cifsFileInfo *smbfile = file->private_data;
2679         struct inode *inode = file_inode(file);
2680         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2681
2682         rc = file_write_and_wait_range(file, start, end);
2683         if (rc) {
2684                 trace_cifs_fsync_err(inode->i_ino, rc);
2685                 return rc;
2686         }
2687
2688         xid = get_xid();
2689
2690         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2691                  file, datasync);
2692
2693         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2694                 rc = cifs_zap_mapping(inode);
2695                 if (rc) {
2696                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2697                         rc = 0; /* don't care about it in fsync */
2698                 }
2699         }
2700
2701         tcon = tlink_tcon(smbfile->tlink);
2702         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2703                 server = tcon->ses->server;
2704                 if (server->ops->flush)
2705                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2706                 else
2707                         rc = -ENOSYS;
2708         }
2709
2710         free_xid(xid);
2711         return rc;
2712 }
2713
2714 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2715 {
2716         unsigned int xid;
2717         int rc = 0;
2718         struct cifs_tcon *tcon;
2719         struct TCP_Server_Info *server;
2720         struct cifsFileInfo *smbfile = file->private_data;
2721         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2722
2723         rc = file_write_and_wait_range(file, start, end);
2724         if (rc) {
2725                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2726                 return rc;
2727         }
2728
2729         xid = get_xid();
2730
2731         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2732                  file, datasync);
2733
2734         tcon = tlink_tcon(smbfile->tlink);
2735         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2736                 server = tcon->ses->server;
2737                 if (server->ops->flush)
2738                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2739                 else
2740                         rc = -ENOSYS;
2741         }
2742
2743         free_xid(xid);
2744         return rc;
2745 }
2746
2747 /*
2748  * As file closes, flush all cached write data for this inode checking
2749  * for write behind errors.
2750  */
2751 int cifs_flush(struct file *file, fl_owner_t id)
2752 {
2753         struct inode *inode = file_inode(file);
2754         int rc = 0;
2755
2756         if (file->f_mode & FMODE_WRITE)
2757                 rc = filemap_write_and_wait(inode->i_mapping);
2758
2759         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2760         if (rc)
2761                 trace_cifs_flush_err(inode->i_ino, rc);
2762         return rc;
2763 }
2764
2765 static int
2766 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2767 {
2768         int rc = 0;
2769         unsigned long i;
2770
2771         for (i = 0; i < num_pages; i++) {
2772                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2773                 if (!pages[i]) {
2774                         /*
2775                          * save number of pages we have already allocated and
2776                          * return with ENOMEM error
2777                          */
2778                         num_pages = i;
2779                         rc = -ENOMEM;
2780                         break;
2781                 }
2782         }
2783
2784         if (rc) {
2785                 for (i = 0; i < num_pages; i++)
2786                         put_page(pages[i]);
2787         }
2788         return rc;
2789 }
2790
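/*
 * For example, with a 64KiB wsize and a 100KiB remaining length on a
 * 4KiB-page system, clen is clamped to 64KiB and 16 pages are needed.
 */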
2791 static inline
2792 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2793 {
2794         size_t num_pages;
2795         size_t clen;
2796
2797         clen = min_t(const size_t, len, wsize);
2798         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2799
2800         if (cur_len)
2801                 *cur_len = clen;
2802
2803         return num_pages;
2804 }
2805
2806 static void
2807 cifs_uncached_writedata_release(struct kref *refcount)
2808 {
2809         int i;
2810         struct cifs_writedata *wdata = container_of(refcount,
2811                                         struct cifs_writedata, refcount);
2812
2813         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2814         for (i = 0; i < wdata->nr_pages; i++)
2815                 put_page(wdata->pages[i]);
2816         cifs_writedata_release(refcount);
2817 }
2818
2819 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2820
2821 static void
2822 cifs_uncached_writev_complete(struct work_struct *work)
2823 {
2824         struct cifs_writedata *wdata = container_of(work,
2825                                         struct cifs_writedata, work);
2826         struct inode *inode = d_inode(wdata->cfile->dentry);
2827         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2828
2829         spin_lock(&inode->i_lock);
2830         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2831         if (cifsi->server_eof > inode->i_size)
2832                 i_size_write(inode, cifsi->server_eof);
2833         spin_unlock(&inode->i_lock);
2834
2835         complete(&wdata->done);
2836         collect_uncached_write_data(wdata->ctx);
2837         /* the below call can possibly free the last ref to aio ctx */
2838         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2839 }
2840
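     /*
      * Copy data for a wdata out of the supplied iterator. On success *len
      * is reduced to the number of bytes actually copied and *num_pages to
      * the number of pages consumed; a short copy (e.g. an unmapped user
      * address) just shortens the write, while copying nothing at all
      * returns -EFAULT.
      */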
2841 static int
2842 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2843                       size_t *len, unsigned long *num_pages)
2844 {
2845         size_t save_len, copied, bytes, cur_len = *len;
2846         unsigned long i, nr_pages = *num_pages;
2847
2848         save_len = cur_len;
2849         for (i = 0; i < nr_pages; i++) {
2850                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2851                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2852                 cur_len -= copied;
2853                 /*
2854                  * If we didn't copy as much as we expected, then that
2855                  * may mean we trod into an unmapped area. Stop copying
2856                  * at that point. On the next pass through the big
2857                  * loop, we'll likely end up getting a zero-length
2858                  * write and bailing out of it.
2859                  */
2860                 if (copied < bytes)
2861                         break;
2862         }
2863         cur_len = save_len - cur_len;
2864         *len = cur_len;
2865
2866         /*
2867          * If we have no data to send, then that probably means that
2868          * the copy above failed altogether. That's most likely because
2869          * the address in the iovec was bogus. Return -EFAULT and let
2870          * the caller free anything we allocated and bail out.
2871          */
2872         if (!cur_len)
2873                 return -EFAULT;
2874
2875         /*
2876          * i + 1 now represents the number of pages we actually used in
2877          * the copy phase above.
2878          */
2879         *num_pages = i + 1;
2880         return 0;
2881 }
2882
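     /*
      * Resend a wdata after a retryable failure (e.g. a reconnect). The
      * request is resent as a single piece, so this may block, sleeping
      * between attempts, until the server grants enough credits to cover
      * wdata->bytes in one go.
      */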
2883 static int
2884 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2885         struct cifs_aio_ctx *ctx)
2886 {
2887         unsigned int wsize;
2888         struct cifs_credits credits;
2889         int rc;
2890         struct TCP_Server_Info *server = wdata->server;
2891
2892         do {
2893                 if (wdata->cfile->invalidHandle) {
2894                         rc = cifs_reopen_file(wdata->cfile, false);
2895                         if (rc == -EAGAIN)
2896                                 continue;
2897                         else if (rc)
2898                                 break;
2899                 }
2900
2901
2902                 /*
2903                  * Wait for credits to resend this wdata.
2904                  * Note: we attempt to resend the whole wdata at once rather
2905                  * than in smaller segments.
2906                  */
2907                 do {
2908                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2909                                                 &wsize, &credits);
2910                         if (rc)
2911                                 goto fail;
2912
2913                         if (wsize < wdata->bytes) {
2914                                 add_credits_and_wake_if(server, &credits, 0);
2915                                 msleep(1000);
2916                         }
2917                 } while (wsize < wdata->bytes);
2918                 wdata->credits = credits;
2919
2920                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2921
2922                 if (!rc) {
2923                         if (wdata->cfile->invalidHandle)
2924                                 rc = -EAGAIN;
2925                         else {
2926 #ifdef CONFIG_CIFS_SMB_DIRECT
2927                                 if (wdata->mr) {
2928                                         wdata->mr->need_invalidate = true;
2929                                         smbd_deregister_mr(wdata->mr);
2930                                         wdata->mr = NULL;
2931                                 }
2932 #endif
2933                                 rc = server->ops->async_writev(wdata,
2934                                         cifs_uncached_writedata_release);
2935                         }
2936                 }
2937
2938                 /* If the write was successfully sent, we are done */
2939                 if (!rc) {
2940                         list_add_tail(&wdata->list, wdata_list);
2941                         return 0;
2942                 }
2943
2944                 /* Roll back credits and retry if needed */
2945                 add_credits_and_wake_if(server, &wdata->credits, 0);
2946         } while (rc == -EAGAIN);
2947
2948 fail:
2949         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2950         return rc;
2951 }
2952
2953 static int
2954 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2955                      struct cifsFileInfo *open_file,
2956                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2957                      struct cifs_aio_ctx *ctx)
2958 {
2959         int rc = 0;
2960         size_t cur_len;
2961         unsigned long nr_pages, num_pages, i;
2962         struct cifs_writedata *wdata;
2963         struct iov_iter saved_from = *from;
2964         loff_t saved_offset = offset;
2965         pid_t pid;
2966         struct TCP_Server_Info *server;
2967         struct page **pagevec;
2968         size_t start;
2969         unsigned int xid;
2970
2971         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2972                 pid = open_file->pid;
2973         else
2974                 pid = current->tgid;
2975
2976         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2977         xid = get_xid();
2978
2979         do {
2980                 unsigned int wsize;
2981                 struct cifs_credits credits_on_stack;
2982                 struct cifs_credits *credits = &credits_on_stack;
2983
2984                 if (open_file->invalidHandle) {
2985                         rc = cifs_reopen_file(open_file, false);
2986                         if (rc == -EAGAIN)
2987                                 continue;
2988                         else if (rc)
2989                                 break;
2990                 }
2991
2992                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2993                                                    &wsize, credits);
2994                 if (rc)
2995                         break;
2996
2997                 cur_len = min_t(const size_t, len, wsize);
2998
2999                 if (ctx->direct_io) {
3000                         ssize_t result;
3001
3002                         result = iov_iter_get_pages_alloc(
3003                                 from, &pagevec, cur_len, &start);
3004                         if (result < 0) {
3005                                 cifs_dbg(VFS,
3006                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3007                                          result, iov_iter_type(from),
3008                                          from->iov_offset, from->count);
3009                                 dump_stack();
3010
3011                                 rc = result;
3012                                 add_credits_and_wake_if(server, credits, 0);
3013                                 break;
3014                         }
3015                         cur_len = (size_t)result;
3016                         iov_iter_advance(from, cur_len);
3017
3018                         nr_pages =
3019                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3020
3021                         wdata = cifs_writedata_direct_alloc(pagevec,
3022                                              cifs_uncached_writev_complete);
3023                         if (!wdata) {
3024                                 rc = -ENOMEM;
3025                                 add_credits_and_wake_if(server, credits, 0);
3026                                 break;
3027                         }
3028
3029
3030                         wdata->page_offset = start;
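                             /*
                              * Only the first and last pages can be partially
                              * filled: the data starts at @start within the
                              * first page. Illustrative (with PAGE_SIZE == 4096,
                              * start == 512, cur_len == 6000): nr_pages is 2,
                              * the first page holds 4096 - 512 == 3584 bytes,
                              * and the tail page holds 6000 - 3584 == 2416.
                              */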
3031                         wdata->tailsz =
3032                                 nr_pages > 1 ?
3033                                         cur_len - (PAGE_SIZE - start) -
3034                                         (nr_pages - 2) * PAGE_SIZE :
3035                                         cur_len;
3036                 } else {
3037                         nr_pages = get_numpages(wsize, len, &cur_len);
3038                         wdata = cifs_writedata_alloc(nr_pages,
3039                                              cifs_uncached_writev_complete);
3040                         if (!wdata) {
3041                                 rc = -ENOMEM;
3042                                 add_credits_and_wake_if(server, credits, 0);
3043                                 break;
3044                         }
3045
3046                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3047                         if (rc) {
3048                                 kvfree(wdata->pages);
3049                                 kfree(wdata);
3050                                 add_credits_and_wake_if(server, credits, 0);
3051                                 break;
3052                         }
3053
3054                         num_pages = nr_pages;
3055                         rc = wdata_fill_from_iovec(
3056                                 wdata, from, &cur_len, &num_pages);
3057                         if (rc) {
3058                                 for (i = 0; i < nr_pages; i++)
3059                                         put_page(wdata->pages[i]);
3060                                 kvfree(wdata->pages);
3061                                 kfree(wdata);
3062                                 add_credits_and_wake_if(server, credits, 0);
3063                                 break;
3064                         }
3065
3066                         /*
3067                          * Bring nr_pages down to the number of pages we
3068                          * actually used, and free any pages that we didn't use.
3069                          */
3070                         for ( ; nr_pages > num_pages; nr_pages--)
3071                                 put_page(wdata->pages[nr_pages - 1]);
3072
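                             /*
                              * Buffered pages are packed from offset 0, so only
                              * the last page can be partial.
                              */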
3073                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3074                 }
3075
3076                 wdata->sync_mode = WB_SYNC_ALL;
3077                 wdata->nr_pages = nr_pages;
3078                 wdata->offset = (__u64)offset;
3079                 wdata->cfile = cifsFileInfo_get(open_file);
3080                 wdata->server = server;
3081                 wdata->pid = pid;
3082                 wdata->bytes = cur_len;
3083                 wdata->pagesz = PAGE_SIZE;
3084                 wdata->credits = credits_on_stack;
3085                 wdata->ctx = ctx;
3086                 kref_get(&ctx->refcount);
3087
3088                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3089
3090                 if (!rc) {
3091                         if (wdata->cfile->invalidHandle)
3092                                 rc = -EAGAIN;
3093                         else
3094                                 rc = server->ops->async_writev(wdata,
3095                                         cifs_uncached_writedata_release);
3096                 }
3097
3098                 if (rc) {
3099                         add_credits_and_wake_if(server, &wdata->credits, 0);
3100                         kref_put(&wdata->refcount,
3101                                  cifs_uncached_writedata_release);
3102                         if (rc == -EAGAIN) {
3103                                 *from = saved_from;
3104                                 iov_iter_advance(from, offset - saved_offset);
3105                                 continue;
3106                         }
3107                         break;
3108                 }
3109
3110                 list_add_tail(&wdata->list, wdata_list);
3111                 offset += cur_len;
3112                 len -= cur_len;
3113         } while (len > 0);
3114
3115         free_xid(xid);
3116         return rc;
3117 }
3118
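     /*
      * Runs from the write completion workers as each wdata finishes.
      * try_wait_for_completion() keeps a worker from blocking on sends that
      * are still in flight; collection simply resumes when the next
      * completion fires, and the final one reports the result via
      * ki_complete() or ctx->done.
      */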
3119 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3120 {
3121         struct cifs_writedata *wdata, *tmp;
3122         struct cifs_tcon *tcon;
3123         struct cifs_sb_info *cifs_sb;
3124         struct dentry *dentry = ctx->cfile->dentry;
3125         int rc;
3126
3127         tcon = tlink_tcon(ctx->cfile->tlink);
3128         cifs_sb = CIFS_SB(dentry->d_sb);
3129
3130         mutex_lock(&ctx->aio_mutex);
3131
3132         if (list_empty(&ctx->list)) {
3133                 mutex_unlock(&ctx->aio_mutex);
3134                 return;
3135         }
3136
3137         rc = ctx->rc;
3138         /*
3139          * Wait for and collect replies for any successful sends in order of
3140          * increasing offset. Once an error is hit, return without waiting
3141          * for any more replies.
3142          */
3143 restart_loop:
3144         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3145                 if (!rc) {
3146                         if (!try_wait_for_completion(&wdata->done)) {
3147                                 mutex_unlock(&ctx->aio_mutex);
3148                                 return;
3149                         }
3150
3151                         if (wdata->result)
3152                                 rc = wdata->result;
3153                         else
3154                                 ctx->total_len += wdata->bytes;
3155
3156                         /* resend the call if it's a retryable error */
3157                         if (rc == -EAGAIN) {
3158                                 struct list_head tmp_list;
3159                                 struct iov_iter tmp_from = ctx->iter;
3160
3161                                 INIT_LIST_HEAD(&tmp_list);
3162                                 list_del_init(&wdata->list);
3163
3164                                 if (ctx->direct_io)
3165                                         rc = cifs_resend_wdata(
3166                                                 wdata, &tmp_list, ctx);
3167                                 else {
3168                                         iov_iter_advance(&tmp_from,
3169                                                  wdata->offset - ctx->pos);
3170
3171                                         rc = cifs_write_from_iter(wdata->offset,
3172                                                 wdata->bytes, &tmp_from,
3173                                                 ctx->cfile, cifs_sb, &tmp_list,
3174                                                 ctx);
3175
3176                                         kref_put(&wdata->refcount,
3177                                                 cifs_uncached_writedata_release);
3178                                 }
3179
3180                                 list_splice(&tmp_list, &ctx->list);
3181                                 goto restart_loop;
3182                         }
3183                 }
3184                 list_del_init(&wdata->list);
3185                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3186         }
3187
3188         cifs_stats_bytes_written(tcon, ctx->total_len);
3189         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3190
3191         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3192
3193         mutex_unlock(&ctx->aio_mutex);
3194
3195         if (ctx->iocb && ctx->iocb->ki_complete)
3196                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3197         else
3198                 complete(&ctx->done);
3199 }
3200
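     /*
      * Common implementation for O_DIRECT and uncached writes: split the
      * iterator into wsize-sized async requests, then either wait for the
      * collected result (sync kiocb) or return -EIOCBQUEUED and let the
      * completion workers finish the aio.
      */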
3201 static ssize_t __cifs_writev(
3202         struct kiocb *iocb, struct iov_iter *from, bool direct)
3203 {
3204         struct file *file = iocb->ki_filp;
3205         ssize_t total_written = 0;
3206         struct cifsFileInfo *cfile;
3207         struct cifs_tcon *tcon;
3208         struct cifs_sb_info *cifs_sb;
3209         struct cifs_aio_ctx *ctx;
3210         struct iov_iter saved_from = *from;
3211         size_t len = iov_iter_count(from);
3212         int rc;
3213
3214         /*
3215          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3216          * In this case, fall back to the non-direct write path.
3217          * This could be improved by getting pages directly from the ITER_KVEC.
3218          */
3219         if (direct && iov_iter_is_kvec(from)) {
3220                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3221                 direct = false;
3222         }
3223
3224         rc = generic_write_checks(iocb, from);
3225         if (rc <= 0)
3226                 return rc;
3227
3228         cifs_sb = CIFS_FILE_SB(file);
3229         cfile = file->private_data;
3230         tcon = tlink_tcon(cfile->tlink);
3231
3232         if (!tcon->ses->server->ops->async_writev)
3233                 return -ENOSYS;
3234
3235         ctx = cifs_aio_ctx_alloc();
3236         if (!ctx)
3237                 return -ENOMEM;
3238
3239         ctx->cfile = cifsFileInfo_get(cfile);
3240
3241         if (!is_sync_kiocb(iocb))
3242                 ctx->iocb = iocb;
3243
3244         ctx->pos = iocb->ki_pos;
3245
3246         if (direct) {
3247                 ctx->direct_io = true;
3248                 ctx->iter = *from;
3249                 ctx->len = len;
3250         } else {
3251                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3252                 if (rc) {
3253                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3254                         return rc;
3255                 }
3256         }
3257
3258         /* grab a lock here because the write response handlers can access ctx */
3259         mutex_lock(&ctx->aio_mutex);
3260
3261         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3262                                   cfile, cifs_sb, &ctx->list, ctx);
3263
3264         /*
3265          * If at least one write was successfully sent, then discard any rc
3266          * value from the later writes. If the remaining writes succeed, then
3267          * we'll end up returning whatever was written. If they fail, then
3268          * we'll get a new rc value from that.
3269          */
3270         if (!list_empty(&ctx->list))
3271                 rc = 0;
3272
3273         mutex_unlock(&ctx->aio_mutex);
3274
3275         if (rc) {
3276                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3277                 return rc;
3278         }
3279
3280         if (!is_sync_kiocb(iocb)) {
3281                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3282                 return -EIOCBQUEUED;
3283         }
3284
3285         rc = wait_for_completion_killable(&ctx->done);
3286         if (rc) {
3287                 mutex_lock(&ctx->aio_mutex);
3288                 ctx->rc = rc = -EINTR;
3289                 total_written = ctx->total_len;
3290                 mutex_unlock(&ctx->aio_mutex);
3291         } else {
3292                 rc = ctx->rc;
3293                 total_written = ctx->total_len;
3294         }
3295
3296         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3297
3298         if (unlikely(!total_written))
3299                 return rc;
3300
3301         iocb->ki_pos += total_written;
3302         return total_written;
3303 }
3304
3305 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3306 {
3307         return __cifs_writev(iocb, from, true);
3308 }
3309
3310 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3311 {
3312         return __cifs_writev(iocb, from, false);
3313 }
3314
3315 static ssize_t
3316 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3317 {
3318         struct file *file = iocb->ki_filp;
3319         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3320         struct inode *inode = file->f_mapping->host;
3321         struct cifsInodeInfo *cinode = CIFS_I(inode);
3322         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3323         ssize_t rc;
3324
3325         inode_lock(inode);
3326         /*
3327          * We need to hold the sem to be sure nobody modifies the lock list
3328          * with a brlock that prevents writing.
3329          */
3330         down_read(&cinode->lock_sem);
3331
3332         rc = generic_write_checks(iocb, from);
3333         if (rc <= 0)
3334                 goto out;
3335
3336         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3337                                      server->vals->exclusive_lock_type, 0,
3338                                      NULL, CIFS_WRITE_OP))
3339                 rc = __generic_file_write_iter(iocb, from);
3340         else
3341                 rc = -EACCES;
3342 out:
3343         up_read(&cinode->lock_sem);
3344         inode_unlock(inode);
3345
3346         if (rc > 0)
3347                 rc = generic_write_sync(iocb, rc);
3348         return rc;
3349 }
3350
3351 ssize_t
3352 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3353 {
3354         struct inode *inode = file_inode(iocb->ki_filp);
3355         struct cifsInodeInfo *cinode = CIFS_I(inode);
3356         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3357         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3358                                                 iocb->ki_filp->private_data;
3359         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3360         ssize_t written;
3361
3362         written = cifs_get_writer(cinode);
3363         if (written)
3364                 return written;
3365
3366         if (CIFS_CACHE_WRITE(cinode)) {
3367                 if (cap_unix(tcon->ses) &&
3368                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3369                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3370                         written = generic_file_write_iter(iocb, from);
3371                         goto out;
3372                 }
3373                 written = cifs_writev(iocb, from);
3374                 goto out;
3375         }
3376         /*
3377          * For non-oplocked files in strict cache mode we need to write the data
3378          * to the server exactly from pos to pos+len-1 rather than flush all
3379          * affected pages because it may cause an error with mandatory locks on
3380          * these pages but not on the region from pos to pos+len-1.
3381          */
3382         written = cifs_user_writev(iocb, from);
3383         if (CIFS_CACHE_READ(cinode)) {
3384                 /*
3385                  * We have read level caching and we have just sent a write
3386                  * request to the server thus making data in the cache stale.
3387                  * Zap the cache and set oplock/lease level to NONE to avoid
3388                  * reading stale data from the cache. All subsequent read
3389                  * operations will read new data from the server.
3390                  */
3391                 cifs_zap_mapping(inode);
3392                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3393                          inode);
3394                 cinode->oplock = 0;
3395         }
3396 out:
3397         cifs_put_writer(cinode);
3398         return written;
3399 }
3400
3401 static struct cifs_readdata *
3402 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3403 {
3404         struct cifs_readdata *rdata;
3405
3406         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3407         if (rdata != NULL) {
3408                 rdata->pages = pages;
3409                 kref_init(&rdata->refcount);
3410                 INIT_LIST_HEAD(&rdata->list);
3411                 init_completion(&rdata->done);
3412                 INIT_WORK(&rdata->work, complete);
3413         }
3414
3415         return rdata;
3416 }
3417
3418 static struct cifs_readdata *
3419 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3420 {
3421         struct page **pages =
3422                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3423         struct cifs_readdata *ret = NULL;
3424
3425         if (pages) {
3426                 ret = cifs_readdata_direct_alloc(pages, complete);
3427                 if (!ret)
3428                         kfree(pages);
3429         }
3430
3431         return ret;
3432 }
3433
3434 void
3435 cifs_readdata_release(struct kref *refcount)
3436 {
3437         struct cifs_readdata *rdata = container_of(refcount,
3438                                         struct cifs_readdata, refcount);
3439 #ifdef CONFIG_CIFS_SMB_DIRECT
3440         if (rdata->mr) {
3441                 smbd_deregister_mr(rdata->mr);
3442                 rdata->mr = NULL;
3443         }
3444 #endif
3445         if (rdata->cfile)
3446                 cifsFileInfo_put(rdata->cfile);
3447
3448         kvfree(rdata->pages);
3449         kfree(rdata);
3450 }
3451
3452 static int
3453 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3454 {
3455         int rc = 0;
3456         struct page *page;
3457         unsigned int i;
3458
3459         for (i = 0; i < nr_pages; i++) {
3460                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3461                 if (!page) {
3462                         rc = -ENOMEM;
3463                         break;
3464                 }
3465                 rdata->pages[i] = page;
3466         }
3467
3468         if (rc) {
3469                 unsigned int nr_page_failed = i;
3470
3471                 for (i = 0; i < nr_page_failed; i++) {
3472                         put_page(rdata->pages[i]);
3473                         rdata->pages[i] = NULL;
3474                 }
3475         }
3476         return rc;
3477 }
3478
3479 static void
3480 cifs_uncached_readdata_release(struct kref *refcount)
3481 {
3482         struct cifs_readdata *rdata = container_of(refcount,
3483                                         struct cifs_readdata, refcount);
3484         unsigned int i;
3485
3486         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3487         for (i = 0; i < rdata->nr_pages; i++) {
3488                 put_page(rdata->pages[i]);
3489         }
3490         cifs_readdata_release(refcount);
3491 }
3492
3493 /**
3494  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3495  * @rdata:      the readdata response with list of pages holding data
3496  * @iter:       destination for our data
3497  *
3498  * This function copies data from a list of pages in a readdata response into
3499  * an array of iovecs. It will first calculate where the data should go
3500  * based on the info in the readdata and then copy the data into that spot.
3501  */
3502 static int
3503 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3504 {
3505         size_t remaining = rdata->got_bytes;
3506         unsigned int i;
3507
3508         for (i = 0; i < rdata->nr_pages; i++) {
3509                 struct page *page = rdata->pages[i];
3510                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3511                 size_t written;
3512
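                     /*
                      * For a pipe-backed iterator, copy through a temporary
                      * kernel mapping so the pipe gets a copy of the bytes
                      * rather than a reference to the page itself.
                      */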
3513                 if (unlikely(iov_iter_is_pipe(iter))) {
3514                         void *addr = kmap_atomic(page);
3515
3516                         written = copy_to_iter(addr, copy, iter);
3517                         kunmap_atomic(addr);
3518                 } else
3519                         written = copy_page_to_iter(page, 0, copy, iter);
3520                 remaining -= written;
3521                 if (written < copy && iov_iter_count(iter) > 0)
3522                         break;
3523         }
3524         return remaining ? -EFAULT : 0;
3525 }
3526
3527 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3528
3529 static void
3530 cifs_uncached_readv_complete(struct work_struct *work)
3531 {
3532         struct cifs_readdata *rdata = container_of(work,
3533                                                 struct cifs_readdata, work);
3534
3535         complete(&rdata->done);
3536         collect_uncached_read_data(rdata->ctx);
3537         /* the below call can possibly free the last ref to aio ctx */
3538         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3539 }
3540
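     /*
      * Fill the rdata pages with up to @len bytes: copied from @iter when
      * one is supplied, left in place when the data already arrived via an
      * smbdirect memory registration, or read from the socket otherwise.
      */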
3541 static int
3542 uncached_fill_pages(struct TCP_Server_Info *server,
3543                     struct cifs_readdata *rdata, struct iov_iter *iter,
3544                     unsigned int len)
3545 {
3546         int result = 0;
3547         unsigned int i;
3548         unsigned int nr_pages = rdata->nr_pages;
3549         unsigned int page_offset = rdata->page_offset;
3550
3551         rdata->got_bytes = 0;
3552         rdata->tailsz = PAGE_SIZE;
3553         for (i = 0; i < nr_pages; i++) {
3554                 struct page *page = rdata->pages[i];
3555                 size_t n;
3556                 unsigned int segment_size = rdata->pagesz;
3557
3558                 if (i == 0)
3559                         segment_size -= page_offset;
3560                 else
3561                         page_offset = 0;
3562
3563
3564                 if (len <= 0) {
3565                         /* no need to hold page hostage */
3566                         rdata->pages[i] = NULL;
3567                         rdata->nr_pages--;
3568                         put_page(page);
3569                         continue;
3570                 }
3571
3572                 n = len;
3573                 if (len >= segment_size)
3574                         /* enough data to fill the page */
3575                         n = segment_size;
3576                 else
3577                         rdata->tailsz = len;
3578                 len -= n;
3579
3580                 if (iter)
3581                         result = copy_page_from_iter(
3582                                         page, page_offset, n, iter);
3583 #ifdef CONFIG_CIFS_SMB_DIRECT
3584                 else if (rdata->mr)
3585                         result = n;
3586 #endif
3587                 else
3588                         result = cifs_read_page_from_socket(
3589                                         server, page, page_offset, n);
3590                 if (result < 0)
3591                         break;
3592
3593                 rdata->got_bytes += result;
3594         }
3595
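             /*
              * On a partial fill, report the bytes we did receive unless the
              * connection was aborted, in which case the error must be
              * propagated to the caller.
              */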
3596         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3597                                                 rdata->got_bytes : result;
3598 }
3599
3600 static int
3601 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3602                               struct cifs_readdata *rdata, unsigned int len)
3603 {
3604         return uncached_fill_pages(server, rdata, NULL, len);
3605 }
3606
3607 static int
3608 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3609                               struct cifs_readdata *rdata,
3610                               struct iov_iter *iter)
3611 {
3612         return uncached_fill_pages(server, rdata, iter, iter->count);
3613 }
3614
3615 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3616                         struct list_head *rdata_list,
3617                         struct cifs_aio_ctx *ctx)
3618 {
3619         unsigned int rsize;
3620         struct cifs_credits credits;
3621         int rc;
3622         struct TCP_Server_Info *server;
3623
3624         /* XXX: should we pick a new channel here? */
3625         server = rdata->server;
3626
3627         do {
3628                 if (rdata->cfile->invalidHandle) {
3629                         rc = cifs_reopen_file(rdata->cfile, true);
3630                         if (rc == -EAGAIN)
3631                                 continue;
3632                         else if (rc)
3633                                 break;
3634                 }
3635
3636                 /*
3637                  * Wait for credits to resend this rdata.
3638                  * Note: we attempt to resend the whole rdata at once rather
3639                  * than in smaller segments.
3640                  */
3641                 do {
3642                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3643                                                 &rsize, &credits);
3644
3645                         if (rc)
3646                                 goto fail;
3647
3648                         if (rsize < rdata->bytes) {
3649                                 add_credits_and_wake_if(server, &credits, 0);
3650                                 msleep(1000);
3651                         }
3652                 } while (rsize < rdata->bytes);
3653                 rdata->credits = credits;
3654
3655                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3656                 if (!rc) {
3657                         if (rdata->cfile->invalidHandle)
3658                                 rc = -EAGAIN;
3659                         else {
3660 #ifdef CONFIG_CIFS_SMB_DIRECT
3661                                 if (rdata->mr) {
3662                                         rdata->mr->need_invalidate = true;
3663                                         smbd_deregister_mr(rdata->mr);
3664                                         rdata->mr = NULL;
3665                                 }
3666 #endif
3667                                 rc = server->ops->async_readv(rdata);
3668                         }
3669                 }
3670
3671                 /* If the read was successfully sent, we are done */
3672                 if (!rc) {
3673                         /* Add to aio pending list */
3674                         list_add_tail(&rdata->list, rdata_list);
3675                         return 0;
3676                 }
3677
3678                 /* Roll back credits and retry if needed */
3679                 add_credits_and_wake_if(server, &rdata->credits, 0);
3680         } while (rc == -EAGAIN);
3681
3682 fail:
3683         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3684         return rc;
3685 }
3686
3687 static int
3688 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3689                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3690                      struct cifs_aio_ctx *ctx)
3691 {
3692         struct cifs_readdata *rdata;
3693         unsigned int npages, rsize;
3694         struct cifs_credits credits_on_stack;
3695         struct cifs_credits *credits = &credits_on_stack;
3696         size_t cur_len;
3697         int rc;
3698         pid_t pid;
3699         struct TCP_Server_Info *server;
3700         struct page **pagevec;
3701         size_t start;
3702         struct iov_iter direct_iov = ctx->iter;
3703
3704         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3705
3706         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3707                 pid = open_file->pid;
3708         else
3709                 pid = current->tgid;
3710
3711         if (ctx->direct_io)
3712                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3713
3714         do {
3715                 if (open_file->invalidHandle) {
3716                         rc = cifs_reopen_file(open_file, true);
3717                         if (rc == -EAGAIN)
3718                                 continue;
3719                         else if (rc)
3720                                 break;
3721                 }
3722
3723                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3724                                                    &rsize, credits);
3725                 if (rc)
3726                         break;
3727
3728                 cur_len = min_t(const size_t, len, rsize);
3729
3730                 if (ctx->direct_io) {
3731                         ssize_t result;
3732
3733                         result = iov_iter_get_pages_alloc(
3734                                         &direct_iov, &pagevec,
3735                                         cur_len, &start);
3736                         if (result < 0) {
3737                                 cifs_dbg(VFS,
3738                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3739                                          result, iov_iter_type(&direct_iov),
3740                                          direct_iov.iov_offset,
3741                                          direct_iov.count);
3742                                 dump_stack();
3743
3744                                 rc = result;
3745                                 add_credits_and_wake_if(server, credits, 0);
3746                                 break;
3747                         }
3748                         cur_len = (size_t)result;
3749                         iov_iter_advance(&direct_iov, cur_len);
3750
3751                         rdata = cifs_readdata_direct_alloc(
3752                                         pagevec, cifs_uncached_readv_complete);
3753                         if (!rdata) {
3754                                 add_credits_and_wake_if(server, credits, 0);
3755                                 rc = -ENOMEM;
3756                                 break;
3757                         }
3758
3759                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3760                         rdata->page_offset = start;
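                             /*
                              * Mirrors the direct write path above: only the
                              * first page (offset @start) and the last page
                              * can be partially filled.
                              */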
3761                         rdata->tailsz = npages > 1 ?
3762                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3763                                 cur_len;
3764
3765                 } else {
3766
3767                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3768                         /* allocate a readdata struct */
3769                         rdata = cifs_readdata_alloc(npages,
3770                                             cifs_uncached_readv_complete);
3771                         if (!rdata) {
3772                                 add_credits_and_wake_if(server, credits, 0);
3773                                 rc = -ENOMEM;
3774                                 break;
3775                         }
3776
3777                         rc = cifs_read_allocate_pages(rdata, npages);
3778                         if (rc) {
3779                                 kvfree(rdata->pages);
3780                                 kfree(rdata);
3781                                 add_credits_and_wake_if(server, credits, 0);
3782                                 break;
3783                         }
3784
3785                         rdata->tailsz = PAGE_SIZE;
3786                 }
3787
3788                 rdata->server = server;
3789                 rdata->cfile = cifsFileInfo_get(open_file);
3790                 rdata->nr_pages = npages;
3791                 rdata->offset = offset;
3792                 rdata->bytes = cur_len;
3793                 rdata->pid = pid;
3794                 rdata->pagesz = PAGE_SIZE;
3795                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3796                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3797                 rdata->credits = credits_on_stack;
3798                 rdata->ctx = ctx;
3799                 kref_get(&ctx->refcount);
3800
3801                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3802
3803                 if (!rc) {
3804                         if (rdata->cfile->invalidHandle)
3805                                 rc = -EAGAIN;
3806                         else
3807                                 rc = server->ops->async_readv(rdata);
3808                 }
3809
3810                 if (rc) {
3811                         add_credits_and_wake_if(server, &rdata->credits, 0);
3812                         kref_put(&rdata->refcount,
3813                                 cifs_uncached_readdata_release);
3814                         if (rc == -EAGAIN) {
3815                                 iov_iter_revert(&direct_iov, cur_len);
3816                                 continue;
3817                         }
3818                         break;
3819                 }
3820
3821                 list_add_tail(&rdata->list, rdata_list);
3822                 offset += cur_len;
3823                 len -= cur_len;
3824         } while (len > 0);
3825
3826         return rc;
3827 }
3828
3829 static void
3830 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3831 {
3832         struct cifs_readdata *rdata, *tmp;
3833         struct iov_iter *to = &ctx->iter;
3834         struct cifs_sb_info *cifs_sb;
3835         int rc;
3836
3837         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3838
3839         mutex_lock(&ctx->aio_mutex);
3840
3841         if (list_empty(&ctx->list)) {
3842                 mutex_unlock(&ctx->aio_mutex);
3843                 return;
3844         }
3845
3846         rc = ctx->rc;
3847         /* the loop below should proceed in the order of increasing offsets */
3848 again:
3849         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3850                 if (!rc) {
3851                         if (!try_wait_for_completion(&rdata->done)) {
3852                                 mutex_unlock(&ctx->aio_mutex);
3853                                 return;
3854                         }
3855
3856                         if (rdata->result == -EAGAIN) {
3857                                 /* resend the call if it's a retryable error */
3858                                 struct list_head tmp_list;
3859                                 unsigned int got_bytes = rdata->got_bytes;
3860
3861                                 list_del_init(&rdata->list);
3862                                 INIT_LIST_HEAD(&tmp_list);
3863
3864                                 /*
3865                                  * Got part of the data and then a reconnect
3866                                  * happened -- fill the buffer and continue
3867                                  * reading.
3868                                  */
3869                                 if (got_bytes && got_bytes < rdata->bytes) {
3870                                         rc = 0;
3871                                         if (!ctx->direct_io)
3872                                                 rc = cifs_readdata_to_iov(rdata, to);
3873                                         if (rc) {
3874                                                 kref_put(&rdata->refcount,
3875                                                         cifs_uncached_readdata_release);
3876                                                 continue;
3877                                         }
3878                                 }
3879
3880                                 if (ctx->direct_io) {
3881                                         /*
3882                                          * Re-use rdata as this is a
3883                                          * direct I/O
3884                                          */
3885                                         rc = cifs_resend_rdata(
3886                                                 rdata,
3887                                                 &tmp_list, ctx);
3888                                 } else {
3889                                         rc = cifs_send_async_read(
3890                                                 rdata->offset + got_bytes,
3891                                                 rdata->bytes - got_bytes,
3892                                                 rdata->cfile, cifs_sb,
3893                                                 &tmp_list, ctx);
3894
3895                                         kref_put(&rdata->refcount,
3896                                                 cifs_uncached_readdata_release);
3897                                 }
3898
3899                                 list_splice(&tmp_list, &ctx->list);
3900
3901                                 goto again;
3902                         } else if (rdata->result)
3903                                 rc = rdata->result;
3904                         else if (!ctx->direct_io)
3905                                 rc = cifs_readdata_to_iov(rdata, to);
3906
3907                         /* if there was a short read -- discard anything left */
3908                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3909                                 rc = -ENODATA;
3910
3911                         ctx->total_len += rdata->got_bytes;
3912                 }
3913                 list_del_init(&rdata->list);
3914                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3915         }
3916
3917         if (!ctx->direct_io)
3918                 ctx->total_len = ctx->len - iov_iter_count(to);
3919
3920         /* mask the -ENODATA case */
3921         if (rc == -ENODATA)
3922                 rc = 0;
3923
3924         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3925
3926         mutex_unlock(&ctx->aio_mutex);
3927
3928         if (ctx->iocb && ctx->iocb->ki_complete)
3929                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3930         else
3931                 complete(&ctx->done);
3932 }
3933
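     /*
      * Common implementation for O_DIRECT and uncached reads: the mirror
      * image of __cifs_writev() above, splitting the request into
      * rsize-sized async reads and collecting the replies as they complete.
      */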
3934 static ssize_t __cifs_readv(
3935         struct kiocb *iocb, struct iov_iter *to, bool direct)
3936 {
3937         size_t len;
3938         struct file *file = iocb->ki_filp;
3939         struct cifs_sb_info *cifs_sb;
3940         struct cifsFileInfo *cfile;
3941         struct cifs_tcon *tcon;
3942         ssize_t rc, total_read = 0;
3943         loff_t offset = iocb->ki_pos;
3944         struct cifs_aio_ctx *ctx;
3945
3946         /*
3947          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3948          * fall back to the data-copy read path instead.
3949          * This could be improved by getting pages directly from the ITER_KVEC.
3950          */
3951         if (direct && iov_iter_is_kvec(to)) {
3952                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3953                 direct = false;
3954         }
3955
3956         len = iov_iter_count(to);
3957         if (!len)
3958                 return 0;
3959
3960         cifs_sb = CIFS_FILE_SB(file);
3961         cfile = file->private_data;
3962         tcon = tlink_tcon(cfile->tlink);
3963
3964         if (!tcon->ses->server->ops->async_readv)
3965                 return -ENOSYS;
3966
3967         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3968                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3969
3970         ctx = cifs_aio_ctx_alloc();
3971         if (!ctx)
3972                 return -ENOMEM;
3973
3974         ctx->cfile = cifsFileInfo_get(cfile);
3975
3976         if (!is_sync_kiocb(iocb))
3977                 ctx->iocb = iocb;
3978
3979         if (iter_is_iovec(to))
3980                 ctx->should_dirty = true;
3981
3982         if (direct) {
3983                 ctx->pos = offset;
3984                 ctx->direct_io = true;
3985                 ctx->iter = *to;
3986                 ctx->len = len;
3987         } else {
3988                 rc = setup_aio_ctx_iter(ctx, to, READ);
3989                 if (rc) {
3990                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3991                         return rc;
3992                 }
3993                 len = ctx->len;
3994         }
3995
3996         /* grab a lock here because the read response handlers can access ctx */
3997         mutex_lock(&ctx->aio_mutex);
3998
3999         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4000
4001         /* if at least one read request send succeeded, then reset rc */
4002         if (!list_empty(&ctx->list))
4003                 rc = 0;
4004
4005         mutex_unlock(&ctx->aio_mutex);
4006
4007         if (rc) {
4008                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4009                 return rc;
4010         }
4011
4012         if (!is_sync_kiocb(iocb)) {
4013                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4014                 return -EIOCBQUEUED;
4015         }
4016
4017         rc = wait_for_completion_killable(&ctx->done);
4018         if (rc) {
4019                 mutex_lock(&ctx->aio_mutex);
4020                 ctx->rc = rc = -EINTR;
4021                 total_read = ctx->total_len;
4022                 mutex_unlock(&ctx->aio_mutex);
4023         } else {
4024                 rc = ctx->rc;
4025                 total_read = ctx->total_len;
4026         }
4027
4028         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4029
4030         if (total_read) {
4031                 iocb->ki_pos += total_read;
4032                 return total_read;
4033         }
4034         return rc;
4035 }
4036
4037 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4038 {
4039         return __cifs_readv(iocb, to, true);
4040 }
4041
4042 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4043 {
4044         return __cifs_readv(iocb, to, false);
4045 }
4046
4047 ssize_t
4048 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4049 {
4050         struct inode *inode = file_inode(iocb->ki_filp);
4051         struct cifsInodeInfo *cinode = CIFS_I(inode);
4052         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4053         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4054                                                 iocb->ki_filp->private_data;
4055         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4056         int rc = -EACCES;
4057
4058         /*
4059          * In strict cache mode we need to read from the server all the time
4060          * if we don't have level II oplock because the server can delay mtime
4061          * change - so we can't make a decision about invalidating the inode.
4062          * And we can also fail with page reading if there are mandatory locks
4063          * on pages affected by this read but not on the region from pos to
4064          * pos+len-1.
4065          */
4066         if (!CIFS_CACHE_READ(cinode))
4067                 return cifs_user_readv(iocb, to);
4068
4069         if (cap_unix(tcon->ses) &&
4070             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4071             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4072                 return generic_file_read_iter(iocb, to);
4073
4074         /*
4075          * We need to hold the sem to be sure nobody modifies the lock list
4076          * with a brlock that prevents reading.
4077          */
4078         down_read(&cinode->lock_sem);
4079         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4080                                      tcon->ses->server->vals->shared_lock_type,
4081                                      0, NULL, CIFS_READ_OP))
4082                 rc = generic_file_read_iter(iocb, to);
4083         up_read(&cinode->lock_sem);
4084         return rc;
4085 }
4086
4087 static ssize_t
4088 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4089 {
4090         int rc = -EACCES;
4091         unsigned int bytes_read = 0;
4092         unsigned int total_read;
4093         unsigned int current_read_size;
4094         unsigned int rsize;
4095         struct cifs_sb_info *cifs_sb;
4096         struct cifs_tcon *tcon;
4097         struct TCP_Server_Info *server;
4098         unsigned int xid;
4099         char *cur_offset;
4100         struct cifsFileInfo *open_file;
4101         struct cifs_io_parms io_parms = {0};
4102         int buf_type = CIFS_NO_BUFFER;
4103         __u32 pid;
4104
4105         xid = get_xid();
4106         cifs_sb = CIFS_FILE_SB(file);
4107
4108         /* FIXME: set up handlers for larger reads and/or convert to async */
4109         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4110
4111         if (file->private_data == NULL) {
4112                 rc = -EBADF;
4113                 free_xid(xid);
4114                 return rc;
4115         }
4116         open_file = file->private_data;
4117         tcon = tlink_tcon(open_file->tlink);
4118         server = cifs_pick_channel(tcon->ses);
4119
4120         if (!server->ops->sync_read) {
4121                 free_xid(xid);
4122                 return -ENOSYS;
4123         }
4124
4125         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4126                 pid = open_file->pid;
4127         else
4128                 pid = current->tgid;
4129
4130         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4131                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4132
4133         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4134              total_read += bytes_read, cur_offset += bytes_read) {
4135                 do {
4136                         current_read_size = min_t(uint, read_size - total_read,
4137                                                   rsize);
4138                         /*
4139                          * For Windows ME and 9x we do not want to request more
4140                          * than the server negotiated, since it will refuse the
4141                          * read otherwise.
4142                          */
4143                         if (!(tcon->ses->capabilities &
4144                                 tcon->ses->server->vals->cap_large_files)) {
4145                                 current_read_size = min_t(uint,
4146                                         current_read_size, CIFSMaxBufSize);
4147                         }
4148                         if (open_file->invalidHandle) {
4149                                 rc = cifs_reopen_file(open_file, true);
4150                                 if (rc != 0)
4151                                         break;
4152                         }
4153                         io_parms.pid = pid;
4154                         io_parms.tcon = tcon;
4155                         io_parms.offset = *offset;
4156                         io_parms.length = current_read_size;
4157                         io_parms.server = server;
4158                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4159                                                     &bytes_read, &cur_offset,
4160                                                     &buf_type);
4161                 } while (rc == -EAGAIN);
4162
4163                 if (rc || (bytes_read == 0)) {
4164                         if (total_read) {
4165                                 break;
4166                         } else {
4167                                 free_xid(xid);
4168                                 return rc;
4169                         }
4170                 } else {
4171                         cifs_stats_bytes_read(tcon, total_read);
4172                         *offset += bytes_read;
4173                 }
4174         }
4175         free_xid(xid);
4176         return total_read;
4177 }
4178
4179 /*
4180  * If the page is mmap'ed into a process' page tables, then we need to make
4181  * sure that it doesn't change while being written back.
4182  */
4183 static vm_fault_t
4184 cifs_page_mkwrite(struct vm_fault *vmf)
4185 {
4186         struct page *page = vmf->page;
4187
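
             /*
              * Returning VM_FAULT_LOCKED with the page still locked
              * serializes this fault against anyone else (e.g. writeback)
              * who needs the page lock.
              */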
4188         lock_page(page);
4189         return VM_FAULT_LOCKED;
4190 }
4191
4192 static const struct vm_operations_struct cifs_file_vm_ops = {
4193         .fault = filemap_fault,
4194         .map_pages = filemap_map_pages,
4195         .page_mkwrite = cifs_page_mkwrite,
4196 };
4197
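/*
 * Strict cache mode: if we do not hold a read lease/oplock, locally
 * cached pages may be stale, so zap the mapping before wiring up mmap.
 */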
4198 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4199 {
4200         int xid, rc = 0;
4201         struct inode *inode = file_inode(file);
4202
4203         xid = get_xid();
4204
4205         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4206                 rc = cifs_zap_mapping(inode);
4207         if (!rc)
4208                 rc = generic_file_mmap(file, vma);
4209         if (!rc)
4210                 vma->vm_ops = &cifs_file_vm_ops;
4211
4212         free_xid(xid);
4213         return rc;
4214 }
4215
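/*
 * Default mmap path: revalidate the file first (which may invalidate
 * stale cached pages) and refuse the mmap if revalidation fails.
 */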
4216 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4217 {
4218         int rc, xid;
4219
4220         xid = get_xid();
4221
4222         rc = cifs_revalidate_file(file);
4223         if (rc)
4224                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4225                          rc);
4226         if (!rc)
4227                 rc = generic_file_mmap(file, vma);
4228         if (!rc)
4229                 vma->vm_ops = &cifs_file_vm_ops;
4230
4231         free_xid(xid);
4232         return rc;
4233 }
4234
4235 static void
4236 cifs_readv_complete(struct work_struct *work)
4237 {
4238         unsigned int i, got_bytes;
4239         struct cifs_readdata *rdata = container_of(work,
4240                                                 struct cifs_readdata, work);
4241
4242         got_bytes = rdata->got_bytes;
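        /*
         * Walk the pages of the completed read; got_bytes counts down as we
         * go, so we can tell which pages were actually filled and are
         * therefore safe to mark uptodate.
         */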
4243         for (i = 0; i < rdata->nr_pages; i++) {
4244                 struct page *page = rdata->pages[i];
4245
4246                 lru_cache_add(page);
4247
4248                 if (rdata->result == 0 ||
4249                     (rdata->result == -EAGAIN && got_bytes)) {
4250                         flush_dcache_page(page);
4251                         SetPageUptodate(page);
4252                 }
4253
4254                 unlock_page(page);
4255
4256                 if (rdata->result == 0 ||
4257                     (rdata->result == -EAGAIN && got_bytes))
4258                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4259
4260                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4261
4262                 put_page(page);
4263                 rdata->pages[i] = NULL;
4264         }
4265         kref_put(&rdata->refcount, cifs_readdata_release);
4266 }
4267
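/*
 * Fill the pages of an async read request with data: either copied from
 * @iter (when the data has already been received, e.g. decrypted into a
 * buffer), left in place for an SMB-Direct memory registration, or read
 * directly from the server socket.
 */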
4268 static int
4269 readpages_fill_pages(struct TCP_Server_Info *server,
4270                      struct cifs_readdata *rdata, struct iov_iter *iter,
4271                      unsigned int len)
4272 {
4273         int result = 0;
4274         unsigned int i;
4275         u64 eof;
4276         pgoff_t eof_index;
4277         unsigned int nr_pages = rdata->nr_pages;
4278         unsigned int page_offset = rdata->page_offset;
4279
4280         /* determine the eof that the server (probably) has */
4281         eof = CIFS_I(rdata->mapping->host)->server_eof;
4282         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4283         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4284
4285         rdata->got_bytes = 0;
4286         rdata->tailsz = PAGE_SIZE;
4287         for (i = 0; i < nr_pages; i++) {
4288                 struct page *page = rdata->pages[i];
4289                 unsigned int to_read = rdata->pagesz;
4290                 size_t n;
4291
4292                 if (i == 0)
4293                         to_read -= page_offset;
4294                 else
4295                         page_offset = 0;
4296
4297                 n = to_read;
4298
4299                 if (len >= to_read) {
4300                         len -= to_read;
4301                 } else if (len > 0) {
4302                         /* enough for partial page, fill and zero the rest */
4303                         zero_user(page, len + page_offset, to_read - len);
4304                         n = rdata->tailsz = len;
4305                         len = 0;
4306                 } else if (page->index > eof_index) {
4307                         /*
4308                          * The VFS will not try to do readahead past the
4309                          * i_size, but it's possible that we have outstanding
4310                          * writes with gaps in the middle and the i_size hasn't
4311                          * caught up yet. Populate those with zeroed out pages
4312                          * to prevent the VFS from repeatedly attempting to
4313                          * fill them until the writes are flushed.
4314                          */
4315                         zero_user(page, 0, PAGE_SIZE);
4316                         lru_cache_add(page);
4317                         flush_dcache_page(page);
4318                         SetPageUptodate(page);
4319                         unlock_page(page);
4320                         put_page(page);
4321                         rdata->pages[i] = NULL;
4322                         rdata->nr_pages--;
4323                         continue;
4324                 } else {
4325                         /* no need to hold page hostage */
4326                         lru_cache_add(page);
4327                         unlock_page(page);
4328                         put_page(page);
4329                         rdata->pages[i] = NULL;
4330                         rdata->nr_pages--;
4331                         continue;
4332                 }
4333
4334                 if (iter)
4335                         result = copy_page_from_iter(
4336                                         page, page_offset, n, iter);
4337 #ifdef CONFIG_CIFS_SMB_DIRECT
4338                 else if (rdata->mr)
4339                         result = n;
4340 #endif
4341                 else
4342                         result = cifs_read_page_from_socket(
4343                                         server, page, page_offset, n);
4344                 if (result < 0)
4345                         break;
4346
4347                 rdata->got_bytes += result;
4348         }
4349
4350         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4351                                                 rdata->got_bytes : result;
4352 }
4353
4354 static int
4355 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4356                                struct cifs_readdata *rdata, unsigned int len)
4357 {
4358         return readpages_fill_pages(server, rdata, NULL, len);
4359 }
4360
4361 static int
4362 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4363                                struct cifs_readdata *rdata,
4364                                struct iov_iter *iter)
4365 {
4366         return readpages_fill_pages(server, rdata, iter, iter->count);
4367 }
4368
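/*
 * Peel pages off the tail of @page_list (which is kept in descending
 * index order) into @tmplist while they remain index-contiguous and the
 * request stays within @rsize, adding each page to the page cache as we
 * go. On return, @offset, @bytes and @nr_pages describe the request.
 */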
4369 static int
4370 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4371                     unsigned int rsize, struct list_head *tmplist,
4372                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4373 {
4374         struct page *page, *tpage;
4375         unsigned int expected_index;
4376         int rc;
4377         gfp_t gfp = readahead_gfp_mask(mapping);
4378
4379         INIT_LIST_HEAD(tmplist);
4380
4381         page = lru_to_page(page_list);
4382
4383         /*
4384          * Lock the page and put it in the cache. Since no one else
4385          * should have access to this page, we're safe to simply set
4386          * PG_locked without checking it first.
4387          */
4388         __SetPageLocked(page);
4389         rc = add_to_page_cache_locked(page, mapping,
4390                                       page->index, gfp);
4391
4392         /* give up if we can't stick it in the cache */
4393         if (rc) {
4394                 __ClearPageLocked(page);
4395                 return rc;
4396         }
4397
4398         /* move first page to the tmplist */
4399         *offset = (loff_t)page->index << PAGE_SHIFT;
4400         *bytes = PAGE_SIZE;
4401         *nr_pages = 1;
4402         list_move_tail(&page->lru, tmplist);
4403
4404         /* now try and add more pages onto the request */
4405         expected_index = page->index + 1;
4406         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4407                 /* discontinuity? */
4408                 if (page->index != expected_index)
4409                         break;
4410
4411                 /* would this page push the read over the rsize? */
4412                 if (*bytes + PAGE_SIZE > rsize)
4413                         break;
4414
4415                 __SetPageLocked(page);
4416                 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4417                 if (rc) {
4418                         __ClearPageLocked(page);
4419                         break;
4420                 }
4421                 list_move_tail(&page->lru, tmplist);
4422                 (*bytes) += PAGE_SIZE;
4423                 expected_index++;
4424                 (*nr_pages)++;
4425         }
4426         return rc;
4427 }
4428
4429 static int cifs_readpages(struct file *file, struct address_space *mapping,
4430         struct list_head *page_list, unsigned num_pages)
4431 {
4432         int rc;
4433         int err = 0;
4434         struct list_head tmplist;
4435         struct cifsFileInfo *open_file = file->private_data;
4436         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4437         struct TCP_Server_Info *server;
4438         pid_t pid;
4439         unsigned int xid;
4440
4441         xid = get_xid();
4442         /*
4443          * Read as many pages as possible from fscache. Returns -ENOBUFS
4444          * immediately if the cookie is negative.
4445          *
4446          * After this point, every page in the list might have PG_fscache set,
4447          * so we will need to clean that up from every page we don't use.
4448          */
4449         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4450                                          &num_pages);
4451         if (rc == 0) {
4452                 free_xid(xid);
4453                 return rc;
4454         }
4455
4456         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4457                 pid = open_file->pid;
4458         else
4459                 pid = current->tgid;
4460
4461         rc = 0;
4462         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4463
4464         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4465                  __func__, file, mapping, num_pages);
4466
4467         /*
4468          * Start with the page at end of list and move it to private
4469          * list. Do the same with any following pages until we hit
4470          * the rsize limit, hit an index discontinuity, or run out of
4471          * pages. Issue the async read and then start the loop again
4472          * until the list is empty.
4473          *
4474          * Note that list order is important. The page_list is in
4475          * the order of declining indexes. When we put the pages into
4476          * rdata->pages, we want them in increasing order.
4477          */
4478         while (!list_empty(page_list) && !err) {
4479                 unsigned int i, nr_pages, bytes, rsize;
4480                 loff_t offset;
4481                 struct page *page, *tpage;
4482                 struct cifs_readdata *rdata;
4483                 struct cifs_credits credits_on_stack;
4484                 struct cifs_credits *credits = &credits_on_stack;
4485
4486                 if (open_file->invalidHandle) {
4487                         rc = cifs_reopen_file(open_file, true);
4488                         if (rc == -EAGAIN)
4489                                 continue;
4490                         else if (rc)
4491                                 break;
4492                 }
4493
4494                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4495                                                    &rsize, credits);
4496                 if (rc)
4497                         break;
4498
4499                 /*
4500                  * Give up immediately if rsize is too small to read an entire
4501                  * page. The VFS will fall back to readpage. We should never
4502                  * reach this point however since we set ra_pages to 0 when the
4503                  * rsize is smaller than a cache page.
4504                  */
4505                 if (unlikely(rsize < PAGE_SIZE)) {
4506                         add_credits_and_wake_if(server, credits, 0);
4507                         free_xid(xid);
4508                         return 0;
4509                 }
4510
4511                 nr_pages = 0;
4512                 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4513                                          &nr_pages, &offset, &bytes);
4514                 if (!nr_pages) {
4515                         add_credits_and_wake_if(server, credits, 0);
4516                         break;
4517                 }
4518
4519                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4520                 if (!rdata) {
4521                         /* best to give up if we're out of mem */
4522                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4523                                 list_del(&page->lru);
4524                                 lru_cache_add(page);
4525                                 unlock_page(page);
4526                                 put_page(page);
4527                         }
4528                         rc = -ENOMEM;
4529                         add_credits_and_wake_if(server, credits, 0);
4530                         break;
4531                 }
4532
4533                 rdata->cfile = cifsFileInfo_get(open_file);
4534                 rdata->server = server;
4535                 rdata->mapping = mapping;
4536                 rdata->offset = offset;
4537                 rdata->bytes = bytes;
4538                 rdata->pid = pid;
4539                 rdata->pagesz = PAGE_SIZE;
4540                 rdata->tailsz = PAGE_SIZE;
4541                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4542                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4543                 rdata->credits = credits_on_stack;
4544
4545                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4546                         list_del(&page->lru);
4547                         rdata->pages[rdata->nr_pages++] = page;
4548                 }
4549
4550                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4551
4552                 if (!rc) {
4553                         if (rdata->cfile->invalidHandle)
4554                                 rc = -EAGAIN;
4555                         else
4556                                 rc = server->ops->async_readv(rdata);
4557                 }
4558
4559                 if (rc) {
4560                         add_credits_and_wake_if(server, &rdata->credits, 0);
4561                         for (i = 0; i < rdata->nr_pages; i++) {
4562                                 page = rdata->pages[i];
4563                                 lru_cache_add(page);
4564                                 unlock_page(page);
4565                                 put_page(page);
4566                         }
4567                         /* Fall back to readpage in error/reconnect cases */
4568                         kref_put(&rdata->refcount, cifs_readdata_release);
4569                         break;
4570                 }
4571
4572                 kref_put(&rdata->refcount, cifs_readdata_release);
4573         }
4574
4575         /* Any pages that have been shown to fscache but didn't get added to
4576          * the pagecache must be uncached before they get returned to the
4577          * allocator.
4578          */
4579         cifs_fscache_readpages_cancel(mapping->host, page_list);
4580         free_xid(xid);
4581         return rc;
4582 }
4583
4584 /*
4585  * cifs_readpage_worker must be called with the page pinned
4586  */
4587 static int cifs_readpage_worker(struct file *file, struct page *page,
4588         loff_t *poffset)
4589 {
4590         char *read_data;
4591         int rc;
4592
4593         /* Is the page cached? */
4594         rc = cifs_readpage_from_fscache(file_inode(file), page);
4595         if (rc == 0)
4596                 goto read_complete;
4597
4598         read_data = kmap(page);
4599         /* for reads over a certain size we could initiate async read ahead */
4600
4601         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4602
4603         if (rc < 0)
4604                 goto io_error;
4605         else
4606                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4607
4608         /* we do not want atime to be less than mtime, it broke some apps */
4609         file_inode(file)->i_atime = current_time(file_inode(file));
4610         /* clamp atime forward to mtime only if it would otherwise lag behind */
4611         if (timespec64_compare(&(file_inode(file)->i_atime),
4612                                &(file_inode(file)->i_mtime)) < 0)
4613                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4614
4615         if (PAGE_SIZE > rc)
4616                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4617
4618         flush_dcache_page(page);
4619         SetPageUptodate(page);
4620
4621         /* send this page to the cache */
4622         cifs_readpage_to_fscache(file_inode(file), page);
4623
4624         rc = 0;
4625
4626 io_error:
4627         kunmap(page);
4628         unlock_page(page);
4629
4630 read_complete:
4631         return rc;
4632 }
4633
4634 static int cifs_readpage(struct file *file, struct page *page)
4635 {
4636         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4637         int rc = -EACCES;
4638         unsigned int xid;
4639
4640         xid = get_xid();
4641
4642         if (file->private_data == NULL) {
4643                 rc = -EBADF;
4644                 free_xid(xid);
4645                 return rc;
4646         }
4647
4648         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4649                  page, (int)offset, (int)offset);
4650
4651         rc = cifs_readpage_worker(file, page, &offset);
4652
4653         free_xid(xid);
4654         return rc;
4655 }
4656
4657 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4658 {
4659         struct cifsFileInfo *open_file;
4660
4661         spin_lock(&cifs_inode->open_file_lock);
4662         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4663                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4664                         spin_unlock(&cifs_inode->open_file_lock);
4665                         return 1;
4666                 }
4667         }
4668         spin_unlock(&cifs_inode->open_file_lock);
4669         return 0;
4670 }
4671
4672 /* We do not want to update the file size from the server for inodes
4673    open for write, to avoid races with writepage extending the file.
4674    In the future we could consider refreshing the inode only on
4675    increases in the file size, but this is tricky to do without
4676    racing with writebehind page caching in the current Linux kernel
4677    design. */
4678 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4679 {
4680         if (!cifsInode)
4681                 return true;
4682
4683         if (is_inode_writable(cifsInode)) {
4684                 /* This inode is open for write at least once */
4685                 struct cifs_sb_info *cifs_sb;
4686
4687                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4688                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4689                         /* since there is no page cache to corrupt on
4690                            direct I/O we can change the size safely */
4691                         return true;
4692                 }
4693
4694                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4695                         return true;
4696
4697                 return false;
4698         } else
4699                 return true;
4700 }
4701
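/*
 * Prepare a page for a buffered write. On success the locked page is
 * returned in *pagep; cifs_write_end() later completes the update (or
 * falls back to a synchronous write if the page never became uptodate).
 */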
4702 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4703                         loff_t pos, unsigned len, unsigned flags,
4704                         struct page **pagep, void **fsdata)
4705 {
4706         int oncethru = 0;
4707         pgoff_t index = pos >> PAGE_SHIFT;
4708         loff_t offset = pos & (PAGE_SIZE - 1);
4709         loff_t page_start = pos & PAGE_MASK;
4710         loff_t i_size;
4711         struct page *page;
4712         int rc = 0;
4713
4714         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4715
4716 start:
4717         page = grab_cache_page_write_begin(mapping, index, flags);
4718         if (!page) {
4719                 rc = -ENOMEM;
4720                 goto out;
4721         }
4722
4723         if (PageUptodate(page))
4724                 goto out;
4725
4726         /*
4727          * If we write a full page it will be up to date, no need to read from
4728          * the server. If the write is short, we'll end up doing a sync write
4729          * instead.
4730          */
4731         if (len == PAGE_SIZE)
4732                 goto out;
4733
4734         /*
4735          * optimize away the read when we have an oplock, and we're not
4736          * expecting to use any of the data we'd be reading in. That
4737          * is, when the page lies beyond the EOF, or straddles the EOF
4738          * and the write will cover all of the existing data.
4739          */
4740         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4741                 i_size = i_size_read(mapping->host);
4742                 if (page_start >= i_size ||
4743                     (offset == 0 && (pos + len) >= i_size)) {
4744                         zero_user_segments(page, 0, offset,
4745                                            offset + len,
4746                                            PAGE_SIZE);
4747                         /*
4748                          * PageChecked means that the parts of the page
4749                          * to which we're not writing are considered up
4750                          * to date. Once the data is copied to the
4751                          * page, it can be set uptodate.
4752                          */
4753                         SetPageChecked(page);
4754                         goto out;
4755                 }
4756         }
4757
4758         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4759                 /*
4760                  * might as well read a page, it is fast enough. If we get
4761                  * an error, we don't need to return it. cifs_write_end will
4762                  * do a sync write instead since PG_uptodate isn't set.
4763                  */
4764                 cifs_readpage_worker(file, page, &page_start);
4765                 put_page(page);
4766                 oncethru = 1;
4767                 goto start;
4768         } else {
4769                 /* we could try using another file handle if there is one,
4770                    but how would we lock it to prevent a close of that handle
4771                    racing with this read? In any case the page will be
4772                    written out by write_end, so this is fine */
4773         }
4774 out:
4775         *pagep = page;
4776         return rc;
4777 }
4778
4779 static int cifs_release_page(struct page *page, gfp_t gfp)
4780 {
4781         if (PagePrivate(page))
4782                 return 0;
4783
4784         return cifs_fscache_release_page(page, gfp);
4785 }
4786
4787 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4788                                  unsigned int length)
4789 {
4790         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4791
4792         if (offset == 0 && length == PAGE_SIZE)
4793                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4794 }
4795
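/*
 * Synchronously write back a dirty page before it is invalidated, e.g.
 * when invalidate_inode_pages2() or page migration needs it clean.
 */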
4796 static int cifs_launder_page(struct page *page)
4797 {
4798         int rc = 0;
4799         loff_t range_start = page_offset(page);
4800         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4801         struct writeback_control wbc = {
4802                 .sync_mode = WB_SYNC_ALL,
4803                 .nr_to_write = 0,
4804                 .range_start = range_start,
4805                 .range_end = range_end,
4806         };
4807
4808         cifs_dbg(FYI, "Launder page: %p\n", page);
4809
4810         if (clear_page_dirty_for_io(page))
4811                 rc = cifs_writepage_locked(page, &wbc);
4812
4813         cifs_fscache_invalidate_page(page, page->mapping->host);
4814         return rc;
4815 }
4816
4817 void cifs_oplock_break(struct work_struct *work)
4818 {
4819         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4820                                                   oplock_break);
4821         struct inode *inode = d_inode(cfile->dentry);
4822         struct cifsInodeInfo *cinode = CIFS_I(inode);
4823         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4824         struct TCP_Server_Info *server = tcon->ses->server;
4825         int rc = 0;
4826         bool purge_cache = false;
4827         bool is_deferred = false;
4828         struct cifs_deferred_close *dclose;
4829
4830         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
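        /* wait for any in-flight writers to finish before downgrading the oplock */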
4831                         TASK_UNINTERRUPTIBLE);
4832
4833         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4834                                       cfile->oplock_epoch, &purge_cache);
4835
4836         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4837                                                 cifs_has_mand_locks(cinode)) {
4838                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4839                          inode);
4840                 cinode->oplock = 0;
4841         }
4842
4843         if (inode && S_ISREG(inode->i_mode)) {
4844                 if (CIFS_CACHE_READ(cinode))
4845                         break_lease(inode, O_RDONLY);
4846                 else
4847                         break_lease(inode, O_WRONLY);
4848                 rc = filemap_fdatawrite(inode->i_mapping);
4849                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4850                         rc = filemap_fdatawait(inode->i_mapping);
4851                         mapping_set_error(inode->i_mapping, rc);
4852                         cifs_zap_mapping(inode);
4853                 }
4854                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4855                 if (CIFS_CACHE_WRITE(cinode))
4856                         goto oplock_break_ack;
4857         }
4858
4859         rc = cifs_push_locks(cfile);
4860         if (rc)
4861                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4862
4863 oplock_break_ack:
4864         /*
4865          * Releasing a stale oplock after a recent reconnect of the SMB
4866          * session (using a now-invalid file handle) is not a data integrity
4867          * issue, but do not bother sending an oplock release if the session
4868          * is still disconnected, since the server already released the oplock.
4869          */
4870         if (!cfile->oplock_break_cancelled) {
4871                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4872                                                              cinode);
4873                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4874         }
4875         /*
4876          * When an oplock break is received and there are no active file
4877          * handles, only cached ones, set the oplock_break_received flag so
4878          * that a new open will not reuse the cached handle.
4879          */
4880         spin_lock(&CIFS_I(inode)->deferred_lock);
4881         is_deferred = cifs_is_deferred_close(cfile, &dclose);
4882         if (is_deferred && cfile->deferred_scheduled) {
4883                 cfile->oplock_break_received = true;
4884                 mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
4885         }
4886         spin_unlock(&CIFS_I(inode)->deferred_lock);
4887         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
4888         cifs_done_oplock_break(cinode);
4889 }
4890
4891 /*
4892  * The presence of cifs_direct_io() in the address space ops vector
4893  * allows open() O_DIRECT flags which would have failed otherwise.
4894  *
4895  * In the non-cached mode (mount with cache=none), we shunt off direct
4896  * read and write requests, so this method should never be called.
4897  *
4898  * Direct I/O is not yet supported in the cached mode.
4899  */
4900 static ssize_t
4901 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4902 {
4903         /*
4904          * FIXME
4905          * Eventually need to support direct IO for non forcedirectio mounts
4906          */
4907         return -EINVAL;
4908 }
4909
4910 static int cifs_swap_activate(struct swap_info_struct *sis,
4911                               struct file *swap_file, sector_t *span)
4912 {
4913         struct cifsFileInfo *cfile = swap_file->private_data;
4914         struct inode *inode = swap_file->f_mapping->host;
4915         unsigned long blocks;
4916         long long isize;
4917
4918         cifs_dbg(FYI, "swap activate\n");
4919
4920         spin_lock(&inode->i_lock);
4921         blocks = inode->i_blocks;
4922         isize = inode->i_size;
4923         spin_unlock(&inode->i_lock);
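        /* a swapfile must have no holes: i_blocks (512-byte units) must cover i_size */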
4924         if (blocks * 512 < isize) {
4925                 pr_warn("swap activate: swapfile has holes\n");
4926                 return -EINVAL;
4927         }
4928         *span = sis->pages;
4929
4930         pr_warn_once("Swap support over SMB3 is experimental\n");
4931
4932         /*
4933          * TODO: consider adding ACL (or documenting how) to prevent other
4934          * users (on this or other systems) from reading it
4935          */
4936
4937
4938         /* TODO: add sk_set_memalloc(inet) or similar */
4939
4940         if (cfile)
4941                 cfile->swapfile = true;
4942         /*
4943          * TODO: Since file already open, we can't open with DENY_ALL here
4944          * but we could add call to grab a byte range lock to prevent others
4945          * from reading or writing the file
4946          */
4947
4948         return 0;
4949 }
4950
4951 static void cifs_swap_deactivate(struct file *file)
4952 {
4953         struct cifsFileInfo *cfile = file->private_data;
4954
4955         cifs_dbg(FYI, "swap deactivate\n");
4956
4957         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4958
4959         if (cfile)
4960                 cfile->swapfile = false;
4961
4962         /* do we need to unpin (or unlock) the file? */
4963 }
4964
4965 const struct address_space_operations cifs_addr_ops = {
4966         .readpage = cifs_readpage,
4967         .readpages = cifs_readpages,
4968         .writepage = cifs_writepage,
4969         .writepages = cifs_writepages,
4970         .write_begin = cifs_write_begin,
4971         .write_end = cifs_write_end,
4972         .set_page_dirty = __set_page_dirty_nobuffers,
4973         .releasepage = cifs_release_page,
4974         .direct_IO = cifs_direct_io,
4975         .invalidatepage = cifs_invalidate_page,
4976         .launder_page = cifs_launder_page,
4977         /*
4978          * TODO: investigate and, if useful, we could add a cifs_migratePage
4979          * helper (under CONFIG_MIGRATION) in the future, and also
4980          * investigate and add an is_dirty_writeback helper if needed
4981          */
4982         .swap_activate = cifs_swap_activate,
4983         .swap_deactivate = cifs_swap_deactivate,
4984 };
4985
4986 /*
4987  * cifs_readpages requires the server to support a buffer large enough to
4988  * contain the header plus one complete page of data.  Otherwise, we need
4989  * to leave cifs_readpages out of the address space operations.
4990  */
4991 const struct address_space_operations cifs_addr_ops_smallbuf = {
4992         .readpage = cifs_readpage,
4993         .writepage = cifs_writepage,
4994         .writepages = cifs_writepages,
4995         .write_begin = cifs_write_begin,
4996         .write_end = cifs_write_end,
4997         .set_page_dirty = __set_page_dirty_nobuffers,
4998         .releasepage = cifs_release_page,
4999         .invalidatepage = cifs_invalidate_page,
5000         .launder_page = cifs_launder_page,
5001 };