ARM: 9148/1: handle CONFIG_CPU_ENDIAN_BE32 in arch/arm/kernel/head.S
[platform/kernel/linux-rpi.git] / fs / cifs / file.c
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *   fs/cifs/file.c
4  *
5  *   vfs operations that deal with files
6  *
7  *   Copyright (C) International Business Machines  Corp., 2002,2010
8  *   Author(s): Steve French (sfrench@us.ibm.com)
9  *              Jeremy Allison (jra@samba.org)
10  *
11  */
12 #include <linux/fs.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "cifs_unicode.h"
31 #include "cifs_debug.h"
32 #include "cifs_fs_sb.h"
33 #include "fscache.h"
34 #include "smbdirect.h"
35 #include "fs_context.h"
36 #include "cifs_ioctl.h"
37
38 static inline int cifs_convert_flags(unsigned int flags)
39 {
40         if ((flags & O_ACCMODE) == O_RDONLY)
41                 return GENERIC_READ;
42         else if ((flags & O_ACCMODE) == O_WRONLY)
43                 return GENERIC_WRITE;
44         else if ((flags & O_ACCMODE) == O_RDWR) {
45                 /* GENERIC_ALL is too much permission to request
46                    can cause unnecessary access denied on create */
47                 /* return GENERIC_ALL; */
48                 return (GENERIC_READ | GENERIC_WRITE);
49         }
50
51         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
52                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
53                 FILE_READ_DATA);
54 }
55
56 static u32 cifs_posix_convert_flags(unsigned int flags)
57 {
58         u32 posix_flags = 0;
59
60         if ((flags & O_ACCMODE) == O_RDONLY)
61                 posix_flags = SMB_O_RDONLY;
62         else if ((flags & O_ACCMODE) == O_WRONLY)
63                 posix_flags = SMB_O_WRONLY;
64         else if ((flags & O_ACCMODE) == O_RDWR)
65                 posix_flags = SMB_O_RDWR;
66
67         if (flags & O_CREAT) {
68                 posix_flags |= SMB_O_CREAT;
69                 if (flags & O_EXCL)
70                         posix_flags |= SMB_O_EXCL;
71         } else if (flags & O_EXCL)
72                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
73                          current->comm, current->tgid);
74
75         if (flags & O_TRUNC)
76                 posix_flags |= SMB_O_TRUNC;
77         /* be safe and imply O_SYNC for O_DSYNC */
78         if (flags & O_DSYNC)
79                 posix_flags |= SMB_O_SYNC;
80         if (flags & O_DIRECTORY)
81                 posix_flags |= SMB_O_DIRECTORY;
82         if (flags & O_NOFOLLOW)
83                 posix_flags |= SMB_O_NOFOLLOW;
84         if (flags & O_DIRECT)
85                 posix_flags |= SMB_O_DIRECT;
86
87         return posix_flags;
88 }
89
90 static inline int cifs_get_disposition(unsigned int flags)
91 {
92         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
93                 return FILE_CREATE;
94         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
95                 return FILE_OVERWRITE_IF;
96         else if ((flags & O_CREAT) == O_CREAT)
97                 return FILE_OPEN_IF;
98         else if ((flags & O_TRUNC) == O_TRUNC)
99                 return FILE_OVERWRITE;
100         else
101                 return FILE_OPEN;
102 }
103
104 int cifs_posix_open(const char *full_path, struct inode **pinode,
105                         struct super_block *sb, int mode, unsigned int f_flags,
106                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
107 {
108         int rc;
109         FILE_UNIX_BASIC_INFO *presp_data;
110         __u32 posix_flags = 0;
111         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
112         struct cifs_fattr fattr;
113         struct tcon_link *tlink;
114         struct cifs_tcon *tcon;
115
116         cifs_dbg(FYI, "posix open %s\n", full_path);
117
118         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
119         if (presp_data == NULL)
120                 return -ENOMEM;
121
122         tlink = cifs_sb_tlink(cifs_sb);
123         if (IS_ERR(tlink)) {
124                 rc = PTR_ERR(tlink);
125                 goto posix_open_ret;
126         }
127
128         tcon = tlink_tcon(tlink);
129         mode &= ~current_umask();
130
131         posix_flags = cifs_posix_convert_flags(f_flags);
132         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
133                              poplock, full_path, cifs_sb->local_nls,
134                              cifs_remap(cifs_sb));
135         cifs_put_tlink(tlink);
136
137         if (rc)
138                 goto posix_open_ret;
139
140         if (presp_data->Type == cpu_to_le32(-1))
141                 goto posix_open_ret; /* open ok, caller does qpathinfo */
142
143         if (!pinode)
144                 goto posix_open_ret; /* caller does not need info */
145
146         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
147
148         /* get new inode and set it up */
149         if (*pinode == NULL) {
150                 cifs_fill_uniqueid(sb, &fattr);
151                 *pinode = cifs_iget(sb, &fattr);
152                 if (!*pinode) {
153                         rc = -ENOMEM;
154                         goto posix_open_ret;
155                 }
156         } else {
157                 cifs_revalidate_mapping(*pinode);
158                 rc = cifs_fattr_to_inode(*pinode, &fattr);
159         }
160
161 posix_open_ret:
162         kfree(presp_data);
163         return rc;
164 }
165
166 static int
167 cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
168              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
169              struct cifs_fid *fid, unsigned int xid)
170 {
171         int rc;
172         int desired_access;
173         int disposition;
174         int create_options = CREATE_NOT_DIR;
175         FILE_ALL_INFO *buf;
176         struct TCP_Server_Info *server = tcon->ses->server;
177         struct cifs_open_parms oparms;
178
179         if (!server->ops->open)
180                 return -ENOSYS;
181
182         desired_access = cifs_convert_flags(f_flags);
183
184 /*********************************************************************
185  *  open flag mapping table:
186  *
187  *      POSIX Flag            CIFS Disposition
188  *      ----------            ----------------
189  *      O_CREAT               FILE_OPEN_IF
190  *      O_CREAT | O_EXCL      FILE_CREATE
191  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
192  *      O_TRUNC               FILE_OVERWRITE
193  *      none of the above     FILE_OPEN
194  *
195  *      Note that there is not a direct match between disposition
196  *      FILE_SUPERSEDE (ie create whether or not file exists although
197  *      O_CREAT | O_TRUNC is similar but truncates the existing
198  *      file rather than creating a new file as FILE_SUPERSEDE does
199  *      (which uses the attributes / metadata passed in on open call)
200  *?
201  *?  O_SYNC is a reasonable match to CIFS writethrough flag
202  *?  and the read write flags match reasonably.  O_LARGEFILE
203  *?  is irrelevant because largefile support is always used
204  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
205  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
206  *********************************************************************/
207
208         disposition = cifs_get_disposition(f_flags);
209
210         /* BB pass O_SYNC flag through on file attributes .. BB */
211
212         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
213         if (!buf)
214                 return -ENOMEM;
215
216         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
217         if (f_flags & O_SYNC)
218                 create_options |= CREATE_WRITE_THROUGH;
219
220         if (f_flags & O_DIRECT)
221                 create_options |= CREATE_NO_BUFFER;
222
223         oparms.tcon = tcon;
224         oparms.cifs_sb = cifs_sb;
225         oparms.desired_access = desired_access;
226         oparms.create_options = cifs_create_options(cifs_sb, create_options);
227         oparms.disposition = disposition;
228         oparms.path = full_path;
229         oparms.fid = fid;
230         oparms.reconnect = false;
231
232         rc = server->ops->open(xid, &oparms, oplock, buf);
233
234         if (rc)
235                 goto out;
236
237         /* TODO: Add support for calling posix query info but with passing in fid */
238         if (tcon->unix_ext)
239                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
240                                               xid);
241         else
242                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
243                                          xid, fid);
244
245         if (rc) {
246                 server->ops->close(xid, tcon, fid);
247                 if (rc == -ESTALE)
248                         rc = -EOPENSTALE;
249         }
250
251 out:
252         kfree(buf);
253         return rc;
254 }
255
256 static bool
257 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
258 {
259         struct cifs_fid_locks *cur;
260         bool has_locks = false;
261
262         down_read(&cinode->lock_sem);
263         list_for_each_entry(cur, &cinode->llist, llist) {
264                 if (!list_empty(&cur->locks)) {
265                         has_locks = true;
266                         break;
267                 }
268         }
269         up_read(&cinode->lock_sem);
270         return has_locks;
271 }
272
273 void
274 cifs_down_write(struct rw_semaphore *sem)
275 {
276         while (!down_write_trylock(sem))
277                 msleep(10);
278 }
279
280 static void cifsFileInfo_put_work(struct work_struct *work);
281
282 struct cifsFileInfo *
283 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
284                   struct tcon_link *tlink, __u32 oplock)
285 {
286         struct dentry *dentry = file_dentry(file);
287         struct inode *inode = d_inode(dentry);
288         struct cifsInodeInfo *cinode = CIFS_I(inode);
289         struct cifsFileInfo *cfile;
290         struct cifs_fid_locks *fdlocks;
291         struct cifs_tcon *tcon = tlink_tcon(tlink);
292         struct TCP_Server_Info *server = tcon->ses->server;
293
294         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
295         if (cfile == NULL)
296                 return cfile;
297
298         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
299         if (!fdlocks) {
300                 kfree(cfile);
301                 return NULL;
302         }
303
304         INIT_LIST_HEAD(&fdlocks->locks);
305         fdlocks->cfile = cfile;
306         cfile->llist = fdlocks;
307
308         cfile->count = 1;
309         cfile->pid = current->tgid;
310         cfile->uid = current_fsuid();
311         cfile->dentry = dget(dentry);
312         cfile->f_flags = file->f_flags;
313         cfile->invalidHandle = false;
314         cfile->deferred_close_scheduled = false;
315         cfile->tlink = cifs_get_tlink(tlink);
316         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
317         INIT_WORK(&cfile->put, cifsFileInfo_put_work);
318         INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
319         mutex_init(&cfile->fh_mutex);
320         spin_lock_init(&cfile->file_info_lock);
321
322         cifs_sb_active(inode->i_sb);
323
324         /*
325          * If the server returned a read oplock and we have mandatory brlocks,
326          * set oplock level to None.
327          */
328         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
329                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
330                 oplock = 0;
331         }
332
333         cifs_down_write(&cinode->lock_sem);
334         list_add(&fdlocks->llist, &cinode->llist);
335         up_write(&cinode->lock_sem);
336
337         spin_lock(&tcon->open_file_lock);
338         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
339                 oplock = fid->pending_open->oplock;
340         list_del(&fid->pending_open->olist);
341
342         fid->purge_cache = false;
343         server->ops->set_fid(cfile, fid, oplock);
344
345         list_add(&cfile->tlist, &tcon->openFileList);
346         atomic_inc(&tcon->num_local_opens);
347
348         /* if readable file instance put first in list*/
349         spin_lock(&cinode->open_file_lock);
350         if (file->f_mode & FMODE_READ)
351                 list_add(&cfile->flist, &cinode->openFileList);
352         else
353                 list_add_tail(&cfile->flist, &cinode->openFileList);
354         spin_unlock(&cinode->open_file_lock);
355         spin_unlock(&tcon->open_file_lock);
356
357         if (fid->purge_cache)
358                 cifs_zap_mapping(inode);
359
360         file->private_data = cfile;
361         return cfile;
362 }
363
364 struct cifsFileInfo *
365 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
366 {
367         spin_lock(&cifs_file->file_info_lock);
368         cifsFileInfo_get_locked(cifs_file);
369         spin_unlock(&cifs_file->file_info_lock);
370         return cifs_file;
371 }
372
373 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
374 {
375         struct inode *inode = d_inode(cifs_file->dentry);
376         struct cifsInodeInfo *cifsi = CIFS_I(inode);
377         struct cifsLockInfo *li, *tmp;
378         struct super_block *sb = inode->i_sb;
379
380         cifs_fscache_release_inode_cookie(inode);
381
382         /*
383          * Delete any outstanding lock records. We'll lose them when the file
384          * is closed anyway.
385          */
386         cifs_down_write(&cifsi->lock_sem);
387         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
388                 list_del(&li->llist);
389                 cifs_del_lock_waiters(li);
390                 kfree(li);
391         }
392         list_del(&cifs_file->llist->llist);
393         kfree(cifs_file->llist);
394         up_write(&cifsi->lock_sem);
395
396         cifs_put_tlink(cifs_file->tlink);
397         dput(cifs_file->dentry);
398         cifs_sb_deactive(sb);
399         kfree(cifs_file);
400 }
401
402 static void cifsFileInfo_put_work(struct work_struct *work)
403 {
404         struct cifsFileInfo *cifs_file = container_of(work,
405                         struct cifsFileInfo, put);
406
407         cifsFileInfo_put_final(cifs_file);
408 }
409
410 /**
411  * cifsFileInfo_put - release a reference of file priv data
412  *
413  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
414  *
415  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
416  */
417 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
418 {
419         _cifsFileInfo_put(cifs_file, true, true);
420 }
421
422 /**
423  * _cifsFileInfo_put - release a reference of file priv data
424  *
425  * This may involve closing the filehandle @cifs_file out on the
426  * server. Must be called without holding tcon->open_file_lock,
427  * cinode->open_file_lock and cifs_file->file_info_lock.
428  *
429  * If @wait_for_oplock_handler is true and we are releasing the last
430  * reference, wait for any running oplock break handler of the file
431  * and cancel any pending one.
432  *
433  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
434  * @wait_oplock_handler: must be false if called from oplock_break_handler
435  * @offload:    not offloaded on close and oplock breaks
436  *
437  */
438 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
439                        bool wait_oplock_handler, bool offload)
440 {
441         struct inode *inode = d_inode(cifs_file->dentry);
442         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
443         struct TCP_Server_Info *server = tcon->ses->server;
444         struct cifsInodeInfo *cifsi = CIFS_I(inode);
445         struct super_block *sb = inode->i_sb;
446         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
447         struct cifs_fid fid;
448         struct cifs_pending_open open;
449         bool oplock_break_cancelled;
450
451         spin_lock(&tcon->open_file_lock);
452         spin_lock(&cifsi->open_file_lock);
453         spin_lock(&cifs_file->file_info_lock);
454         if (--cifs_file->count > 0) {
455                 spin_unlock(&cifs_file->file_info_lock);
456                 spin_unlock(&cifsi->open_file_lock);
457                 spin_unlock(&tcon->open_file_lock);
458                 return;
459         }
460         spin_unlock(&cifs_file->file_info_lock);
461
462         if (server->ops->get_lease_key)
463                 server->ops->get_lease_key(inode, &fid);
464
465         /* store open in pending opens to make sure we don't miss lease break */
466         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
467
468         /* remove it from the lists */
469         list_del(&cifs_file->flist);
470         list_del(&cifs_file->tlist);
471         atomic_dec(&tcon->num_local_opens);
472
473         if (list_empty(&cifsi->openFileList)) {
474                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
475                          d_inode(cifs_file->dentry));
476                 /*
477                  * In strict cache mode we need invalidate mapping on the last
478                  * close  because it may cause a error when we open this file
479                  * again and get at least level II oplock.
480                  */
481                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
482                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
483                 cifs_set_oplock_level(cifsi, 0);
484         }
485
486         spin_unlock(&cifsi->open_file_lock);
487         spin_unlock(&tcon->open_file_lock);
488
489         oplock_break_cancelled = wait_oplock_handler ?
490                 cancel_work_sync(&cifs_file->oplock_break) : false;
491
492         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
493                 struct TCP_Server_Info *server = tcon->ses->server;
494                 unsigned int xid;
495
496                 xid = get_xid();
497                 if (server->ops->close_getattr)
498                         server->ops->close_getattr(xid, tcon, cifs_file);
499                 else if (server->ops->close)
500                         server->ops->close(xid, tcon, &cifs_file->fid);
501                 _free_xid(xid);
502         }
503
504         if (oplock_break_cancelled)
505                 cifs_done_oplock_break(cifsi);
506
507         cifs_del_pending_open(&open);
508
509         if (offload)
510                 queue_work(fileinfo_put_wq, &cifs_file->put);
511         else
512                 cifsFileInfo_put_final(cifs_file);
513 }
514
515 int cifs_open(struct inode *inode, struct file *file)
516
517 {
518         int rc = -EACCES;
519         unsigned int xid;
520         __u32 oplock;
521         struct cifs_sb_info *cifs_sb;
522         struct TCP_Server_Info *server;
523         struct cifs_tcon *tcon;
524         struct tcon_link *tlink;
525         struct cifsFileInfo *cfile = NULL;
526         void *page;
527         const char *full_path;
528         bool posix_open_ok = false;
529         struct cifs_fid fid;
530         struct cifs_pending_open open;
531
532         xid = get_xid();
533
534         cifs_sb = CIFS_SB(inode->i_sb);
535         if (unlikely(cifs_forced_shutdown(cifs_sb))) {
536                 free_xid(xid);
537                 return -EIO;
538         }
539
540         tlink = cifs_sb_tlink(cifs_sb);
541         if (IS_ERR(tlink)) {
542                 free_xid(xid);
543                 return PTR_ERR(tlink);
544         }
545         tcon = tlink_tcon(tlink);
546         server = tcon->ses->server;
547
548         page = alloc_dentry_path();
549         full_path = build_path_from_dentry(file_dentry(file), page);
550         if (IS_ERR(full_path)) {
551                 rc = PTR_ERR(full_path);
552                 goto out;
553         }
554
555         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
556                  inode, file->f_flags, full_path);
557
558         if (file->f_flags & O_DIRECT &&
559             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
560                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
561                         file->f_op = &cifs_file_direct_nobrl_ops;
562                 else
563                         file->f_op = &cifs_file_direct_ops;
564         }
565
566         /* Get the cached handle as SMB2 close is deferred */
567         rc = cifs_get_readable_path(tcon, full_path, &cfile);
568         if (rc == 0) {
569                 if (file->f_flags == cfile->f_flags) {
570                         file->private_data = cfile;
571                         spin_lock(&CIFS_I(inode)->deferred_lock);
572                         cifs_del_deferred_close(cfile);
573                         spin_unlock(&CIFS_I(inode)->deferred_lock);
574                         goto out;
575                 } else {
576                         _cifsFileInfo_put(cfile, true, false);
577                 }
578         }
579
580         if (server->oplocks)
581                 oplock = REQ_OPLOCK;
582         else
583                 oplock = 0;
584
585         if (!tcon->broken_posix_open && tcon->unix_ext &&
586             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
587                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
588                 /* can not refresh inode info since size could be stale */
589                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
590                                 cifs_sb->ctx->file_mode /* ignored */,
591                                 file->f_flags, &oplock, &fid.netfid, xid);
592                 if (rc == 0) {
593                         cifs_dbg(FYI, "posix open succeeded\n");
594                         posix_open_ok = true;
595                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
596                         if (tcon->ses->serverNOS)
597                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
598                                          tcon->ses->ip_addr,
599                                          tcon->ses->serverNOS);
600                         tcon->broken_posix_open = true;
601                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
602                          (rc != -EOPNOTSUPP)) /* path not found or net err */
603                         goto out;
604                 /*
605                  * Else fallthrough to retry open the old way on network i/o
606                  * or DFS errors.
607                  */
608         }
609
610         if (server->ops->get_lease_key)
611                 server->ops->get_lease_key(inode, &fid);
612
613         cifs_add_pending_open(&fid, tlink, &open);
614
615         if (!posix_open_ok) {
616                 if (server->ops->get_lease_key)
617                         server->ops->get_lease_key(inode, &fid);
618
619                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
620                                   file->f_flags, &oplock, &fid, xid);
621                 if (rc) {
622                         cifs_del_pending_open(&open);
623                         goto out;
624                 }
625         }
626
627         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
628         if (cfile == NULL) {
629                 if (server->ops->close)
630                         server->ops->close(xid, tcon, &fid);
631                 cifs_del_pending_open(&open);
632                 rc = -ENOMEM;
633                 goto out;
634         }
635
636         cifs_fscache_set_inode_cookie(inode, file);
637
638         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
639                 /*
640                  * Time to set mode which we can not set earlier due to
641                  * problems creating new read-only files.
642                  */
643                 struct cifs_unix_set_info_args args = {
644                         .mode   = inode->i_mode,
645                         .uid    = INVALID_UID, /* no change */
646                         .gid    = INVALID_GID, /* no change */
647                         .ctime  = NO_CHANGE_64,
648                         .atime  = NO_CHANGE_64,
649                         .mtime  = NO_CHANGE_64,
650                         .device = 0,
651                 };
652                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
653                                        cfile->pid);
654         }
655
656 out:
657         free_dentry_path(page);
658         free_xid(xid);
659         cifs_put_tlink(tlink);
660         return rc;
661 }
662
663 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
664
665 /*
666  * Try to reacquire byte range locks that were released when session
667  * to server was lost.
668  */
669 static int
670 cifs_relock_file(struct cifsFileInfo *cfile)
671 {
672         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
673         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
674         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
675         int rc = 0;
676
677         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
678         if (cinode->can_cache_brlcks) {
679                 /* can cache locks - no need to relock */
680                 up_read(&cinode->lock_sem);
681                 return rc;
682         }
683
684         if (cap_unix(tcon->ses) &&
685             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
686             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
687                 rc = cifs_push_posix_locks(cfile);
688         else
689                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
690
691         up_read(&cinode->lock_sem);
692         return rc;
693 }
694
695 static int
696 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
697 {
698         int rc = -EACCES;
699         unsigned int xid;
700         __u32 oplock;
701         struct cifs_sb_info *cifs_sb;
702         struct cifs_tcon *tcon;
703         struct TCP_Server_Info *server;
704         struct cifsInodeInfo *cinode;
705         struct inode *inode;
706         void *page;
707         const char *full_path;
708         int desired_access;
709         int disposition = FILE_OPEN;
710         int create_options = CREATE_NOT_DIR;
711         struct cifs_open_parms oparms;
712
713         xid = get_xid();
714         mutex_lock(&cfile->fh_mutex);
715         if (!cfile->invalidHandle) {
716                 mutex_unlock(&cfile->fh_mutex);
717                 free_xid(xid);
718                 return 0;
719         }
720
721         inode = d_inode(cfile->dentry);
722         cifs_sb = CIFS_SB(inode->i_sb);
723         tcon = tlink_tcon(cfile->tlink);
724         server = tcon->ses->server;
725
726         /*
727          * Can not grab rename sem here because various ops, including those
728          * that already have the rename sem can end up causing writepage to get
729          * called and if the server was down that means we end up here, and we
730          * can never tell if the caller already has the rename_sem.
731          */
732         page = alloc_dentry_path();
733         full_path = build_path_from_dentry(cfile->dentry, page);
734         if (IS_ERR(full_path)) {
735                 mutex_unlock(&cfile->fh_mutex);
736                 free_dentry_path(page);
737                 free_xid(xid);
738                 return PTR_ERR(full_path);
739         }
740
741         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
742                  inode, cfile->f_flags, full_path);
743
744         if (tcon->ses->server->oplocks)
745                 oplock = REQ_OPLOCK;
746         else
747                 oplock = 0;
748
749         if (tcon->unix_ext && cap_unix(tcon->ses) &&
750             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
751                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
752                 /*
753                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
754                  * original open. Must mask them off for a reopen.
755                  */
756                 unsigned int oflags = cfile->f_flags &
757                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
758
759                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
760                                      cifs_sb->ctx->file_mode /* ignored */,
761                                      oflags, &oplock, &cfile->fid.netfid, xid);
762                 if (rc == 0) {
763                         cifs_dbg(FYI, "posix reopen succeeded\n");
764                         oparms.reconnect = true;
765                         goto reopen_success;
766                 }
767                 /*
768                  * fallthrough to retry open the old way on errors, especially
769                  * in the reconnect path it is important to retry hard
770                  */
771         }
772
773         desired_access = cifs_convert_flags(cfile->f_flags);
774
775         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
776         if (cfile->f_flags & O_SYNC)
777                 create_options |= CREATE_WRITE_THROUGH;
778
779         if (cfile->f_flags & O_DIRECT)
780                 create_options |= CREATE_NO_BUFFER;
781
782         if (server->ops->get_lease_key)
783                 server->ops->get_lease_key(inode, &cfile->fid);
784
785         oparms.tcon = tcon;
786         oparms.cifs_sb = cifs_sb;
787         oparms.desired_access = desired_access;
788         oparms.create_options = cifs_create_options(cifs_sb, create_options);
789         oparms.disposition = disposition;
790         oparms.path = full_path;
791         oparms.fid = &cfile->fid;
792         oparms.reconnect = true;
793
794         /*
795          * Can not refresh inode by passing in file_info buf to be returned by
796          * ops->open and then calling get_inode_info with returned buf since
797          * file might have write behind data that needs to be flushed and server
798          * version of file size can be stale. If we knew for sure that inode was
799          * not dirty locally we could do this.
800          */
801         rc = server->ops->open(xid, &oparms, &oplock, NULL);
802         if (rc == -ENOENT && oparms.reconnect == false) {
803                 /* durable handle timeout is expired - open the file again */
804                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
805                 /* indicate that we need to relock the file */
806                 oparms.reconnect = true;
807         }
808
809         if (rc) {
810                 mutex_unlock(&cfile->fh_mutex);
811                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
812                 cifs_dbg(FYI, "oplock: %d\n", oplock);
813                 goto reopen_error_exit;
814         }
815
816 reopen_success:
817         cfile->invalidHandle = false;
818         mutex_unlock(&cfile->fh_mutex);
819         cinode = CIFS_I(inode);
820
821         if (can_flush) {
822                 rc = filemap_write_and_wait(inode->i_mapping);
823                 if (!is_interrupt_error(rc))
824                         mapping_set_error(inode->i_mapping, rc);
825
826                 if (tcon->posix_extensions)
827                         rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
828                 else if (tcon->unix_ext)
829                         rc = cifs_get_inode_info_unix(&inode, full_path,
830                                                       inode->i_sb, xid);
831                 else
832                         rc = cifs_get_inode_info(&inode, full_path, NULL,
833                                                  inode->i_sb, xid, NULL);
834         }
835         /*
836          * Else we are writing out data to server already and could deadlock if
837          * we tried to flush data, and since we do not know if we have data that
838          * would invalidate the current end of file on the server we can not go
839          * to the server to get the new inode info.
840          */
841
842         /*
843          * If the server returned a read oplock and we have mandatory brlocks,
844          * set oplock level to None.
845          */
846         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
847                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
848                 oplock = 0;
849         }
850
851         server->ops->set_fid(cfile, &cfile->fid, oplock);
852         if (oparms.reconnect)
853                 cifs_relock_file(cfile);
854
855 reopen_error_exit:
856         free_dentry_path(page);
857         free_xid(xid);
858         return rc;
859 }
860
861 void smb2_deferred_work_close(struct work_struct *work)
862 {
863         struct cifsFileInfo *cfile = container_of(work,
864                         struct cifsFileInfo, deferred.work);
865
866         spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
867         cifs_del_deferred_close(cfile);
868         cfile->deferred_close_scheduled = false;
869         spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
870         _cifsFileInfo_put(cfile, true, false);
871 }
872
873 int cifs_close(struct inode *inode, struct file *file)
874 {
875         struct cifsFileInfo *cfile;
876         struct cifsInodeInfo *cinode = CIFS_I(inode);
877         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
878         struct cifs_deferred_close *dclose;
879
880         if (file->private_data != NULL) {
881                 cfile = file->private_data;
882                 file->private_data = NULL;
883                 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
884                 if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
885                     cinode->lease_granted &&
886                     dclose) {
887                         if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
888                                 inode->i_ctime = inode->i_mtime = current_time(inode);
889                                 cifs_fscache_update_inode_cookie(inode);
890                         }
891                         spin_lock(&cinode->deferred_lock);
892                         cifs_add_deferred_close(cfile, dclose);
893                         if (cfile->deferred_close_scheduled &&
894                             delayed_work_pending(&cfile->deferred)) {
895                                 /*
896                                  * If there is no pending work, mod_delayed_work queues new work.
897                                  * So, Increase the ref count to avoid use-after-free.
898                                  */
899                                 if (!mod_delayed_work(deferredclose_wq,
900                                                 &cfile->deferred, cifs_sb->ctx->acregmax))
901                                         cifsFileInfo_get(cfile);
902                         } else {
903                                 /* Deferred close for files */
904                                 queue_delayed_work(deferredclose_wq,
905                                                 &cfile->deferred, cifs_sb->ctx->acregmax);
906                                 cfile->deferred_close_scheduled = true;
907                                 spin_unlock(&cinode->deferred_lock);
908                                 return 0;
909                         }
910                         spin_unlock(&cinode->deferred_lock);
911                         _cifsFileInfo_put(cfile, true, false);
912                 } else {
913                         _cifsFileInfo_put(cfile, true, false);
914                         kfree(dclose);
915                 }
916         }
917
918         /* return code from the ->release op is always ignored */
919         return 0;
920 }
921
922 void
923 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
924 {
925         struct cifsFileInfo *open_file;
926         struct list_head *tmp;
927         struct list_head *tmp1;
928         struct list_head tmp_list;
929
930         if (!tcon->use_persistent || !tcon->need_reopen_files)
931                 return;
932
933         tcon->need_reopen_files = false;
934
935         cifs_dbg(FYI, "Reopen persistent handles\n");
936         INIT_LIST_HEAD(&tmp_list);
937
938         /* list all files open on tree connection, reopen resilient handles  */
939         spin_lock(&tcon->open_file_lock);
940         list_for_each(tmp, &tcon->openFileList) {
941                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
942                 if (!open_file->invalidHandle)
943                         continue;
944                 cifsFileInfo_get(open_file);
945                 list_add_tail(&open_file->rlist, &tmp_list);
946         }
947         spin_unlock(&tcon->open_file_lock);
948
949         list_for_each_safe(tmp, tmp1, &tmp_list) {
950                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
951                 if (cifs_reopen_file(open_file, false /* do not flush */))
952                         tcon->need_reopen_files = true;
953                 list_del_init(&open_file->rlist);
954                 cifsFileInfo_put(open_file);
955         }
956 }
957
958 int cifs_closedir(struct inode *inode, struct file *file)
959 {
960         int rc = 0;
961         unsigned int xid;
962         struct cifsFileInfo *cfile = file->private_data;
963         struct cifs_tcon *tcon;
964         struct TCP_Server_Info *server;
965         char *buf;
966
967         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
968
969         if (cfile == NULL)
970                 return rc;
971
972         xid = get_xid();
973         tcon = tlink_tcon(cfile->tlink);
974         server = tcon->ses->server;
975
976         cifs_dbg(FYI, "Freeing private data in close dir\n");
977         spin_lock(&cfile->file_info_lock);
978         if (server->ops->dir_needs_close(cfile)) {
979                 cfile->invalidHandle = true;
980                 spin_unlock(&cfile->file_info_lock);
981                 if (server->ops->close_dir)
982                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
983                 else
984                         rc = -ENOSYS;
985                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
986                 /* not much we can do if it fails anyway, ignore rc */
987                 rc = 0;
988         } else
989                 spin_unlock(&cfile->file_info_lock);
990
991         buf = cfile->srch_inf.ntwrk_buf_start;
992         if (buf) {
993                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
994                 cfile->srch_inf.ntwrk_buf_start = NULL;
995                 if (cfile->srch_inf.smallBuf)
996                         cifs_small_buf_release(buf);
997                 else
998                         cifs_buf_release(buf);
999         }
1000
1001         cifs_put_tlink(cfile->tlink);
1002         kfree(file->private_data);
1003         file->private_data = NULL;
1004         /* BB can we lock the filestruct while this is going on? */
1005         free_xid(xid);
1006         return rc;
1007 }
1008
1009 static struct cifsLockInfo *
1010 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1011 {
1012         struct cifsLockInfo *lock =
1013                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1014         if (!lock)
1015                 return lock;
1016         lock->offset = offset;
1017         lock->length = length;
1018         lock->type = type;
1019         lock->pid = current->tgid;
1020         lock->flags = flags;
1021         INIT_LIST_HEAD(&lock->blist);
1022         init_waitqueue_head(&lock->block_q);
1023         return lock;
1024 }
1025
1026 void
1027 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1028 {
1029         struct cifsLockInfo *li, *tmp;
1030         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1031                 list_del_init(&li->blist);
1032                 wake_up(&li->block_q);
1033         }
1034 }
1035
1036 #define CIFS_LOCK_OP    0
1037 #define CIFS_READ_OP    1
1038 #define CIFS_WRITE_OP   2
1039
1040 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1041 static bool
1042 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1043                             __u64 length, __u8 type, __u16 flags,
1044                             struct cifsFileInfo *cfile,
1045                             struct cifsLockInfo **conf_lock, int rw_check)
1046 {
1047         struct cifsLockInfo *li;
1048         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1049         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1050
1051         list_for_each_entry(li, &fdlocks->locks, llist) {
1052                 if (offset + length <= li->offset ||
1053                     offset >= li->offset + li->length)
1054                         continue;
1055                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1056                     server->ops->compare_fids(cfile, cur_cfile)) {
1057                         /* shared lock prevents write op through the same fid */
1058                         if (!(li->type & server->vals->shared_lock_type) ||
1059                             rw_check != CIFS_WRITE_OP)
1060                                 continue;
1061                 }
1062                 if ((type & server->vals->shared_lock_type) &&
1063                     ((server->ops->compare_fids(cfile, cur_cfile) &&
1064                      current->tgid == li->pid) || type == li->type))
1065                         continue;
1066                 if (rw_check == CIFS_LOCK_OP &&
1067                     (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1068                     server->ops->compare_fids(cfile, cur_cfile))
1069                         continue;
1070                 if (conf_lock)
1071                         *conf_lock = li;
1072                 return true;
1073         }
1074         return false;
1075 }
1076
1077 bool
1078 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1079                         __u8 type, __u16 flags,
1080                         struct cifsLockInfo **conf_lock, int rw_check)
1081 {
1082         bool rc = false;
1083         struct cifs_fid_locks *cur;
1084         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1085
1086         list_for_each_entry(cur, &cinode->llist, llist) {
1087                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1088                                                  flags, cfile, conf_lock,
1089                                                  rw_check);
1090                 if (rc)
1091                         break;
1092         }
1093
1094         return rc;
1095 }
1096
1097 /*
1098  * Check if there is another lock that prevents us to set the lock (mandatory
1099  * style). If such a lock exists, update the flock structure with its
1100  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1101  * or leave it the same if we can't. Returns 0 if we don't need to request to
1102  * the server or 1 otherwise.
1103  */
1104 static int
1105 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1106                __u8 type, struct file_lock *flock)
1107 {
1108         int rc = 0;
1109         struct cifsLockInfo *conf_lock;
1110         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1111         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1112         bool exist;
1113
1114         down_read(&cinode->lock_sem);
1115
1116         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1117                                         flock->fl_flags, &conf_lock,
1118                                         CIFS_LOCK_OP);
1119         if (exist) {
1120                 flock->fl_start = conf_lock->offset;
1121                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1122                 flock->fl_pid = conf_lock->pid;
1123                 if (conf_lock->type & server->vals->shared_lock_type)
1124                         flock->fl_type = F_RDLCK;
1125                 else
1126                         flock->fl_type = F_WRLCK;
1127         } else if (!cinode->can_cache_brlcks)
1128                 rc = 1;
1129         else
1130                 flock->fl_type = F_UNLCK;
1131
1132         up_read(&cinode->lock_sem);
1133         return rc;
1134 }
1135
1136 static void
1137 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1138 {
1139         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1140         cifs_down_write(&cinode->lock_sem);
1141         list_add_tail(&lock->llist, &cfile->llist->locks);
1142         up_write(&cinode->lock_sem);
1143 }
1144
1145 /*
1146  * Set the byte-range lock (mandatory style). Returns:
1147  * 1) 0, if we set the lock and don't need to request to the server;
1148  * 2) 1, if no locks prevent us but we need to request to the server;
1149  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1150  */
1151 static int
1152 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1153                  bool wait)
1154 {
1155         struct cifsLockInfo *conf_lock;
1156         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1157         bool exist;
1158         int rc = 0;
1159
1160 try_again:
1161         exist = false;
1162         cifs_down_write(&cinode->lock_sem);
1163
1164         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1165                                         lock->type, lock->flags, &conf_lock,
1166                                         CIFS_LOCK_OP);
1167         if (!exist && cinode->can_cache_brlcks) {
1168                 list_add_tail(&lock->llist, &cfile->llist->locks);
1169                 up_write(&cinode->lock_sem);
1170                 return rc;
1171         }
1172
1173         if (!exist)
1174                 rc = 1;
1175         else if (!wait)
1176                 rc = -EACCES;
1177         else {
1178                 list_add_tail(&lock->blist, &conf_lock->blist);
1179                 up_write(&cinode->lock_sem);
1180                 rc = wait_event_interruptible(lock->block_q,
1181                                         (lock->blist.prev == &lock->blist) &&
1182                                         (lock->blist.next == &lock->blist));
1183                 if (!rc)
1184                         goto try_again;
1185                 cifs_down_write(&cinode->lock_sem);
1186                 list_del_init(&lock->blist);
1187         }
1188
1189         up_write(&cinode->lock_sem);
1190         return rc;
1191 }
1192
1193 /*
1194  * Check if there is another lock that prevents us to set the lock (posix
1195  * style). If such a lock exists, update the flock structure with its
1196  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1197  * or leave it the same if we can't. Returns 0 if we don't need to request to
1198  * the server or 1 otherwise.
1199  */
1200 static int
1201 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1202 {
1203         int rc = 0;
1204         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1205         unsigned char saved_type = flock->fl_type;
1206
1207         if ((flock->fl_flags & FL_POSIX) == 0)
1208                 return 1;
1209
1210         down_read(&cinode->lock_sem);
1211         posix_test_lock(file, flock);
1212
1213         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1214                 flock->fl_type = saved_type;
1215                 rc = 1;
1216         }
1217
1218         up_read(&cinode->lock_sem);
1219         return rc;
1220 }
1221
1222 /*
1223  * Set the byte-range lock (posix style). Returns:
1224  * 1) <0, if the error occurs while setting the lock;
1225  * 2) 0, if we set the lock and don't need to request to the server;
1226  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1227  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1228  */
1229 static int
1230 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1231 {
1232         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1233         int rc = FILE_LOCK_DEFERRED + 1;
1234
1235         if ((flock->fl_flags & FL_POSIX) == 0)
1236                 return rc;
1237
1238         cifs_down_write(&cinode->lock_sem);
1239         if (!cinode->can_cache_brlcks) {
1240                 up_write(&cinode->lock_sem);
1241                 return rc;
1242         }
1243
1244         rc = posix_lock_file(file, flock, NULL);
1245         up_write(&cinode->lock_sem);
1246         return rc;
1247 }
1248
1249 int
1250 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1251 {
1252         unsigned int xid;
1253         int rc = 0, stored_rc;
1254         struct cifsLockInfo *li, *tmp;
1255         struct cifs_tcon *tcon;
1256         unsigned int num, max_num, max_buf;
1257         LOCKING_ANDX_RANGE *buf, *cur;
1258         static const int types[] = {
1259                 LOCKING_ANDX_LARGE_FILES,
1260                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1261         };
1262         int i;
1263
1264         xid = get_xid();
1265         tcon = tlink_tcon(cfile->tlink);
1266
1267         /*
1268          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1269          * and check it before using.
1270          */
1271         max_buf = tcon->ses->server->maxBuf;
1272         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1273                 free_xid(xid);
1274                 return -EINVAL;
1275         }
1276
1277         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1278                      PAGE_SIZE);
1279         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1280                         PAGE_SIZE);
1281         max_num = (max_buf - sizeof(struct smb_hdr)) /
1282                                                 sizeof(LOCKING_ANDX_RANGE);
1283         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1284         if (!buf) {
1285                 free_xid(xid);
1286                 return -ENOMEM;
1287         }
1288
1289         for (i = 0; i < 2; i++) {
1290                 cur = buf;
1291                 num = 0;
1292                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1293                         if (li->type != types[i])
1294                                 continue;
1295                         cur->Pid = cpu_to_le16(li->pid);
1296                         cur->LengthLow = cpu_to_le32((u32)li->length);
1297                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1298                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1299                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1300                         if (++num == max_num) {
1301                                 stored_rc = cifs_lockv(xid, tcon,
1302                                                        cfile->fid.netfid,
1303                                                        (__u8)li->type, 0, num,
1304                                                        buf);
1305                                 if (stored_rc)
1306                                         rc = stored_rc;
1307                                 cur = buf;
1308                                 num = 0;
1309                         } else
1310                                 cur++;
1311                 }
1312
1313                 if (num) {
1314                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1315                                                (__u8)types[i], 0, num, buf);
1316                         if (stored_rc)
1317                                 rc = stored_rc;
1318                 }
1319         }
1320
1321         kfree(buf);
1322         free_xid(xid);
1323         return rc;
1324 }
1325
1326 static __u32
1327 hash_lockowner(fl_owner_t owner)
1328 {
1329         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1330 }
1331
1332 struct lock_to_push {
1333         struct list_head llist;
1334         __u64 offset;
1335         __u64 length;
1336         __u32 pid;
1337         __u16 netfid;
1338         __u8 type;
1339 };
1340
1341 static int
1342 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1343 {
1344         struct inode *inode = d_inode(cfile->dentry);
1345         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1346         struct file_lock *flock;
1347         struct file_lock_context *flctx = inode->i_flctx;
1348         unsigned int count = 0, i;
1349         int rc = 0, xid, type;
1350         struct list_head locks_to_send, *el;
1351         struct lock_to_push *lck, *tmp;
1352         __u64 length;
1353
1354         xid = get_xid();
1355
1356         if (!flctx)
1357                 goto out;
1358
1359         spin_lock(&flctx->flc_lock);
1360         list_for_each(el, &flctx->flc_posix) {
1361                 count++;
1362         }
1363         spin_unlock(&flctx->flc_lock);
1364
1365         INIT_LIST_HEAD(&locks_to_send);
1366
1367         /*
1368          * Allocating count locks is enough because no FL_POSIX locks can be
1369          * added to the list while we are holding cinode->lock_sem that
1370          * protects locking operations of this inode.
1371          */
1372         for (i = 0; i < count; i++) {
1373                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1374                 if (!lck) {
1375                         rc = -ENOMEM;
1376                         goto err_out;
1377                 }
1378                 list_add_tail(&lck->llist, &locks_to_send);
1379         }
1380
1381         el = locks_to_send.next;
1382         spin_lock(&flctx->flc_lock);
1383         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1384                 if (el == &locks_to_send) {
1385                         /*
1386                          * The list ended. We don't have enough allocated
1387                          * structures - something is really wrong.
1388                          */
1389                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1390                         break;
1391                 }
1392                 length = 1 + flock->fl_end - flock->fl_start;
1393                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1394                         type = CIFS_RDLCK;
1395                 else
1396                         type = CIFS_WRLCK;
1397                 lck = list_entry(el, struct lock_to_push, llist);
1398                 lck->pid = hash_lockowner(flock->fl_owner);
1399                 lck->netfid = cfile->fid.netfid;
1400                 lck->length = length;
1401                 lck->type = type;
1402                 lck->offset = flock->fl_start;
1403         }
1404         spin_unlock(&flctx->flc_lock);
1405
1406         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1407                 int stored_rc;
1408
1409                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1410                                              lck->offset, lck->length, NULL,
1411                                              lck->type, 0);
1412                 if (stored_rc)
1413                         rc = stored_rc;
1414                 list_del(&lck->llist);
1415                 kfree(lck);
1416         }
1417
1418 out:
1419         free_xid(xid);
1420         return rc;
1421 err_out:
1422         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1423                 list_del(&lck->llist);
1424                 kfree(lck);
1425         }
1426         goto out;
1427 }
1428
1429 static int
1430 cifs_push_locks(struct cifsFileInfo *cfile)
1431 {
1432         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1433         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1434         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1435         int rc = 0;
1436
1437         /* we are going to update can_cache_brlcks here - need a write access */
1438         cifs_down_write(&cinode->lock_sem);
1439         if (!cinode->can_cache_brlcks) {
1440                 up_write(&cinode->lock_sem);
1441                 return rc;
1442         }
1443
1444         if (cap_unix(tcon->ses) &&
1445             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1446             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1447                 rc = cifs_push_posix_locks(cfile);
1448         else
1449                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1450
1451         cinode->can_cache_brlcks = false;
1452         up_write(&cinode->lock_sem);
1453         return rc;
1454 }
1455
1456 static void
1457 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1458                 bool *wait_flag, struct TCP_Server_Info *server)
1459 {
1460         if (flock->fl_flags & FL_POSIX)
1461                 cifs_dbg(FYI, "Posix\n");
1462         if (flock->fl_flags & FL_FLOCK)
1463                 cifs_dbg(FYI, "Flock\n");
1464         if (flock->fl_flags & FL_SLEEP) {
1465                 cifs_dbg(FYI, "Blocking lock\n");
1466                 *wait_flag = true;
1467         }
1468         if (flock->fl_flags & FL_ACCESS)
1469                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1470         if (flock->fl_flags & FL_LEASE)
1471                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1472         if (flock->fl_flags &
1473             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1474                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1475                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1476
1477         *type = server->vals->large_lock_type;
1478         if (flock->fl_type == F_WRLCK) {
1479                 cifs_dbg(FYI, "F_WRLCK\n");
1480                 *type |= server->vals->exclusive_lock_type;
1481                 *lock = 1;
1482         } else if (flock->fl_type == F_UNLCK) {
1483                 cifs_dbg(FYI, "F_UNLCK\n");
1484                 *type |= server->vals->unlock_lock_type;
1485                 *unlock = 1;
1486                 /* Check if unlock includes more than one lock range */
1487         } else if (flock->fl_type == F_RDLCK) {
1488                 cifs_dbg(FYI, "F_RDLCK\n");
1489                 *type |= server->vals->shared_lock_type;
1490                 *lock = 1;
1491         } else if (flock->fl_type == F_EXLCK) {
1492                 cifs_dbg(FYI, "F_EXLCK\n");
1493                 *type |= server->vals->exclusive_lock_type;
1494                 *lock = 1;
1495         } else if (flock->fl_type == F_SHLCK) {
1496                 cifs_dbg(FYI, "F_SHLCK\n");
1497                 *type |= server->vals->shared_lock_type;
1498                 *lock = 1;
1499         } else
1500                 cifs_dbg(FYI, "Unknown type of lock\n");
1501 }
1502
1503 static int
1504 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1505            bool wait_flag, bool posix_lck, unsigned int xid)
1506 {
1507         int rc = 0;
1508         __u64 length = 1 + flock->fl_end - flock->fl_start;
1509         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1510         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1511         struct TCP_Server_Info *server = tcon->ses->server;
1512         __u16 netfid = cfile->fid.netfid;
1513
1514         if (posix_lck) {
1515                 int posix_lock_type;
1516
1517                 rc = cifs_posix_lock_test(file, flock);
1518                 if (!rc)
1519                         return rc;
1520
1521                 if (type & server->vals->shared_lock_type)
1522                         posix_lock_type = CIFS_RDLCK;
1523                 else
1524                         posix_lock_type = CIFS_WRLCK;
1525                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1526                                       hash_lockowner(flock->fl_owner),
1527                                       flock->fl_start, length, flock,
1528                                       posix_lock_type, wait_flag);
1529                 return rc;
1530         }
1531
1532         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1533         if (!rc)
1534                 return rc;
1535
1536         /* BB we could chain these into one lock request BB */
1537         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1538                                     1, 0, false);
1539         if (rc == 0) {
1540                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1541                                             type, 0, 1, false);
1542                 flock->fl_type = F_UNLCK;
1543                 if (rc != 0)
1544                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1545                                  rc);
1546                 return 0;
1547         }
1548
1549         if (type & server->vals->shared_lock_type) {
1550                 flock->fl_type = F_WRLCK;
1551                 return 0;
1552         }
1553
1554         type &= ~server->vals->exclusive_lock_type;
1555
1556         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1557                                     type | server->vals->shared_lock_type,
1558                                     1, 0, false);
1559         if (rc == 0) {
1560                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1561                         type | server->vals->shared_lock_type, 0, 1, false);
1562                 flock->fl_type = F_RDLCK;
1563                 if (rc != 0)
1564                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1565                                  rc);
1566         } else
1567                 flock->fl_type = F_WRLCK;
1568
1569         return 0;
1570 }
1571
1572 void
1573 cifs_move_llist(struct list_head *source, struct list_head *dest)
1574 {
1575         struct list_head *li, *tmp;
1576         list_for_each_safe(li, tmp, source)
1577                 list_move(li, dest);
1578 }
1579
1580 void
1581 cifs_free_llist(struct list_head *llist)
1582 {
1583         struct cifsLockInfo *li, *tmp;
1584         list_for_each_entry_safe(li, tmp, llist, llist) {
1585                 cifs_del_lock_waiters(li);
1586                 list_del(&li->llist);
1587                 kfree(li);
1588         }
1589 }
1590
1591 int
1592 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1593                   unsigned int xid)
1594 {
1595         int rc = 0, stored_rc;
1596         static const int types[] = {
1597                 LOCKING_ANDX_LARGE_FILES,
1598                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1599         };
1600         unsigned int i;
1601         unsigned int max_num, num, max_buf;
1602         LOCKING_ANDX_RANGE *buf, *cur;
1603         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1604         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1605         struct cifsLockInfo *li, *tmp;
1606         __u64 length = 1 + flock->fl_end - flock->fl_start;
1607         struct list_head tmp_llist;
1608
1609         INIT_LIST_HEAD(&tmp_llist);
1610
1611         /*
1612          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1613          * and check it before using.
1614          */
1615         max_buf = tcon->ses->server->maxBuf;
1616         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1617                 return -EINVAL;
1618
1619         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1620                      PAGE_SIZE);
1621         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1622                         PAGE_SIZE);
1623         max_num = (max_buf - sizeof(struct smb_hdr)) /
1624                                                 sizeof(LOCKING_ANDX_RANGE);
1625         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1626         if (!buf)
1627                 return -ENOMEM;
1628
1629         cifs_down_write(&cinode->lock_sem);
1630         for (i = 0; i < 2; i++) {
1631                 cur = buf;
1632                 num = 0;
1633                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1634                         if (flock->fl_start > li->offset ||
1635                             (flock->fl_start + length) <
1636                             (li->offset + li->length))
1637                                 continue;
1638                         if (current->tgid != li->pid)
1639                                 continue;
1640                         if (types[i] != li->type)
1641                                 continue;
1642                         if (cinode->can_cache_brlcks) {
1643                                 /*
1644                                  * We can cache brlock requests - simply remove
1645                                  * a lock from the file's list.
1646                                  */
1647                                 list_del(&li->llist);
1648                                 cifs_del_lock_waiters(li);
1649                                 kfree(li);
1650                                 continue;
1651                         }
1652                         cur->Pid = cpu_to_le16(li->pid);
1653                         cur->LengthLow = cpu_to_le32((u32)li->length);
1654                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1655                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1656                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1657                         /*
1658                          * We need to save a lock here to let us add it again to
1659                          * the file's list if the unlock range request fails on
1660                          * the server.
1661                          */
1662                         list_move(&li->llist, &tmp_llist);
1663                         if (++num == max_num) {
1664                                 stored_rc = cifs_lockv(xid, tcon,
1665                                                        cfile->fid.netfid,
1666                                                        li->type, num, 0, buf);
1667                                 if (stored_rc) {
1668                                         /*
1669                                          * We failed on the unlock range
1670                                          * request - add all locks from the tmp
1671                                          * list to the head of the file's list.
1672                                          */
1673                                         cifs_move_llist(&tmp_llist,
1674                                                         &cfile->llist->locks);
1675                                         rc = stored_rc;
1676                                 } else
1677                                         /*
1678                                          * The unlock range request succeed -
1679                                          * free the tmp list.
1680                                          */
1681                                         cifs_free_llist(&tmp_llist);
1682                                 cur = buf;
1683                                 num = 0;
1684                         } else
1685                                 cur++;
1686                 }
1687                 if (num) {
1688                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1689                                                types[i], num, 0, buf);
1690                         if (stored_rc) {
1691                                 cifs_move_llist(&tmp_llist,
1692                                                 &cfile->llist->locks);
1693                                 rc = stored_rc;
1694                         } else
1695                                 cifs_free_llist(&tmp_llist);
1696                 }
1697         }
1698
1699         up_write(&cinode->lock_sem);
1700         kfree(buf);
1701         return rc;
1702 }
1703
1704 static int
1705 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1706            bool wait_flag, bool posix_lck, int lock, int unlock,
1707            unsigned int xid)
1708 {
1709         int rc = 0;
1710         __u64 length = 1 + flock->fl_end - flock->fl_start;
1711         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1712         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1713         struct TCP_Server_Info *server = tcon->ses->server;
1714         struct inode *inode = d_inode(cfile->dentry);
1715
1716         if (posix_lck) {
1717                 int posix_lock_type;
1718
1719                 rc = cifs_posix_lock_set(file, flock);
1720                 if (rc <= FILE_LOCK_DEFERRED)
1721                         return rc;
1722
1723                 if (type & server->vals->shared_lock_type)
1724                         posix_lock_type = CIFS_RDLCK;
1725                 else
1726                         posix_lock_type = CIFS_WRLCK;
1727
1728                 if (unlock == 1)
1729                         posix_lock_type = CIFS_UNLCK;
1730
1731                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1732                                       hash_lockowner(flock->fl_owner),
1733                                       flock->fl_start, length,
1734                                       NULL, posix_lock_type, wait_flag);
1735                 goto out;
1736         }
1737
1738         if (lock) {
1739                 struct cifsLockInfo *lock;
1740
1741                 lock = cifs_lock_init(flock->fl_start, length, type,
1742                                       flock->fl_flags);
1743                 if (!lock)
1744                         return -ENOMEM;
1745
1746                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1747                 if (rc < 0) {
1748                         kfree(lock);
1749                         return rc;
1750                 }
1751                 if (!rc)
1752                         goto out;
1753
1754                 /*
1755                  * Windows 7 server can delay breaking lease from read to None
1756                  * if we set a byte-range lock on a file - break it explicitly
1757                  * before sending the lock to the server to be sure the next
1758                  * read won't conflict with non-overlapted locks due to
1759                  * pagereading.
1760                  */
1761                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1762                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1763                         cifs_zap_mapping(inode);
1764                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1765                                  inode);
1766                         CIFS_I(inode)->oplock = 0;
1767                 }
1768
1769                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1770                                             type, 1, 0, wait_flag);
1771                 if (rc) {
1772                         kfree(lock);
1773                         return rc;
1774                 }
1775
1776                 cifs_lock_add(cfile, lock);
1777         } else if (unlock)
1778                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1779
1780 out:
1781         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1782                 /*
1783                  * If this is a request to remove all locks because we
1784                  * are closing the file, it doesn't matter if the
1785                  * unlocking failed as both cifs.ko and the SMB server
1786                  * remove the lock on file close
1787                  */
1788                 if (rc) {
1789                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1790                         if (!(flock->fl_flags & FL_CLOSE))
1791                                 return rc;
1792                 }
1793                 rc = locks_lock_file_wait(file, flock);
1794         }
1795         return rc;
1796 }
1797
1798 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1799 {
1800         int rc, xid;
1801         int lock = 0, unlock = 0;
1802         bool wait_flag = false;
1803         bool posix_lck = false;
1804         struct cifs_sb_info *cifs_sb;
1805         struct cifs_tcon *tcon;
1806         struct cifsFileInfo *cfile;
1807         __u32 type;
1808
1809         rc = -EACCES;
1810         xid = get_xid();
1811
1812         if (!(fl->fl_flags & FL_FLOCK))
1813                 return -ENOLCK;
1814
1815         cfile = (struct cifsFileInfo *)file->private_data;
1816         tcon = tlink_tcon(cfile->tlink);
1817
1818         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1819                         tcon->ses->server);
1820         cifs_sb = CIFS_FILE_SB(file);
1821
1822         if (cap_unix(tcon->ses) &&
1823             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1824             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1825                 posix_lck = true;
1826
1827         if (!lock && !unlock) {
1828                 /*
1829                  * if no lock or unlock then nothing to do since we do not
1830                  * know what it is
1831                  */
1832                 free_xid(xid);
1833                 return -EOPNOTSUPP;
1834         }
1835
1836         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1837                         xid);
1838         free_xid(xid);
1839         return rc;
1840
1841
1842 }
1843
1844 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1845 {
1846         int rc, xid;
1847         int lock = 0, unlock = 0;
1848         bool wait_flag = false;
1849         bool posix_lck = false;
1850         struct cifs_sb_info *cifs_sb;
1851         struct cifs_tcon *tcon;
1852         struct cifsFileInfo *cfile;
1853         __u32 type;
1854
1855         rc = -EACCES;
1856         xid = get_xid();
1857
1858         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1859                  cmd, flock->fl_flags, flock->fl_type,
1860                  flock->fl_start, flock->fl_end);
1861
1862         cfile = (struct cifsFileInfo *)file->private_data;
1863         tcon = tlink_tcon(cfile->tlink);
1864
1865         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1866                         tcon->ses->server);
1867         cifs_sb = CIFS_FILE_SB(file);
1868
1869         if (cap_unix(tcon->ses) &&
1870             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1871             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1872                 posix_lck = true;
1873         /*
1874          * BB add code here to normalize offset and length to account for
1875          * negative length which we can not accept over the wire.
1876          */
1877         if (IS_GETLK(cmd)) {
1878                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1879                 free_xid(xid);
1880                 return rc;
1881         }
1882
1883         if (!lock && !unlock) {
1884                 /*
1885                  * if no lock or unlock then nothing to do since we do not
1886                  * know what it is
1887                  */
1888                 free_xid(xid);
1889                 return -EOPNOTSUPP;
1890         }
1891
1892         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1893                         xid);
1894         free_xid(xid);
1895         return rc;
1896 }
1897
1898 /*
1899  * update the file size (if needed) after a write. Should be called with
1900  * the inode->i_lock held
1901  */
1902 void
1903 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1904                       unsigned int bytes_written)
1905 {
1906         loff_t end_of_write = offset + bytes_written;
1907
1908         if (end_of_write > cifsi->server_eof)
1909                 cifsi->server_eof = end_of_write;
1910 }
1911
1912 static ssize_t
1913 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1914            size_t write_size, loff_t *offset)
1915 {
1916         int rc = 0;
1917         unsigned int bytes_written = 0;
1918         unsigned int total_written;
1919         struct cifs_tcon *tcon;
1920         struct TCP_Server_Info *server;
1921         unsigned int xid;
1922         struct dentry *dentry = open_file->dentry;
1923         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1924         struct cifs_io_parms io_parms = {0};
1925
1926         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1927                  write_size, *offset, dentry);
1928
1929         tcon = tlink_tcon(open_file->tlink);
1930         server = tcon->ses->server;
1931
1932         if (!server->ops->sync_write)
1933                 return -ENOSYS;
1934
1935         xid = get_xid();
1936
1937         for (total_written = 0; write_size > total_written;
1938              total_written += bytes_written) {
1939                 rc = -EAGAIN;
1940                 while (rc == -EAGAIN) {
1941                         struct kvec iov[2];
1942                         unsigned int len;
1943
1944                         if (open_file->invalidHandle) {
1945                                 /* we could deadlock if we called
1946                                    filemap_fdatawait from here so tell
1947                                    reopen_file not to flush data to
1948                                    server now */
1949                                 rc = cifs_reopen_file(open_file, false);
1950                                 if (rc != 0)
1951                                         break;
1952                         }
1953
1954                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1955                                   (unsigned int)write_size - total_written);
1956                         /* iov[0] is reserved for smb header */
1957                         iov[1].iov_base = (char *)write_data + total_written;
1958                         iov[1].iov_len = len;
1959                         io_parms.pid = pid;
1960                         io_parms.tcon = tcon;
1961                         io_parms.offset = *offset;
1962                         io_parms.length = len;
1963                         rc = server->ops->sync_write(xid, &open_file->fid,
1964                                         &io_parms, &bytes_written, iov, 1);
1965                 }
1966                 if (rc || (bytes_written == 0)) {
1967                         if (total_written)
1968                                 break;
1969                         else {
1970                                 free_xid(xid);
1971                                 return rc;
1972                         }
1973                 } else {
1974                         spin_lock(&d_inode(dentry)->i_lock);
1975                         cifs_update_eof(cifsi, *offset, bytes_written);
1976                         spin_unlock(&d_inode(dentry)->i_lock);
1977                         *offset += bytes_written;
1978                 }
1979         }
1980
1981         cifs_stats_bytes_written(tcon, total_written);
1982
1983         if (total_written > 0) {
1984                 spin_lock(&d_inode(dentry)->i_lock);
1985                 if (*offset > d_inode(dentry)->i_size) {
1986                         i_size_write(d_inode(dentry), *offset);
1987                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1988                 }
1989                 spin_unlock(&d_inode(dentry)->i_lock);
1990         }
1991         mark_inode_dirty_sync(d_inode(dentry));
1992         free_xid(xid);
1993         return total_written;
1994 }
1995
1996 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1997                                         bool fsuid_only)
1998 {
1999         struct cifsFileInfo *open_file = NULL;
2000         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2001
2002         /* only filter by fsuid on multiuser mounts */
2003         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2004                 fsuid_only = false;
2005
2006         spin_lock(&cifs_inode->open_file_lock);
2007         /* we could simply get the first_list_entry since write-only entries
2008            are always at the end of the list but since the first entry might
2009            have a close pending, we go through the whole list */
2010         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2011                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2012                         continue;
2013                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2014                         if ((!open_file->invalidHandle)) {
2015                                 /* found a good file */
2016                                 /* lock it so it will not be closed on us */
2017                                 cifsFileInfo_get(open_file);
2018                                 spin_unlock(&cifs_inode->open_file_lock);
2019                                 return open_file;
2020                         } /* else might as well continue, and look for
2021                              another, or simply have the caller reopen it
2022                              again rather than trying to fix this handle */
2023                 } else /* write only file */
2024                         break; /* write only files are last so must be done */
2025         }
2026         spin_unlock(&cifs_inode->open_file_lock);
2027         return NULL;
2028 }
2029
2030 /* Return -EBADF if no handle is found and general rc otherwise */
2031 int
2032 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2033                        struct cifsFileInfo **ret_file)
2034 {
2035         struct cifsFileInfo *open_file, *inv_file = NULL;
2036         struct cifs_sb_info *cifs_sb;
2037         bool any_available = false;
2038         int rc = -EBADF;
2039         unsigned int refind = 0;
2040         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2041         bool with_delete = flags & FIND_WR_WITH_DELETE;
2042         *ret_file = NULL;
2043
2044         /*
2045          * Having a null inode here (because mapping->host was set to zero by
2046          * the VFS or MM) should not happen but we had reports of on oops (due
2047          * to it being zero) during stress testcases so we need to check for it
2048          */
2049
2050         if (cifs_inode == NULL) {
2051                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2052                 dump_stack();
2053                 return rc;
2054         }
2055
2056         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2057
2058         /* only filter by fsuid on multiuser mounts */
2059         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2060                 fsuid_only = false;
2061
2062         spin_lock(&cifs_inode->open_file_lock);
2063 refind_writable:
2064         if (refind > MAX_REOPEN_ATT) {
2065                 spin_unlock(&cifs_inode->open_file_lock);
2066                 return rc;
2067         }
2068         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2069                 if (!any_available && open_file->pid != current->tgid)
2070                         continue;
2071                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2072                         continue;
2073                 if (with_delete && !(open_file->fid.access & DELETE))
2074                         continue;
2075                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2076                         if (!open_file->invalidHandle) {
2077                                 /* found a good writable file */
2078                                 cifsFileInfo_get(open_file);
2079                                 spin_unlock(&cifs_inode->open_file_lock);
2080                                 *ret_file = open_file;
2081                                 return 0;
2082                         } else {
2083                                 if (!inv_file)
2084                                         inv_file = open_file;
2085                         }
2086                 }
2087         }
2088         /* couldn't find useable FH with same pid, try any available */
2089         if (!any_available) {
2090                 any_available = true;
2091                 goto refind_writable;
2092         }
2093
2094         if (inv_file) {
2095                 any_available = false;
2096                 cifsFileInfo_get(inv_file);
2097         }
2098
2099         spin_unlock(&cifs_inode->open_file_lock);
2100
2101         if (inv_file) {
2102                 rc = cifs_reopen_file(inv_file, false);
2103                 if (!rc) {
2104                         *ret_file = inv_file;
2105                         return 0;
2106                 }
2107
2108                 spin_lock(&cifs_inode->open_file_lock);
2109                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2110                 spin_unlock(&cifs_inode->open_file_lock);
2111                 cifsFileInfo_put(inv_file);
2112                 ++refind;
2113                 inv_file = NULL;
2114                 spin_lock(&cifs_inode->open_file_lock);
2115                 goto refind_writable;
2116         }
2117
2118         return rc;
2119 }
2120
2121 struct cifsFileInfo *
2122 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2123 {
2124         struct cifsFileInfo *cfile;
2125         int rc;
2126
2127         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2128         if (rc)
2129                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2130
2131         return cfile;
2132 }
2133
2134 int
2135 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2136                        int flags,
2137                        struct cifsFileInfo **ret_file)
2138 {
2139         struct cifsFileInfo *cfile;
2140         void *page = alloc_dentry_path();
2141
2142         *ret_file = NULL;
2143
2144         spin_lock(&tcon->open_file_lock);
2145         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2146                 struct cifsInodeInfo *cinode;
2147                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2148                 if (IS_ERR(full_path)) {
2149                         spin_unlock(&tcon->open_file_lock);
2150                         free_dentry_path(page);
2151                         return PTR_ERR(full_path);
2152                 }
2153                 if (strcmp(full_path, name))
2154                         continue;
2155
2156                 cinode = CIFS_I(d_inode(cfile->dentry));
2157                 spin_unlock(&tcon->open_file_lock);
2158                 free_dentry_path(page);
2159                 return cifs_get_writable_file(cinode, flags, ret_file);
2160         }
2161
2162         spin_unlock(&tcon->open_file_lock);
2163         free_dentry_path(page);
2164         return -ENOENT;
2165 }
2166
2167 int
2168 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2169                        struct cifsFileInfo **ret_file)
2170 {
2171         struct cifsFileInfo *cfile;
2172         void *page = alloc_dentry_path();
2173
2174         *ret_file = NULL;
2175
2176         spin_lock(&tcon->open_file_lock);
2177         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2178                 struct cifsInodeInfo *cinode;
2179                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2180                 if (IS_ERR(full_path)) {
2181                         spin_unlock(&tcon->open_file_lock);
2182                         free_dentry_path(page);
2183                         return PTR_ERR(full_path);
2184                 }
2185                 if (strcmp(full_path, name))
2186                         continue;
2187
2188                 cinode = CIFS_I(d_inode(cfile->dentry));
2189                 spin_unlock(&tcon->open_file_lock);
2190                 free_dentry_path(page);
2191                 *ret_file = find_readable_file(cinode, 0);
2192                 return *ret_file ? 0 : -ENOENT;
2193         }
2194
2195         spin_unlock(&tcon->open_file_lock);
2196         free_dentry_path(page);
2197         return -ENOENT;
2198 }
2199
2200 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2201 {
2202         struct address_space *mapping = page->mapping;
2203         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2204         char *write_data;
2205         int rc = -EFAULT;
2206         int bytes_written = 0;
2207         struct inode *inode;
2208         struct cifsFileInfo *open_file;
2209
2210         if (!mapping || !mapping->host)
2211                 return -EFAULT;
2212
2213         inode = page->mapping->host;
2214
2215         offset += (loff_t)from;
2216         write_data = kmap(page);
2217         write_data += from;
2218
2219         if ((to > PAGE_SIZE) || (from > to)) {
2220                 kunmap(page);
2221                 return -EIO;
2222         }
2223
2224         /* racing with truncate? */
2225         if (offset > mapping->host->i_size) {
2226                 kunmap(page);
2227                 return 0; /* don't care */
2228         }
2229
2230         /* check to make sure that we are not extending the file */
2231         if (mapping->host->i_size - offset < (loff_t)to)
2232                 to = (unsigned)(mapping->host->i_size - offset);
2233
2234         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2235                                     &open_file);
2236         if (!rc) {
2237                 bytes_written = cifs_write(open_file, open_file->pid,
2238                                            write_data, to - from, &offset);
2239                 cifsFileInfo_put(open_file);
2240                 /* Does mm or vfs already set times? */
2241                 inode->i_atime = inode->i_mtime = current_time(inode);
2242                 if ((bytes_written > 0) && (offset))
2243                         rc = 0;
2244                 else if (bytes_written < 0)
2245                         rc = bytes_written;
2246                 else
2247                         rc = -EFAULT;
2248         } else {
2249                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2250                 if (!is_retryable_error(rc))
2251                         rc = -EIO;
2252         }
2253
2254         kunmap(page);
2255         return rc;
2256 }
2257
2258 static struct cifs_writedata *
2259 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2260                           pgoff_t end, pgoff_t *index,
2261                           unsigned int *found_pages)
2262 {
2263         struct cifs_writedata *wdata;
2264
2265         wdata = cifs_writedata_alloc((unsigned int)tofind,
2266                                      cifs_writev_complete);
2267         if (!wdata)
2268                 return NULL;
2269
2270         *found_pages = find_get_pages_range_tag(mapping, index, end,
2271                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2272         return wdata;
2273 }
2274
2275 static unsigned int
2276 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2277                     struct address_space *mapping,
2278                     struct writeback_control *wbc,
2279                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2280 {
2281         unsigned int nr_pages = 0, i;
2282         struct page *page;
2283
2284         for (i = 0; i < found_pages; i++) {
2285                 page = wdata->pages[i];
2286                 /*
2287                  * At this point we hold neither the i_pages lock nor the
2288                  * page lock: the page may be truncated or invalidated
2289                  * (changing page->mapping to NULL), or even swizzled
2290                  * back from swapper_space to tmpfs file mapping
2291                  */
2292
2293                 if (nr_pages == 0)
2294                         lock_page(page);
2295                 else if (!trylock_page(page))
2296                         break;
2297
2298                 if (unlikely(page->mapping != mapping)) {
2299                         unlock_page(page);
2300                         break;
2301                 }
2302
2303                 if (!wbc->range_cyclic && page->index > end) {
2304                         *done = true;
2305                         unlock_page(page);
2306                         break;
2307                 }
2308
2309                 if (*next && (page->index != *next)) {
2310                         /* Not next consecutive page */
2311                         unlock_page(page);
2312                         break;
2313                 }
2314
2315                 if (wbc->sync_mode != WB_SYNC_NONE)
2316                         wait_on_page_writeback(page);
2317
2318                 if (PageWriteback(page) ||
2319                                 !clear_page_dirty_for_io(page)) {
2320                         unlock_page(page);
2321                         break;
2322                 }
2323
2324                 /*
2325                  * This actually clears the dirty bit in the radix tree.
2326                  * See cifs_writepage() for more commentary.
2327                  */
2328                 set_page_writeback(page);
2329                 if (page_offset(page) >= i_size_read(mapping->host)) {
2330                         *done = true;
2331                         unlock_page(page);
2332                         end_page_writeback(page);
2333                         break;
2334                 }
2335
2336                 wdata->pages[i] = page;
2337                 *next = page->index + 1;
2338                 ++nr_pages;
2339         }
2340
2341         /* reset index to refind any pages skipped */
2342         if (nr_pages == 0)
2343                 *index = wdata->pages[0]->index + 1;
2344
2345         /* put any pages we aren't going to use */
2346         for (i = nr_pages; i < found_pages; i++) {
2347                 put_page(wdata->pages[i]);
2348                 wdata->pages[i] = NULL;
2349         }
2350
2351         return nr_pages;
2352 }
2353
2354 static int
2355 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2356                  struct address_space *mapping, struct writeback_control *wbc)
2357 {
2358         int rc;
2359
2360         wdata->sync_mode = wbc->sync_mode;
2361         wdata->nr_pages = nr_pages;
2362         wdata->offset = page_offset(wdata->pages[0]);
2363         wdata->pagesz = PAGE_SIZE;
2364         wdata->tailsz = min(i_size_read(mapping->host) -
2365                         page_offset(wdata->pages[nr_pages - 1]),
2366                         (loff_t)PAGE_SIZE);
2367         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2368         wdata->pid = wdata->cfile->pid;
2369
2370         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2371         if (rc)
2372                 return rc;
2373
2374         if (wdata->cfile->invalidHandle)
2375                 rc = -EAGAIN;
2376         else
2377                 rc = wdata->server->ops->async_writev(wdata,
2378                                                       cifs_writedata_release);
2379
2380         return rc;
2381 }
2382
2383 static int cifs_writepages(struct address_space *mapping,
2384                            struct writeback_control *wbc)
2385 {
2386         struct inode *inode = mapping->host;
2387         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2388         struct TCP_Server_Info *server;
2389         bool done = false, scanned = false, range_whole = false;
2390         pgoff_t end, index;
2391         struct cifs_writedata *wdata;
2392         struct cifsFileInfo *cfile = NULL;
2393         int rc = 0;
2394         int saved_rc = 0;
2395         unsigned int xid;
2396
2397         /*
2398          * If wsize is smaller than the page cache size, default to writing
2399          * one page at a time via cifs_writepage
2400          */
2401         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2402                 return generic_writepages(mapping, wbc);
2403
2404         xid = get_xid();
2405         if (wbc->range_cyclic) {
2406                 index = mapping->writeback_index; /* Start from prev offset */
2407                 end = -1;
2408         } else {
2409                 index = wbc->range_start >> PAGE_SHIFT;
2410                 end = wbc->range_end >> PAGE_SHIFT;
2411                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2412                         range_whole = true;
2413                 scanned = true;
2414         }
2415         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2416
2417 retry:
2418         while (!done && index <= end) {
2419                 unsigned int i, nr_pages, found_pages, wsize;
2420                 pgoff_t next = 0, tofind, saved_index = index;
2421                 struct cifs_credits credits_on_stack;
2422                 struct cifs_credits *credits = &credits_on_stack;
2423                 int get_file_rc = 0;
2424
2425                 if (cfile)
2426                         cifsFileInfo_put(cfile);
2427
2428                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2429
2430                 /* in case of an error store it to return later */
2431                 if (rc)
2432                         get_file_rc = rc;
2433
2434                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2435                                                    &wsize, credits);
2436                 if (rc != 0) {
2437                         done = true;
2438                         break;
2439                 }
2440
2441                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2442
2443                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2444                                                   &found_pages);
2445                 if (!wdata) {
2446                         rc = -ENOMEM;
2447                         done = true;
2448                         add_credits_and_wake_if(server, credits, 0);
2449                         break;
2450                 }
2451
2452                 if (found_pages == 0) {
2453                         kref_put(&wdata->refcount, cifs_writedata_release);
2454                         add_credits_and_wake_if(server, credits, 0);
2455                         break;
2456                 }
2457
2458                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2459                                                end, &index, &next, &done);
2460
2461                 /* nothing to write? */
2462                 if (nr_pages == 0) {
2463                         kref_put(&wdata->refcount, cifs_writedata_release);
2464                         add_credits_and_wake_if(server, credits, 0);
2465                         continue;
2466                 }
2467
2468                 wdata->credits = credits_on_stack;
2469                 wdata->cfile = cfile;
2470                 wdata->server = server;
2471                 cfile = NULL;
2472
2473                 if (!wdata->cfile) {
2474                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2475                                  get_file_rc);
2476                         if (is_retryable_error(get_file_rc))
2477                                 rc = get_file_rc;
2478                         else
2479                                 rc = -EBADF;
2480                 } else
2481                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2482
2483                 for (i = 0; i < nr_pages; ++i)
2484                         unlock_page(wdata->pages[i]);
2485
2486                 /* send failure -- clean up the mess */
2487                 if (rc != 0) {
2488                         add_credits_and_wake_if(server, &wdata->credits, 0);
2489                         for (i = 0; i < nr_pages; ++i) {
2490                                 if (is_retryable_error(rc))
2491                                         redirty_page_for_writepage(wbc,
2492                                                            wdata->pages[i]);
2493                                 else
2494                                         SetPageError(wdata->pages[i]);
2495                                 end_page_writeback(wdata->pages[i]);
2496                                 put_page(wdata->pages[i]);
2497                         }
2498                         if (!is_retryable_error(rc))
2499                                 mapping_set_error(mapping, rc);
2500                 }
2501                 kref_put(&wdata->refcount, cifs_writedata_release);
2502
2503                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2504                         index = saved_index;
2505                         continue;
2506                 }
2507
2508                 /* Return immediately if we received a signal during writing */
2509                 if (is_interrupt_error(rc)) {
2510                         done = true;
2511                         break;
2512                 }
2513
2514                 if (rc != 0 && saved_rc == 0)
2515                         saved_rc = rc;
2516
2517                 wbc->nr_to_write -= nr_pages;
2518                 if (wbc->nr_to_write <= 0)
2519                         done = true;
2520
2521                 index = next;
2522         }
2523
2524         if (!scanned && !done) {
2525                 /*
2526                  * We hit the last page and there is more work to be done: wrap
2527                  * back to the start of the file
2528                  */
2529                 scanned = true;
2530                 index = 0;
2531                 goto retry;
2532         }
2533
2534         if (saved_rc != 0)
2535                 rc = saved_rc;
2536
2537         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2538                 mapping->writeback_index = index;
2539
2540         if (cfile)
2541                 cifsFileInfo_put(cfile);
2542         free_xid(xid);
2543         /* Indication to update ctime and mtime as close is deferred */
2544         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2545         return rc;
2546 }
2547
2548 static int
2549 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2550 {
2551         int rc;
2552         unsigned int xid;
2553
2554         xid = get_xid();
2555 /* BB add check for wbc flags */
2556         get_page(page);
2557         if (!PageUptodate(page))
2558                 cifs_dbg(FYI, "ppw - page not up to date\n");
2559
2560         /*
2561          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2562          *
2563          * A writepage() implementation always needs to do either this,
2564          * or re-dirty the page with "redirty_page_for_writepage()" in
2565          * the case of a failure.
2566          *
2567          * Just unlocking the page will cause the radix tree tag-bits
2568          * to fail to update with the state of the page correctly.
2569          */
2570         set_page_writeback(page);
2571 retry_write:
2572         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2573         if (is_retryable_error(rc)) {
2574                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2575                         goto retry_write;
2576                 redirty_page_for_writepage(wbc, page);
2577         } else if (rc != 0) {
2578                 SetPageError(page);
2579                 mapping_set_error(page->mapping, rc);
2580         } else {
2581                 SetPageUptodate(page);
2582         }
2583         end_page_writeback(page);
2584         put_page(page);
2585         free_xid(xid);
2586         return rc;
2587 }
2588
2589 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2590 {
2591         int rc = cifs_writepage_locked(page, wbc);
2592         unlock_page(page);
2593         return rc;
2594 }
2595
2596 static int cifs_write_end(struct file *file, struct address_space *mapping,
2597                         loff_t pos, unsigned len, unsigned copied,
2598                         struct page *page, void *fsdata)
2599 {
2600         int rc;
2601         struct inode *inode = mapping->host;
2602         struct cifsFileInfo *cfile = file->private_data;
2603         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2604         __u32 pid;
2605
2606         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2607                 pid = cfile->pid;
2608         else
2609                 pid = current->tgid;
2610
2611         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2612                  page, pos, copied);
2613
2614         if (PageChecked(page)) {
2615                 if (copied == len)
2616                         SetPageUptodate(page);
2617                 ClearPageChecked(page);
2618         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2619                 SetPageUptodate(page);
2620
2621         if (!PageUptodate(page)) {
2622                 char *page_data;
2623                 unsigned offset = pos & (PAGE_SIZE - 1);
2624                 unsigned int xid;
2625
2626                 xid = get_xid();
2627                 /* this is probably better than directly calling
2628                    partialpage_write since in this function the file handle is
2629                    known which we might as well leverage */
2630                 /* BB check if anything else missing out of ppw
2631                    such as updating last write time */
2632                 page_data = kmap(page);
2633                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2634                 /* if (rc < 0) should we set writebehind rc? */
2635                 kunmap(page);
2636
2637                 free_xid(xid);
2638         } else {
2639                 rc = copied;
2640                 pos += copied;
2641                 set_page_dirty(page);
2642         }
2643
2644         if (rc > 0) {
2645                 spin_lock(&inode->i_lock);
2646                 if (pos > inode->i_size) {
2647                         i_size_write(inode, pos);
2648                         inode->i_blocks = (512 - 1 + pos) >> 9;
2649                 }
2650                 spin_unlock(&inode->i_lock);
2651         }
2652
2653         unlock_page(page);
2654         put_page(page);
2655         /* Indication to update ctime and mtime as close is deferred */
2656         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2657
2658         return rc;
2659 }
2660
2661 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2662                       int datasync)
2663 {
2664         unsigned int xid;
2665         int rc = 0;
2666         struct cifs_tcon *tcon;
2667         struct TCP_Server_Info *server;
2668         struct cifsFileInfo *smbfile = file->private_data;
2669         struct inode *inode = file_inode(file);
2670         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2671
2672         rc = file_write_and_wait_range(file, start, end);
2673         if (rc) {
2674                 trace_cifs_fsync_err(inode->i_ino, rc);
2675                 return rc;
2676         }
2677
2678         xid = get_xid();
2679
2680         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2681                  file, datasync);
2682
2683         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2684                 rc = cifs_zap_mapping(inode);
2685                 if (rc) {
2686                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2687                         rc = 0; /* don't care about it in fsync */
2688                 }
2689         }
2690
2691         tcon = tlink_tcon(smbfile->tlink);
2692         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2693                 server = tcon->ses->server;
2694                 if (server->ops->flush)
2695                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2696                 else
2697                         rc = -ENOSYS;
2698         }
2699
2700         free_xid(xid);
2701         return rc;
2702 }
2703
2704 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2705 {
2706         unsigned int xid;
2707         int rc = 0;
2708         struct cifs_tcon *tcon;
2709         struct TCP_Server_Info *server;
2710         struct cifsFileInfo *smbfile = file->private_data;
2711         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2712
2713         rc = file_write_and_wait_range(file, start, end);
2714         if (rc) {
2715                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2716                 return rc;
2717         }
2718
2719         xid = get_xid();
2720
2721         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2722                  file, datasync);
2723
2724         tcon = tlink_tcon(smbfile->tlink);
2725         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2726                 server = tcon->ses->server;
2727                 if (server->ops->flush)
2728                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2729                 else
2730                         rc = -ENOSYS;
2731         }
2732
2733         free_xid(xid);
2734         return rc;
2735 }
2736
2737 /*
2738  * As file closes, flush all cached write data for this inode checking
2739  * for write behind errors.
2740  */
2741 int cifs_flush(struct file *file, fl_owner_t id)
2742 {
2743         struct inode *inode = file_inode(file);
2744         int rc = 0;
2745
2746         if (file->f_mode & FMODE_WRITE)
2747                 rc = filemap_write_and_wait(inode->i_mapping);
2748
2749         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2750         if (rc)
2751                 trace_cifs_flush_err(inode->i_ino, rc);
2752         return rc;
2753 }
2754
2755 static int
2756 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2757 {
2758         int rc = 0;
2759         unsigned long i;
2760
2761         for (i = 0; i < num_pages; i++) {
2762                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2763                 if (!pages[i]) {
2764                         /*
2765                          * save number of pages we have already allocated and
2766                          * return with ENOMEM error
2767                          */
2768                         num_pages = i;
2769                         rc = -ENOMEM;
2770                         break;
2771                 }
2772         }
2773
2774         if (rc) {
2775                 for (i = 0; i < num_pages; i++)
2776                         put_page(pages[i]);
2777         }
2778         return rc;
2779 }
2780
2781 static inline
2782 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2783 {
2784         size_t num_pages;
2785         size_t clen;
2786
2787         clen = min_t(const size_t, len, wsize);
2788         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2789
2790         if (cur_len)
2791                 *cur_len = clen;
2792
2793         return num_pages;
2794 }
2795
2796 static void
2797 cifs_uncached_writedata_release(struct kref *refcount)
2798 {
2799         int i;
2800         struct cifs_writedata *wdata = container_of(refcount,
2801                                         struct cifs_writedata, refcount);
2802
2803         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2804         for (i = 0; i < wdata->nr_pages; i++)
2805                 put_page(wdata->pages[i]);
2806         cifs_writedata_release(refcount);
2807 }
2808
2809 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2810
2811 static void
2812 cifs_uncached_writev_complete(struct work_struct *work)
2813 {
2814         struct cifs_writedata *wdata = container_of(work,
2815                                         struct cifs_writedata, work);
2816         struct inode *inode = d_inode(wdata->cfile->dentry);
2817         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2818
2819         spin_lock(&inode->i_lock);
2820         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2821         if (cifsi->server_eof > inode->i_size)
2822                 i_size_write(inode, cifsi->server_eof);
2823         spin_unlock(&inode->i_lock);
2824
2825         complete(&wdata->done);
2826         collect_uncached_write_data(wdata->ctx);
2827         /* the below call can possibly free the last ref to aio ctx */
2828         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2829 }
2830
2831 static int
2832 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2833                       size_t *len, unsigned long *num_pages)
2834 {
2835         size_t save_len, copied, bytes, cur_len = *len;
2836         unsigned long i, nr_pages = *num_pages;
2837
2838         save_len = cur_len;
2839         for (i = 0; i < nr_pages; i++) {
2840                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2841                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2842                 cur_len -= copied;
2843                 /*
2844                  * If we didn't copy as much as we expected, then that
2845                  * may mean we trod into an unmapped area. Stop copying
2846                  * at that point. On the next pass through the big
2847                  * loop, we'll likely end up getting a zero-length
2848                  * write and bailing out of it.
2849                  */
2850                 if (copied < bytes)
2851                         break;
2852         }
2853         cur_len = save_len - cur_len;
2854         *len = cur_len;
2855
2856         /*
2857          * If we have no data to send, then that probably means that
2858          * the copy above failed altogether. That's most likely because
2859          * the address in the iovec was bogus. Return -EFAULT and let
2860          * the caller free anything we allocated and bail out.
2861          */
2862         if (!cur_len)
2863                 return -EFAULT;
2864
2865         /*
2866          * i + 1 now represents the number of pages we actually used in
2867          * the copy phase above.
2868          */
2869         *num_pages = i + 1;
2870         return 0;
2871 }
2872
2873 static int
2874 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2875         struct cifs_aio_ctx *ctx)
2876 {
2877         unsigned int wsize;
2878         struct cifs_credits credits;
2879         int rc;
2880         struct TCP_Server_Info *server = wdata->server;
2881
2882         do {
2883                 if (wdata->cfile->invalidHandle) {
2884                         rc = cifs_reopen_file(wdata->cfile, false);
2885                         if (rc == -EAGAIN)
2886                                 continue;
2887                         else if (rc)
2888                                 break;
2889                 }
2890
2891
2892                 /*
2893                  * Wait for credits to resend this wdata.
2894                  * Note: we are attempting to resend the whole wdata not in
2895                  * segments
2896                  */
2897                 do {
2898                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2899                                                 &wsize, &credits);
2900                         if (rc)
2901                                 goto fail;
2902
2903                         if (wsize < wdata->bytes) {
2904                                 add_credits_and_wake_if(server, &credits, 0);
2905                                 msleep(1000);
2906                         }
2907                 } while (wsize < wdata->bytes);
2908                 wdata->credits = credits;
2909
2910                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2911
2912                 if (!rc) {
2913                         if (wdata->cfile->invalidHandle)
2914                                 rc = -EAGAIN;
2915                         else {
2916 #ifdef CONFIG_CIFS_SMB_DIRECT
2917                                 if (wdata->mr) {
2918                                         wdata->mr->need_invalidate = true;
2919                                         smbd_deregister_mr(wdata->mr);
2920                                         wdata->mr = NULL;
2921                                 }
2922 #endif
2923                                 rc = server->ops->async_writev(wdata,
2924                                         cifs_uncached_writedata_release);
2925                         }
2926                 }
2927
2928                 /* If the write was successfully sent, we are done */
2929                 if (!rc) {
2930                         list_add_tail(&wdata->list, wdata_list);
2931                         return 0;
2932                 }
2933
2934                 /* Roll back credits and retry if needed */
2935                 add_credits_and_wake_if(server, &wdata->credits, 0);
2936         } while (rc == -EAGAIN);
2937
2938 fail:
2939         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2940         return rc;
2941 }
2942
2943 static int
2944 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2945                      struct cifsFileInfo *open_file,
2946                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2947                      struct cifs_aio_ctx *ctx)
2948 {
2949         int rc = 0;
2950         size_t cur_len;
2951         unsigned long nr_pages, num_pages, i;
2952         struct cifs_writedata *wdata;
2953         struct iov_iter saved_from = *from;
2954         loff_t saved_offset = offset;
2955         pid_t pid;
2956         struct TCP_Server_Info *server;
2957         struct page **pagevec;
2958         size_t start;
2959         unsigned int xid;
2960
2961         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2962                 pid = open_file->pid;
2963         else
2964                 pid = current->tgid;
2965
2966         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2967         xid = get_xid();
2968
2969         do {
2970                 unsigned int wsize;
2971                 struct cifs_credits credits_on_stack;
2972                 struct cifs_credits *credits = &credits_on_stack;
2973
2974                 if (open_file->invalidHandle) {
2975                         rc = cifs_reopen_file(open_file, false);
2976                         if (rc == -EAGAIN)
2977                                 continue;
2978                         else if (rc)
2979                                 break;
2980                 }
2981
2982                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2983                                                    &wsize, credits);
2984                 if (rc)
2985                         break;
2986
2987                 cur_len = min_t(const size_t, len, wsize);
2988
2989                 if (ctx->direct_io) {
2990                         ssize_t result;
2991
2992                         result = iov_iter_get_pages_alloc(
2993                                 from, &pagevec, cur_len, &start);
2994                         if (result < 0) {
2995                                 cifs_dbg(VFS,
2996                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2997                                          result, iov_iter_type(from),
2998                                          from->iov_offset, from->count);
2999                                 dump_stack();
3000
3001                                 rc = result;
3002                                 add_credits_and_wake_if(server, credits, 0);
3003                                 break;
3004                         }
3005                         cur_len = (size_t)result;
3006                         iov_iter_advance(from, cur_len);
3007
3008                         nr_pages =
3009                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3010
3011                         wdata = cifs_writedata_direct_alloc(pagevec,
3012                                              cifs_uncached_writev_complete);
3013                         if (!wdata) {
3014                                 rc = -ENOMEM;
3015                                 add_credits_and_wake_if(server, credits, 0);
3016                                 break;
3017                         }
3018
3019
3020                         wdata->page_offset = start;
3021                         wdata->tailsz =
3022                                 nr_pages > 1 ?
3023                                         cur_len - (PAGE_SIZE - start) -
3024                                         (nr_pages - 2) * PAGE_SIZE :
3025                                         cur_len;
3026                 } else {
3027                         nr_pages = get_numpages(wsize, len, &cur_len);
3028                         wdata = cifs_writedata_alloc(nr_pages,
3029                                              cifs_uncached_writev_complete);
3030                         if (!wdata) {
3031                                 rc = -ENOMEM;
3032                                 add_credits_and_wake_if(server, credits, 0);
3033                                 break;
3034                         }
3035
3036                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3037                         if (rc) {
3038                                 kvfree(wdata->pages);
3039                                 kfree(wdata);
3040                                 add_credits_and_wake_if(server, credits, 0);
3041                                 break;
3042                         }
3043
3044                         num_pages = nr_pages;
3045                         rc = wdata_fill_from_iovec(
3046                                 wdata, from, &cur_len, &num_pages);
3047                         if (rc) {
3048                                 for (i = 0; i < nr_pages; i++)
3049                                         put_page(wdata->pages[i]);
3050                                 kvfree(wdata->pages);
3051                                 kfree(wdata);
3052                                 add_credits_and_wake_if(server, credits, 0);
3053                                 break;
3054                         }
3055
3056                         /*
3057                          * Bring nr_pages down to the number of pages we
3058                          * actually used, and free any pages that we didn't use.
3059                          */
3060                         for ( ; nr_pages > num_pages; nr_pages--)
3061                                 put_page(wdata->pages[nr_pages - 1]);
3062
3063                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3064                 }
3065
3066                 wdata->sync_mode = WB_SYNC_ALL;
3067                 wdata->nr_pages = nr_pages;
3068                 wdata->offset = (__u64)offset;
3069                 wdata->cfile = cifsFileInfo_get(open_file);
3070                 wdata->server = server;
3071                 wdata->pid = pid;
3072                 wdata->bytes = cur_len;
3073                 wdata->pagesz = PAGE_SIZE;
3074                 wdata->credits = credits_on_stack;
3075                 wdata->ctx = ctx;
3076                 kref_get(&ctx->refcount);
3077
3078                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3079
3080                 if (!rc) {
3081                         if (wdata->cfile->invalidHandle)
3082                                 rc = -EAGAIN;
3083                         else
3084                                 rc = server->ops->async_writev(wdata,
3085                                         cifs_uncached_writedata_release);
3086                 }
3087
3088                 if (rc) {
3089                         add_credits_and_wake_if(server, &wdata->credits, 0);
3090                         kref_put(&wdata->refcount,
3091                                  cifs_uncached_writedata_release);
3092                         if (rc == -EAGAIN) {
3093                                 *from = saved_from;
3094                                 iov_iter_advance(from, offset - saved_offset);
3095                                 continue;
3096                         }
3097                         break;
3098                 }
3099
3100                 list_add_tail(&wdata->list, wdata_list);
3101                 offset += cur_len;
3102                 len -= cur_len;
3103         } while (len > 0);
3104
3105         free_xid(xid);
3106         return rc;
3107 }
3108
3109 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3110 {
3111         struct cifs_writedata *wdata, *tmp;
3112         struct cifs_tcon *tcon;
3113         struct cifs_sb_info *cifs_sb;
3114         struct dentry *dentry = ctx->cfile->dentry;
3115         int rc;
3116
3117         tcon = tlink_tcon(ctx->cfile->tlink);
3118         cifs_sb = CIFS_SB(dentry->d_sb);
3119
3120         mutex_lock(&ctx->aio_mutex);
3121
3122         if (list_empty(&ctx->list)) {
3123                 mutex_unlock(&ctx->aio_mutex);
3124                 return;
3125         }
3126
3127         rc = ctx->rc;
3128         /*
3129          * Wait for and collect replies for any successful sends in order of
3130          * increasing offset. Once an error is hit, then return without waiting
3131          * for any more replies.
3132          */
3133 restart_loop:
3134         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3135                 if (!rc) {
3136                         if (!try_wait_for_completion(&wdata->done)) {
3137                                 mutex_unlock(&ctx->aio_mutex);
3138                                 return;
3139                         }
3140
3141                         if (wdata->result)
3142                                 rc = wdata->result;
3143                         else
3144                                 ctx->total_len += wdata->bytes;
3145
3146                         /* resend call if it's a retryable error */
3147                         if (rc == -EAGAIN) {
3148                                 struct list_head tmp_list;
3149                                 struct iov_iter tmp_from = ctx->iter;
3150
3151                                 INIT_LIST_HEAD(&tmp_list);
3152                                 list_del_init(&wdata->list);
3153
3154                                 if (ctx->direct_io)
3155                                         rc = cifs_resend_wdata(
3156                                                 wdata, &tmp_list, ctx);
3157                                 else {
3158                                         iov_iter_advance(&tmp_from,
3159                                                  wdata->offset - ctx->pos);
3160
3161                                         rc = cifs_write_from_iter(wdata->offset,
3162                                                 wdata->bytes, &tmp_from,
3163                                                 ctx->cfile, cifs_sb, &tmp_list,
3164                                                 ctx);
3165
3166                                         kref_put(&wdata->refcount,
3167                                                 cifs_uncached_writedata_release);
3168                                 }
3169
3170                                 list_splice(&tmp_list, &ctx->list);
3171                                 goto restart_loop;
3172                         }
3173                 }
3174                 list_del_init(&wdata->list);
3175                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3176         }
3177
3178         cifs_stats_bytes_written(tcon, ctx->total_len);
3179         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3180
3181         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3182
3183         mutex_unlock(&ctx->aio_mutex);
3184
3185         if (ctx->iocb && ctx->iocb->ki_complete)
3186                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3187         else
3188                 complete(&ctx->done);
3189 }
3190
3191 static ssize_t __cifs_writev(
3192         struct kiocb *iocb, struct iov_iter *from, bool direct)
3193 {
3194         struct file *file = iocb->ki_filp;
3195         ssize_t total_written = 0;
3196         struct cifsFileInfo *cfile;
3197         struct cifs_tcon *tcon;
3198         struct cifs_sb_info *cifs_sb;
3199         struct cifs_aio_ctx *ctx;
3200         struct iov_iter saved_from = *from;
3201         size_t len = iov_iter_count(from);
3202         int rc;
3203
3204         /*
3205          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3206          * In this case, fall back to non-direct write function.
3207          * this could be improved by getting pages directly in ITER_KVEC
3208          */
3209         if (direct && iov_iter_is_kvec(from)) {
3210                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3211                 direct = false;
3212         }
3213
3214         rc = generic_write_checks(iocb, from);
3215         if (rc <= 0)
3216                 return rc;
3217
3218         cifs_sb = CIFS_FILE_SB(file);
3219         cfile = file->private_data;
3220         tcon = tlink_tcon(cfile->tlink);
3221
3222         if (!tcon->ses->server->ops->async_writev)
3223                 return -ENOSYS;
3224
3225         ctx = cifs_aio_ctx_alloc();
3226         if (!ctx)
3227                 return -ENOMEM;
3228
3229         ctx->cfile = cifsFileInfo_get(cfile);
3230
3231         if (!is_sync_kiocb(iocb))
3232                 ctx->iocb = iocb;
3233
3234         ctx->pos = iocb->ki_pos;
3235
3236         if (direct) {
3237                 ctx->direct_io = true;
3238                 ctx->iter = *from;
3239                 ctx->len = len;
3240         } else {
3241                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3242                 if (rc) {
3243                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3244                         return rc;
3245                 }
3246         }
3247
3248         /* grab a lock here due to read response handlers can access ctx */
3249         mutex_lock(&ctx->aio_mutex);
3250
3251         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3252                                   cfile, cifs_sb, &ctx->list, ctx);
3253
3254         /*
3255          * If at least one write was successfully sent, then discard any rc
3256          * value from the later writes. If the other write succeeds, then
3257          * we'll end up returning whatever was written. If it fails, then
3258          * we'll get a new rc value from that.
3259          */
3260         if (!list_empty(&ctx->list))
3261                 rc = 0;
3262
3263         mutex_unlock(&ctx->aio_mutex);
3264
3265         if (rc) {
3266                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3267                 return rc;
3268         }
3269
3270         if (!is_sync_kiocb(iocb)) {
3271                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3272                 return -EIOCBQUEUED;
3273         }
3274
3275         rc = wait_for_completion_killable(&ctx->done);
3276         if (rc) {
3277                 mutex_lock(&ctx->aio_mutex);
3278                 ctx->rc = rc = -EINTR;
3279                 total_written = ctx->total_len;
3280                 mutex_unlock(&ctx->aio_mutex);
3281         } else {
3282                 rc = ctx->rc;
3283                 total_written = ctx->total_len;
3284         }
3285
3286         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3287
3288         if (unlikely(!total_written))
3289                 return rc;
3290
3291         iocb->ki_pos += total_written;
3292         return total_written;
3293 }
3294
3295 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3296 {
3297         return __cifs_writev(iocb, from, true);
3298 }
3299
3300 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3301 {
3302         return __cifs_writev(iocb, from, false);
3303 }
3304
3305 static ssize_t
3306 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3307 {
3308         struct file *file = iocb->ki_filp;
3309         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3310         struct inode *inode = file->f_mapping->host;
3311         struct cifsInodeInfo *cinode = CIFS_I(inode);
3312         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3313         ssize_t rc;
3314
3315         inode_lock(inode);
3316         /*
3317          * We need to hold the sem to be sure nobody modifies lock list
3318          * with a brlock that prevents writing.
3319          */
3320         down_read(&cinode->lock_sem);
3321
3322         rc = generic_write_checks(iocb, from);
3323         if (rc <= 0)
3324                 goto out;
3325
3326         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3327                                      server->vals->exclusive_lock_type, 0,
3328                                      NULL, CIFS_WRITE_OP))
3329                 rc = __generic_file_write_iter(iocb, from);
3330         else
3331                 rc = -EACCES;
3332 out:
3333         up_read(&cinode->lock_sem);
3334         inode_unlock(inode);
3335
3336         if (rc > 0)
3337                 rc = generic_write_sync(iocb, rc);
3338         return rc;
3339 }
3340
3341 ssize_t
3342 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3343 {
3344         struct inode *inode = file_inode(iocb->ki_filp);
3345         struct cifsInodeInfo *cinode = CIFS_I(inode);
3346         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3347         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3348                                                 iocb->ki_filp->private_data;
3349         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3350         ssize_t written;
3351
3352         written = cifs_get_writer(cinode);
3353         if (written)
3354                 return written;
3355
3356         if (CIFS_CACHE_WRITE(cinode)) {
3357                 if (cap_unix(tcon->ses) &&
3358                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3359                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3360                         written = generic_file_write_iter(iocb, from);
3361                         goto out;
3362                 }
3363                 written = cifs_writev(iocb, from);
3364                 goto out;
3365         }
3366         /*
3367          * For non-oplocked files in strict cache mode we need to write the data
3368          * to the server exactly from the pos to pos+len-1 rather than flush all
3369          * affected pages because it may cause a error with mandatory locks on
3370          * these pages but not on the region from pos to ppos+len-1.
3371          */
3372         written = cifs_user_writev(iocb, from);
3373         if (CIFS_CACHE_READ(cinode)) {
3374                 /*
3375                  * We have read level caching and we have just sent a write
3376                  * request to the server thus making data in the cache stale.
3377                  * Zap the cache and set oplock/lease level to NONE to avoid
3378                  * reading stale data from the cache. All subsequent read
3379                  * operations will read new data from the server.
3380                  */
3381                 cifs_zap_mapping(inode);
3382                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3383                          inode);
3384                 cinode->oplock = 0;
3385         }
3386 out:
3387         cifs_put_writer(cinode);
3388         return written;
3389 }
3390
3391 static struct cifs_readdata *
3392 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3393 {
3394         struct cifs_readdata *rdata;
3395
3396         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3397         if (rdata != NULL) {
3398                 rdata->pages = pages;
3399                 kref_init(&rdata->refcount);
3400                 INIT_LIST_HEAD(&rdata->list);
3401                 init_completion(&rdata->done);
3402                 INIT_WORK(&rdata->work, complete);
3403         }
3404
3405         return rdata;
3406 }
3407
3408 static struct cifs_readdata *
3409 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3410 {
3411         struct page **pages =
3412                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3413         struct cifs_readdata *ret = NULL;
3414
3415         if (pages) {
3416                 ret = cifs_readdata_direct_alloc(pages, complete);
3417                 if (!ret)
3418                         kfree(pages);
3419         }
3420
3421         return ret;
3422 }
3423
3424 void
3425 cifs_readdata_release(struct kref *refcount)
3426 {
3427         struct cifs_readdata *rdata = container_of(refcount,
3428                                         struct cifs_readdata, refcount);
3429 #ifdef CONFIG_CIFS_SMB_DIRECT
3430         if (rdata->mr) {
3431                 smbd_deregister_mr(rdata->mr);
3432                 rdata->mr = NULL;
3433         }
3434 #endif
3435         if (rdata->cfile)
3436                 cifsFileInfo_put(rdata->cfile);
3437
3438         kvfree(rdata->pages);
3439         kfree(rdata);
3440 }
3441
3442 static int
3443 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3444 {
3445         int rc = 0;
3446         struct page *page;
3447         unsigned int i;
3448
3449         for (i = 0; i < nr_pages; i++) {
3450                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3451                 if (!page) {
3452                         rc = -ENOMEM;
3453                         break;
3454                 }
3455                 rdata->pages[i] = page;
3456         }
3457
3458         if (rc) {
3459                 unsigned int nr_page_failed = i;
3460
3461                 for (i = 0; i < nr_page_failed; i++) {
3462                         put_page(rdata->pages[i]);
3463                         rdata->pages[i] = NULL;
3464                 }
3465         }
3466         return rc;
3467 }
3468
3469 static void
3470 cifs_uncached_readdata_release(struct kref *refcount)
3471 {
3472         struct cifs_readdata *rdata = container_of(refcount,
3473                                         struct cifs_readdata, refcount);
3474         unsigned int i;
3475
3476         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3477         for (i = 0; i < rdata->nr_pages; i++) {
3478                 put_page(rdata->pages[i]);
3479         }
3480         cifs_readdata_release(refcount);
3481 }
3482
3483 /**
3484  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3485  * @rdata:      the readdata response with list of pages holding data
3486  * @iter:       destination for our data
3487  *
3488  * This function copies data from a list of pages in a readdata response into
3489  * an array of iovecs. It will first calculate where the data should go
3490  * based on the info in the readdata and then copy the data into that spot.
3491  */
3492 static int
3493 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3494 {
3495         size_t remaining = rdata->got_bytes;
3496         unsigned int i;
3497
3498         for (i = 0; i < rdata->nr_pages; i++) {
3499                 struct page *page = rdata->pages[i];
3500                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3501                 size_t written;
3502
3503                 if (unlikely(iov_iter_is_pipe(iter))) {
3504                         void *addr = kmap_atomic(page);
3505
3506                         written = copy_to_iter(addr, copy, iter);
3507                         kunmap_atomic(addr);
3508                 } else
3509                         written = copy_page_to_iter(page, 0, copy, iter);
3510                 remaining -= written;
3511                 if (written < copy && iov_iter_count(iter) > 0)
3512                         break;
3513         }
3514         return remaining ? -EFAULT : 0;
3515 }
3516
3517 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3518
3519 static void
3520 cifs_uncached_readv_complete(struct work_struct *work)
3521 {
3522         struct cifs_readdata *rdata = container_of(work,
3523                                                 struct cifs_readdata, work);
3524
3525         complete(&rdata->done);
3526         collect_uncached_read_data(rdata->ctx);
3527         /* the below call can possibly free the last ref to aio ctx */
3528         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3529 }
3530
3531 static int
3532 uncached_fill_pages(struct TCP_Server_Info *server,
3533                     struct cifs_readdata *rdata, struct iov_iter *iter,
3534                     unsigned int len)
3535 {
3536         int result = 0;
3537         unsigned int i;
3538         unsigned int nr_pages = rdata->nr_pages;
3539         unsigned int page_offset = rdata->page_offset;
3540
3541         rdata->got_bytes = 0;
3542         rdata->tailsz = PAGE_SIZE;
3543         for (i = 0; i < nr_pages; i++) {
3544                 struct page *page = rdata->pages[i];
3545                 size_t n;
3546                 unsigned int segment_size = rdata->pagesz;
3547
3548                 if (i == 0)
3549                         segment_size -= page_offset;
3550                 else
3551                         page_offset = 0;
3552
3553
3554                 if (len <= 0) {
3555                         /* no need to hold page hostage */
3556                         rdata->pages[i] = NULL;
3557                         rdata->nr_pages--;
3558                         put_page(page);
3559                         continue;
3560                 }
3561
3562                 n = len;
3563                 if (len >= segment_size)
3564                         /* enough data to fill the page */
3565                         n = segment_size;
3566                 else
3567                         rdata->tailsz = len;
3568                 len -= n;
3569
3570                 if (iter)
3571                         result = copy_page_from_iter(
3572                                         page, page_offset, n, iter);
3573 #ifdef CONFIG_CIFS_SMB_DIRECT
3574                 else if (rdata->mr)
3575                         result = n;
3576 #endif
3577                 else
3578                         result = cifs_read_page_from_socket(
3579                                         server, page, page_offset, n);
3580                 if (result < 0)
3581                         break;
3582
3583                 rdata->got_bytes += result;
3584         }
3585
3586         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3587                                                 rdata->got_bytes : result;
3588 }
3589
3590 static int
3591 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3592                               struct cifs_readdata *rdata, unsigned int len)
3593 {
3594         return uncached_fill_pages(server, rdata, NULL, len);
3595 }
3596
3597 static int
3598 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3599                               struct cifs_readdata *rdata,
3600                               struct iov_iter *iter)
3601 {
3602         return uncached_fill_pages(server, rdata, iter, iter->count);
3603 }
3604
3605 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3606                         struct list_head *rdata_list,
3607                         struct cifs_aio_ctx *ctx)
3608 {
3609         unsigned int rsize;
3610         struct cifs_credits credits;
3611         int rc;
3612         struct TCP_Server_Info *server;
3613
3614         /* XXX: should we pick a new channel here? */
3615         server = rdata->server;
3616
3617         do {
3618                 if (rdata->cfile->invalidHandle) {
3619                         rc = cifs_reopen_file(rdata->cfile, true);
3620                         if (rc == -EAGAIN)
3621                                 continue;
3622                         else if (rc)
3623                                 break;
3624                 }
3625
3626                 /*
3627                  * Wait for credits to resend this rdata.
3628                  * Note: we are attempting to resend the whole rdata not in
3629                  * segments
3630                  */
3631                 do {
3632                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3633                                                 &rsize, &credits);
3634
3635                         if (rc)
3636                                 goto fail;
3637
3638                         if (rsize < rdata->bytes) {
3639                                 add_credits_and_wake_if(server, &credits, 0);
3640                                 msleep(1000);
3641                         }
3642                 } while (rsize < rdata->bytes);
3643                 rdata->credits = credits;
3644
3645                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3646                 if (!rc) {
3647                         if (rdata->cfile->invalidHandle)
3648                                 rc = -EAGAIN;
3649                         else {
3650 #ifdef CONFIG_CIFS_SMB_DIRECT
3651                                 if (rdata->mr) {
3652                                         rdata->mr->need_invalidate = true;
3653                                         smbd_deregister_mr(rdata->mr);
3654                                         rdata->mr = NULL;
3655                                 }
3656 #endif
3657                                 rc = server->ops->async_readv(rdata);
3658                         }
3659                 }
3660
3661                 /* If the read was successfully sent, we are done */
3662                 if (!rc) {
3663                         /* Add to aio pending list */
3664                         list_add_tail(&rdata->list, rdata_list);
3665                         return 0;
3666                 }
3667
3668                 /* Roll back credits and retry if needed */
3669                 add_credits_and_wake_if(server, &rdata->credits, 0);
3670         } while (rc == -EAGAIN);
3671
3672 fail:
3673         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3674         return rc;
3675 }
3676
3677 static int
3678 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3679                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3680                      struct cifs_aio_ctx *ctx)
3681 {
3682         struct cifs_readdata *rdata;
3683         unsigned int npages, rsize;
3684         struct cifs_credits credits_on_stack;
3685         struct cifs_credits *credits = &credits_on_stack;
3686         size_t cur_len;
3687         int rc;
3688         pid_t pid;
3689         struct TCP_Server_Info *server;
3690         struct page **pagevec;
3691         size_t start;
3692         struct iov_iter direct_iov = ctx->iter;
3693
3694         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3695
3696         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3697                 pid = open_file->pid;
3698         else
3699                 pid = current->tgid;
3700
3701         if (ctx->direct_io)
3702                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3703
3704         do {
3705                 if (open_file->invalidHandle) {
3706                         rc = cifs_reopen_file(open_file, true);
3707                         if (rc == -EAGAIN)
3708                                 continue;
3709                         else if (rc)
3710                                 break;
3711                 }
3712
3713                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3714                                                    &rsize, credits);
3715                 if (rc)
3716                         break;
3717
3718                 cur_len = min_t(const size_t, len, rsize);
3719
3720                 if (ctx->direct_io) {
3721                         ssize_t result;
3722
3723                         result = iov_iter_get_pages_alloc(
3724                                         &direct_iov, &pagevec,
3725                                         cur_len, &start);
3726                         if (result < 0) {
3727                                 cifs_dbg(VFS,
3728                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3729                                          result, iov_iter_type(&direct_iov),
3730                                          direct_iov.iov_offset,
3731                                          direct_iov.count);
3732                                 dump_stack();
3733
3734                                 rc = result;
3735                                 add_credits_and_wake_if(server, credits, 0);
3736                                 break;
3737                         }
3738                         cur_len = (size_t)result;
3739                         iov_iter_advance(&direct_iov, cur_len);
3740
3741                         rdata = cifs_readdata_direct_alloc(
3742                                         pagevec, cifs_uncached_readv_complete);
3743                         if (!rdata) {
3744                                 add_credits_and_wake_if(server, credits, 0);
3745                                 rc = -ENOMEM;
3746                                 break;
3747                         }
3748
3749                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3750                         rdata->page_offset = start;
3751                         rdata->tailsz = npages > 1 ?
3752                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3753                                 cur_len;
3754
3755                 } else {
3756
3757                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3758                         /* allocate a readdata struct */
3759                         rdata = cifs_readdata_alloc(npages,
3760                                             cifs_uncached_readv_complete);
3761                         if (!rdata) {
3762                                 add_credits_and_wake_if(server, credits, 0);
3763                                 rc = -ENOMEM;
3764                                 break;
3765                         }
3766
3767                         rc = cifs_read_allocate_pages(rdata, npages);
3768                         if (rc) {
3769                                 kvfree(rdata->pages);
3770                                 kfree(rdata);
3771                                 add_credits_and_wake_if(server, credits, 0);
3772                                 break;
3773                         }
3774
3775                         rdata->tailsz = PAGE_SIZE;
3776                 }
3777
3778                 rdata->server = server;
3779                 rdata->cfile = cifsFileInfo_get(open_file);
3780                 rdata->nr_pages = npages;
3781                 rdata->offset = offset;
3782                 rdata->bytes = cur_len;
3783                 rdata->pid = pid;
3784                 rdata->pagesz = PAGE_SIZE;
3785                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3786                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3787                 rdata->credits = credits_on_stack;
3788                 rdata->ctx = ctx;
3789                 kref_get(&ctx->refcount);
3790
3791                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3792
3793                 if (!rc) {
3794                         if (rdata->cfile->invalidHandle)
3795                                 rc = -EAGAIN;
3796                         else
3797                                 rc = server->ops->async_readv(rdata);
3798                 }
3799
3800                 if (rc) {
3801                         add_credits_and_wake_if(server, &rdata->credits, 0);
3802                         kref_put(&rdata->refcount,
3803                                 cifs_uncached_readdata_release);
3804                         if (rc == -EAGAIN) {
3805                                 iov_iter_revert(&direct_iov, cur_len);
3806                                 continue;
3807                         }
3808                         break;
3809                 }
3810
3811                 list_add_tail(&rdata->list, rdata_list);
3812                 offset += cur_len;
3813                 len -= cur_len;
3814         } while (len > 0);
3815
3816         return rc;
3817 }
3818
3819 static void
3820 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3821 {
3822         struct cifs_readdata *rdata, *tmp;
3823         struct iov_iter *to = &ctx->iter;
3824         struct cifs_sb_info *cifs_sb;
3825         int rc;
3826
3827         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3828
3829         mutex_lock(&ctx->aio_mutex);
3830
3831         if (list_empty(&ctx->list)) {
3832                 mutex_unlock(&ctx->aio_mutex);
3833                 return;
3834         }
3835
3836         rc = ctx->rc;
3837         /* the loop below should proceed in the order of increasing offsets */
3838 again:
3839         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3840                 if (!rc) {
3841                         if (!try_wait_for_completion(&rdata->done)) {
3842                                 mutex_unlock(&ctx->aio_mutex);
3843                                 return;
3844                         }
3845
3846                         if (rdata->result == -EAGAIN) {
3847                                 /* resend call if it's a retryable error */
3848                                 struct list_head tmp_list;
3849                                 unsigned int got_bytes = rdata->got_bytes;
3850
3851                                 list_del_init(&rdata->list);
3852                                 INIT_LIST_HEAD(&tmp_list);
3853
3854                                 /*
3855                                  * Got a part of data and then reconnect has
3856                                  * happened -- fill the buffer and continue
3857                                  * reading.
3858                                  */
3859                                 if (got_bytes && got_bytes < rdata->bytes) {
3860                                         rc = 0;
3861                                         if (!ctx->direct_io)
3862                                                 rc = cifs_readdata_to_iov(rdata, to);
3863                                         if (rc) {
3864                                                 kref_put(&rdata->refcount,
3865                                                         cifs_uncached_readdata_release);
3866                                                 continue;
3867                                         }
3868                                 }
3869
3870                                 if (ctx->direct_io) {
3871                                         /*
3872                                          * Re-use rdata as this is a
3873                                          * direct I/O
3874                                          */
3875                                         rc = cifs_resend_rdata(
3876                                                 rdata,
3877                                                 &tmp_list, ctx);
3878                                 } else {
3879                                         rc = cifs_send_async_read(
3880                                                 rdata->offset + got_bytes,
3881                                                 rdata->bytes - got_bytes,
3882                                                 rdata->cfile, cifs_sb,
3883                                                 &tmp_list, ctx);
3884
3885                                         kref_put(&rdata->refcount,
3886                                                 cifs_uncached_readdata_release);
3887                                 }
3888
3889                                 list_splice(&tmp_list, &ctx->list);
3890
3891                                 goto again;
3892                         } else if (rdata->result)
3893                                 rc = rdata->result;
3894                         else if (!ctx->direct_io)
3895                                 rc = cifs_readdata_to_iov(rdata, to);
3896
3897                         /* if there was a short read -- discard anything left */
3898                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3899                                 rc = -ENODATA;
3900
3901                         ctx->total_len += rdata->got_bytes;
3902                 }
3903                 list_del_init(&rdata->list);
3904                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3905         }
3906
3907         if (!ctx->direct_io)
3908                 ctx->total_len = ctx->len - iov_iter_count(to);
3909
3910         /* mask nodata case */
3911         if (rc == -ENODATA)
3912                 rc = 0;
3913
3914         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3915
3916         mutex_unlock(&ctx->aio_mutex);
3917
3918         if (ctx->iocb && ctx->iocb->ki_complete)
3919                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3920         else
3921                 complete(&ctx->done);
3922 }
3923
3924 static ssize_t __cifs_readv(
3925         struct kiocb *iocb, struct iov_iter *to, bool direct)
3926 {
3927         size_t len;
3928         struct file *file = iocb->ki_filp;
3929         struct cifs_sb_info *cifs_sb;
3930         struct cifsFileInfo *cfile;
3931         struct cifs_tcon *tcon;
3932         ssize_t rc, total_read = 0;
3933         loff_t offset = iocb->ki_pos;
3934         struct cifs_aio_ctx *ctx;
3935
3936         /*
3937          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3938          * fall back to data copy read path
3939          * this could be improved by getting pages directly in ITER_KVEC
3940          */
3941         if (direct && iov_iter_is_kvec(to)) {
3942                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3943                 direct = false;
3944         }
3945
3946         len = iov_iter_count(to);
3947         if (!len)
3948                 return 0;
3949
3950         cifs_sb = CIFS_FILE_SB(file);
3951         cfile = file->private_data;
3952         tcon = tlink_tcon(cfile->tlink);
3953
3954         if (!tcon->ses->server->ops->async_readv)
3955                 return -ENOSYS;
3956
3957         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3958                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3959
3960         ctx = cifs_aio_ctx_alloc();
3961         if (!ctx)
3962                 return -ENOMEM;
3963
3964         ctx->cfile = cifsFileInfo_get(cfile);
3965
3966         if (!is_sync_kiocb(iocb))
3967                 ctx->iocb = iocb;
3968
3969         if (iter_is_iovec(to))
3970                 ctx->should_dirty = true;
3971
3972         if (direct) {
3973                 ctx->pos = offset;
3974                 ctx->direct_io = true;
3975                 ctx->iter = *to;
3976                 ctx->len = len;
3977         } else {
3978                 rc = setup_aio_ctx_iter(ctx, to, READ);
3979                 if (rc) {
3980                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3981                         return rc;
3982                 }
3983                 len = ctx->len;
3984         }
3985
3986         /* grab a lock here due to read response handlers can access ctx */
3987         mutex_lock(&ctx->aio_mutex);
3988
3989         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3990
3991         /* if at least one read request send succeeded, then reset rc */
3992         if (!list_empty(&ctx->list))
3993                 rc = 0;
3994
3995         mutex_unlock(&ctx->aio_mutex);
3996
3997         if (rc) {
3998                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3999                 return rc;
4000         }
4001
4002         if (!is_sync_kiocb(iocb)) {
4003                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4004                 return -EIOCBQUEUED;
4005         }
4006
4007         rc = wait_for_completion_killable(&ctx->done);
4008         if (rc) {
4009                 mutex_lock(&ctx->aio_mutex);
4010                 ctx->rc = rc = -EINTR;
4011                 total_read = ctx->total_len;
4012                 mutex_unlock(&ctx->aio_mutex);
4013         } else {
4014                 rc = ctx->rc;
4015                 total_read = ctx->total_len;
4016         }
4017
4018         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4019
4020         if (total_read) {
4021                 iocb->ki_pos += total_read;
4022                 return total_read;
4023         }
4024         return rc;
4025 }
4026
4027 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4028 {
4029         return __cifs_readv(iocb, to, true);
4030 }
4031
4032 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4033 {
4034         return __cifs_readv(iocb, to, false);
4035 }
4036
4037 ssize_t
4038 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4039 {
4040         struct inode *inode = file_inode(iocb->ki_filp);
4041         struct cifsInodeInfo *cinode = CIFS_I(inode);
4042         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4043         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4044                                                 iocb->ki_filp->private_data;
4045         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4046         int rc = -EACCES;
4047
4048         /*
4049          * In strict cache mode we need to read from the server all the time
4050          * if we don't have level II oplock because the server can delay mtime
4051          * change - so we can't make a decision about inode invalidating.
4052          * And we can also fail with pagereading if there are mandatory locks
4053          * on pages affected by this read but not on the region from pos to
4054          * pos+len-1.
4055          */
4056         if (!CIFS_CACHE_READ(cinode))
4057                 return cifs_user_readv(iocb, to);
4058
4059         if (cap_unix(tcon->ses) &&
4060             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4061             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4062                 return generic_file_read_iter(iocb, to);
4063
4064         /*
4065          * We need to hold the sem to be sure nobody modifies lock list
4066          * with a brlock that prevents reading.
4067          */
4068         down_read(&cinode->lock_sem);
4069         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4070                                      tcon->ses->server->vals->shared_lock_type,
4071                                      0, NULL, CIFS_READ_OP))
4072                 rc = generic_file_read_iter(iocb, to);
4073         up_read(&cinode->lock_sem);
4074         return rc;
4075 }
4076
4077 static ssize_t
4078 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4079 {
4080         int rc = -EACCES;
4081         unsigned int bytes_read = 0;
4082         unsigned int total_read;
4083         unsigned int current_read_size;
4084         unsigned int rsize;
4085         struct cifs_sb_info *cifs_sb;
4086         struct cifs_tcon *tcon;
4087         struct TCP_Server_Info *server;
4088         unsigned int xid;
4089         char *cur_offset;
4090         struct cifsFileInfo *open_file;
4091         struct cifs_io_parms io_parms = {0};
4092         int buf_type = CIFS_NO_BUFFER;
4093         __u32 pid;
4094
4095         xid = get_xid();
4096         cifs_sb = CIFS_FILE_SB(file);
4097
4098         /* FIXME: set up handlers for larger reads and/or convert to async */
4099         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4100
4101         if (file->private_data == NULL) {
4102                 rc = -EBADF;
4103                 free_xid(xid);
4104                 return rc;
4105         }
4106         open_file = file->private_data;
4107         tcon = tlink_tcon(open_file->tlink);
4108         server = cifs_pick_channel(tcon->ses);
4109
4110         if (!server->ops->sync_read) {
4111                 free_xid(xid);
4112                 return -ENOSYS;
4113         }
4114
4115         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4116                 pid = open_file->pid;
4117         else
4118                 pid = current->tgid;
4119
4120         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4121                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4122
4123         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4124              total_read += bytes_read, cur_offset += bytes_read) {
4125                 do {
4126                         current_read_size = min_t(uint, read_size - total_read,
4127                                                   rsize);
4128                         /*
4129                          * For windows me and 9x we do not want to request more
4130                          * than it negotiated since it will refuse the read
4131                          * then.
4132                          */
4133                         if (!(tcon->ses->capabilities &
4134                                 tcon->ses->server->vals->cap_large_files)) {
4135                                 current_read_size = min_t(uint,
4136                                         current_read_size, CIFSMaxBufSize);
4137                         }
4138                         if (open_file->invalidHandle) {
4139                                 rc = cifs_reopen_file(open_file, true);
4140                                 if (rc != 0)
4141                                         break;
4142                         }
4143                         io_parms.pid = pid;
4144                         io_parms.tcon = tcon;
4145                         io_parms.offset = *offset;
4146                         io_parms.length = current_read_size;
4147                         io_parms.server = server;
4148                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4149                                                     &bytes_read, &cur_offset,
4150                                                     &buf_type);
4151                 } while (rc == -EAGAIN);
4152
4153                 if (rc || (bytes_read == 0)) {
4154                         if (total_read) {
4155                                 break;
4156                         } else {
4157                                 free_xid(xid);
4158                                 return rc;
4159                         }
4160                 } else {
4161                         cifs_stats_bytes_read(tcon, total_read);
4162                         *offset += bytes_read;
4163                 }
4164         }
4165         free_xid(xid);
4166         return total_read;
4167 }
4168
4169 /*
4170  * If the page is mmap'ed into a process' page tables, then we need to make
4171  * sure that it doesn't change while being written back.
4172  */
4173 static vm_fault_t
4174 cifs_page_mkwrite(struct vm_fault *vmf)
4175 {
4176         struct page *page = vmf->page;
4177         struct file *file = vmf->vma->vm_file;
4178         struct inode *inode = file_inode(file);
4179
4180         cifs_fscache_wait_on_page_write(inode, page);
4181
4182         lock_page(page);
4183         return VM_FAULT_LOCKED;
4184 }
4185
4186 static const struct vm_operations_struct cifs_file_vm_ops = {
4187         .fault = filemap_fault,
4188         .map_pages = filemap_map_pages,
4189         .page_mkwrite = cifs_page_mkwrite,
4190 };
4191
4192 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4193 {
4194         int xid, rc = 0;
4195         struct inode *inode = file_inode(file);
4196
4197         xid = get_xid();
4198
4199         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4200                 rc = cifs_zap_mapping(inode);
4201         if (!rc)
4202                 rc = generic_file_mmap(file, vma);
4203         if (!rc)
4204                 vma->vm_ops = &cifs_file_vm_ops;
4205
4206         free_xid(xid);
4207         return rc;
4208 }
4209
4210 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4211 {
4212         int rc, xid;
4213
4214         xid = get_xid();
4215
4216         rc = cifs_revalidate_file(file);
4217         if (rc)
4218                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4219                          rc);
4220         if (!rc)
4221                 rc = generic_file_mmap(file, vma);
4222         if (!rc)
4223                 vma->vm_ops = &cifs_file_vm_ops;
4224
4225         free_xid(xid);
4226         return rc;
4227 }
4228
4229 static void
4230 cifs_readv_complete(struct work_struct *work)
4231 {
4232         unsigned int i, got_bytes;
4233         struct cifs_readdata *rdata = container_of(work,
4234                                                 struct cifs_readdata, work);
4235
4236         got_bytes = rdata->got_bytes;
4237         for (i = 0; i < rdata->nr_pages; i++) {
4238                 struct page *page = rdata->pages[i];
4239
4240                 lru_cache_add(page);
4241
4242                 if (rdata->result == 0 ||
4243                     (rdata->result == -EAGAIN && got_bytes)) {
4244                         flush_dcache_page(page);
4245                         SetPageUptodate(page);
4246                 } else
4247                         SetPageError(page);
4248
4249                 unlock_page(page);
4250
4251                 if (rdata->result == 0 ||
4252                     (rdata->result == -EAGAIN && got_bytes))
4253                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4254                 else
4255                         cifs_fscache_uncache_page(rdata->mapping->host, page);
4256
4257                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4258
4259                 put_page(page);
4260                 rdata->pages[i] = NULL;
4261         }
4262         kref_put(&rdata->refcount, cifs_readdata_release);
4263 }
4264
4265 static int
4266 readpages_fill_pages(struct TCP_Server_Info *server,
4267                      struct cifs_readdata *rdata, struct iov_iter *iter,
4268                      unsigned int len)
4269 {
4270         int result = 0;
4271         unsigned int i;
4272         u64 eof;
4273         pgoff_t eof_index;
4274         unsigned int nr_pages = rdata->nr_pages;
4275         unsigned int page_offset = rdata->page_offset;
4276
4277         /* determine the eof that the server (probably) has */
4278         eof = CIFS_I(rdata->mapping->host)->server_eof;
4279         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4280         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4281
4282         rdata->got_bytes = 0;
4283         rdata->tailsz = PAGE_SIZE;
4284         for (i = 0; i < nr_pages; i++) {
4285                 struct page *page = rdata->pages[i];
4286                 unsigned int to_read = rdata->pagesz;
4287                 size_t n;
4288
4289                 if (i == 0)
4290                         to_read -= page_offset;
4291                 else
4292                         page_offset = 0;
4293
4294                 n = to_read;
4295
4296                 if (len >= to_read) {
4297                         len -= to_read;
4298                 } else if (len > 0) {
4299                         /* enough for partial page, fill and zero the rest */
4300                         zero_user(page, len + page_offset, to_read - len);
4301                         n = rdata->tailsz = len;
4302                         len = 0;
4303                 } else if (page->index > eof_index) {
4304                         /*
4305                          * The VFS will not try to do readahead past the
4306                          * i_size, but it's possible that we have outstanding
4307                          * writes with gaps in the middle and the i_size hasn't
4308                          * caught up yet. Populate those with zeroed out pages
4309                          * to prevent the VFS from repeatedly attempting to
4310                          * fill them until the writes are flushed.
4311                          */
4312                         zero_user(page, 0, PAGE_SIZE);
4313                         lru_cache_add(page);
4314                         flush_dcache_page(page);
4315                         SetPageUptodate(page);
4316                         unlock_page(page);
4317                         put_page(page);
4318                         rdata->pages[i] = NULL;
4319                         rdata->nr_pages--;
4320                         continue;
4321                 } else {
4322                         /* no need to hold page hostage */
4323                         lru_cache_add(page);
4324                         unlock_page(page);
4325                         put_page(page);
4326                         rdata->pages[i] = NULL;
4327                         rdata->nr_pages--;
4328                         continue;
4329                 }
4330
4331                 if (iter)
4332                         result = copy_page_from_iter(
4333                                         page, page_offset, n, iter);
4334 #ifdef CONFIG_CIFS_SMB_DIRECT
4335                 else if (rdata->mr)
4336                         result = n;
4337 #endif
4338                 else
4339                         result = cifs_read_page_from_socket(
4340                                         server, page, page_offset, n);
4341                 if (result < 0)
4342                         break;
4343
4344                 rdata->got_bytes += result;
4345         }
4346
4347         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4348                                                 rdata->got_bytes : result;
4349 }
4350
4351 static int
4352 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4353                                struct cifs_readdata *rdata, unsigned int len)
4354 {
4355         return readpages_fill_pages(server, rdata, NULL, len);
4356 }
4357
4358 static int
4359 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4360                                struct cifs_readdata *rdata,
4361                                struct iov_iter *iter)
4362 {
4363         return readpages_fill_pages(server, rdata, iter, iter->count);
4364 }
4365
4366 static int
4367 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4368                     unsigned int rsize, struct list_head *tmplist,
4369                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4370 {
4371         struct page *page, *tpage;
4372         unsigned int expected_index;
4373         int rc;
4374         gfp_t gfp = readahead_gfp_mask(mapping);
4375
4376         INIT_LIST_HEAD(tmplist);
4377
4378         page = lru_to_page(page_list);
4379
4380         /*
4381          * Lock the page and put it in the cache. Since no one else
4382          * should have access to this page, we're safe to simply set
4383          * PG_locked without checking it first.
4384          */
4385         __SetPageLocked(page);
4386         rc = add_to_page_cache_locked(page, mapping,
4387                                       page->index, gfp);
4388
4389         /* give up if we can't stick it in the cache */
4390         if (rc) {
4391                 __ClearPageLocked(page);
4392                 return rc;
4393         }
4394
4395         /* move first page to the tmplist */
4396         *offset = (loff_t)page->index << PAGE_SHIFT;
4397         *bytes = PAGE_SIZE;
4398         *nr_pages = 1;
4399         list_move_tail(&page->lru, tmplist);
4400
4401         /* now try and add more pages onto the request */
4402         expected_index = page->index + 1;
4403         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4404                 /* discontinuity ? */
4405                 if (page->index != expected_index)
4406                         break;
4407
4408                 /* would this page push the read over the rsize? */
4409                 if (*bytes + PAGE_SIZE > rsize)
4410                         break;
4411
4412                 __SetPageLocked(page);
4413                 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4414                 if (rc) {
4415                         __ClearPageLocked(page);
4416                         break;
4417                 }
4418                 list_move_tail(&page->lru, tmplist);
4419                 (*bytes) += PAGE_SIZE;
4420                 expected_index++;
4421                 (*nr_pages)++;
4422         }
4423         return rc;
4424 }
4425
4426 static int cifs_readpages(struct file *file, struct address_space *mapping,
4427         struct list_head *page_list, unsigned num_pages)
4428 {
4429         int rc;
4430         int err = 0;
4431         struct list_head tmplist;
4432         struct cifsFileInfo *open_file = file->private_data;
4433         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4434         struct TCP_Server_Info *server;
4435         pid_t pid;
4436         unsigned int xid;
4437
4438         xid = get_xid();
4439         /*
4440          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4441          * immediately if the cookie is negative
4442          *
4443          * After this point, every page in the list might have PG_fscache set,
4444          * so we will need to clean that up off of every page we don't use.
4445          */
4446         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4447                                          &num_pages);
4448         if (rc == 0) {
4449                 free_xid(xid);
4450                 return rc;
4451         }
4452
4453         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4454                 pid = open_file->pid;
4455         else
4456                 pid = current->tgid;
4457
4458         rc = 0;
4459         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4460
4461         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4462                  __func__, file, mapping, num_pages);
4463
4464         /*
4465          * Start with the page at end of list and move it to private
4466          * list. Do the same with any following pages until we hit
4467          * the rsize limit, hit an index discontinuity, or run out of
4468          * pages. Issue the async read and then start the loop again
4469          * until the list is empty.
4470          *
4471          * Note that list order is important. The page_list is in
4472          * the order of declining indexes. When we put the pages in
4473          * the rdata->pages, then we want them in increasing order.
4474          */
4475         while (!list_empty(page_list) && !err) {
4476                 unsigned int i, nr_pages, bytes, rsize;
4477                 loff_t offset;
4478                 struct page *page, *tpage;
4479                 struct cifs_readdata *rdata;
4480                 struct cifs_credits credits_on_stack;
4481                 struct cifs_credits *credits = &credits_on_stack;
4482
4483                 if (open_file->invalidHandle) {
4484                         rc = cifs_reopen_file(open_file, true);
4485                         if (rc == -EAGAIN)
4486                                 continue;
4487                         else if (rc)
4488                                 break;
4489                 }
4490
4491                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4492                                                    &rsize, credits);
4493                 if (rc)
4494                         break;
4495
4496                 /*
4497                  * Give up immediately if rsize is too small to read an entire
4498                  * page. The VFS will fall back to readpage. We should never
4499                  * reach this point however since we set ra_pages to 0 when the
4500                  * rsize is smaller than a cache page.
4501                  */
4502                 if (unlikely(rsize < PAGE_SIZE)) {
4503                         add_credits_and_wake_if(server, credits, 0);
4504                         free_xid(xid);
4505                         return 0;
4506                 }
4507
4508                 nr_pages = 0;
4509                 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4510                                          &nr_pages, &offset, &bytes);
4511                 if (!nr_pages) {
4512                         add_credits_and_wake_if(server, credits, 0);
4513                         break;
4514                 }
4515
4516                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4517                 if (!rdata) {
4518                         /* best to give up if we're out of mem */
4519                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4520                                 list_del(&page->lru);
4521                                 lru_cache_add(page);
4522                                 unlock_page(page);
4523                                 put_page(page);
4524                         }
4525                         rc = -ENOMEM;
4526                         add_credits_and_wake_if(server, credits, 0);
4527                         break;
4528                 }
4529
4530                 rdata->cfile = cifsFileInfo_get(open_file);
4531                 rdata->server = server;
4532                 rdata->mapping = mapping;
4533                 rdata->offset = offset;
4534                 rdata->bytes = bytes;
4535                 rdata->pid = pid;
4536                 rdata->pagesz = PAGE_SIZE;
4537                 rdata->tailsz = PAGE_SIZE;
4538                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4539                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4540                 rdata->credits = credits_on_stack;
4541
4542                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4543                         list_del(&page->lru);
4544                         rdata->pages[rdata->nr_pages++] = page;
4545                 }
4546
4547                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4548
4549                 if (!rc) {
4550                         if (rdata->cfile->invalidHandle)
4551                                 rc = -EAGAIN;
4552                         else
4553                                 rc = server->ops->async_readv(rdata);
4554                 }
4555
4556                 if (rc) {
4557                         add_credits_and_wake_if(server, &rdata->credits, 0);
4558                         for (i = 0; i < rdata->nr_pages; i++) {
4559                                 page = rdata->pages[i];
4560                                 lru_cache_add(page);
4561                                 unlock_page(page);
4562                                 put_page(page);
4563                         }
4564                         /* Fallback to the readpage in error/reconnect cases */
4565                         kref_put(&rdata->refcount, cifs_readdata_release);
4566                         break;
4567                 }
4568
4569                 kref_put(&rdata->refcount, cifs_readdata_release);
4570         }
4571
4572         /* Any pages that have been shown to fscache but didn't get added to
4573          * the pagecache must be uncached before they get returned to the
4574          * allocator.
4575          */
4576         cifs_fscache_readpages_cancel(mapping->host, page_list);
4577         free_xid(xid);
4578         return rc;
4579 }
4580
4581 /*
4582  * cifs_readpage_worker must be called with the page pinned
4583  */
4584 static int cifs_readpage_worker(struct file *file, struct page *page,
4585         loff_t *poffset)
4586 {
4587         char *read_data;
4588         int rc;
4589
4590         /* Is the page cached? */
4591         rc = cifs_readpage_from_fscache(file_inode(file), page);
4592         if (rc == 0)
4593                 goto read_complete;
4594
4595         read_data = kmap(page);
4596         /* for reads over a certain size could initiate async read ahead */
4597
4598         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4599
4600         if (rc < 0)
4601                 goto io_error;
4602         else
4603                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4604
4605         /* we do not want atime to be less than mtime, it broke some apps */
4606         file_inode(file)->i_atime = current_time(file_inode(file));
4607         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4608                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4609         else
4610                 file_inode(file)->i_atime = current_time(file_inode(file));
4611
4612         if (PAGE_SIZE > rc)
4613                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4614
4615         flush_dcache_page(page);
4616         SetPageUptodate(page);
4617
4618         /* send this page to the cache */
4619         cifs_readpage_to_fscache(file_inode(file), page);
4620
4621         rc = 0;
4622
4623 io_error:
4624         kunmap(page);
4625         unlock_page(page);
4626
4627 read_complete:
4628         return rc;
4629 }
4630
4631 static int cifs_readpage(struct file *file, struct page *page)
4632 {
4633         loff_t offset = page_file_offset(page);
4634         int rc = -EACCES;
4635         unsigned int xid;
4636
4637         xid = get_xid();
4638
4639         if (file->private_data == NULL) {
4640                 rc = -EBADF;
4641                 free_xid(xid);
4642                 return rc;
4643         }
4644
4645         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4646                  page, (int)offset, (int)offset);
4647
4648         rc = cifs_readpage_worker(file, page, &offset);
4649
4650         free_xid(xid);
4651         return rc;
4652 }
4653
4654 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4655 {
4656         struct cifsFileInfo *open_file;
4657
4658         spin_lock(&cifs_inode->open_file_lock);
4659         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4660                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4661                         spin_unlock(&cifs_inode->open_file_lock);
4662                         return 1;
4663                 }
4664         }
4665         spin_unlock(&cifs_inode->open_file_lock);
4666         return 0;
4667 }
4668
4669 /* We do not want to update the file size from server for inodes
4670    open for write - to avoid races with writepage extending
4671    the file - in the future we could consider allowing
4672    refreshing the inode only on increases in the file size
4673    but this is tricky to do without racing with writebehind
4674    page caching in the current Linux kernel design */
4675 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4676 {
4677         if (!cifsInode)
4678                 return true;
4679
4680         if (is_inode_writable(cifsInode)) {
4681                 /* This inode is open for write at least once */
4682                 struct cifs_sb_info *cifs_sb;
4683
4684                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4685                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4686                         /* since no page cache to corrupt on directio
4687                         we can change size safely */
4688                         return true;
4689                 }
4690
4691                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4692                         return true;
4693
4694                 return false;
4695         } else
4696                 return true;
4697 }
4698
4699 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4700                         loff_t pos, unsigned len, unsigned flags,
4701                         struct page **pagep, void **fsdata)
4702 {
4703         int oncethru = 0;
4704         pgoff_t index = pos >> PAGE_SHIFT;
4705         loff_t offset = pos & (PAGE_SIZE - 1);
4706         loff_t page_start = pos & PAGE_MASK;
4707         loff_t i_size;
4708         struct page *page;
4709         int rc = 0;
4710
4711         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4712
4713 start:
4714         page = grab_cache_page_write_begin(mapping, index, flags);
4715         if (!page) {
4716                 rc = -ENOMEM;
4717                 goto out;
4718         }
4719
4720         if (PageUptodate(page))
4721                 goto out;
4722
4723         /*
4724          * If we write a full page it will be up to date, no need to read from
4725          * the server. If the write is short, we'll end up doing a sync write
4726          * instead.
4727          */
4728         if (len == PAGE_SIZE)
4729                 goto out;
4730
4731         /*
4732          * optimize away the read when we have an oplock, and we're not
4733          * expecting to use any of the data we'd be reading in. That
4734          * is, when the page lies beyond the EOF, or straddles the EOF
4735          * and the write will cover all of the existing data.
4736          */
4737         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4738                 i_size = i_size_read(mapping->host);
4739                 if (page_start >= i_size ||
4740                     (offset == 0 && (pos + len) >= i_size)) {
4741                         zero_user_segments(page, 0, offset,
4742                                            offset + len,
4743                                            PAGE_SIZE);
4744                         /*
4745                          * PageChecked means that the parts of the page
4746                          * to which we're not writing are considered up
4747                          * to date. Once the data is copied to the
4748                          * page, it can be set uptodate.
4749                          */
4750                         SetPageChecked(page);
4751                         goto out;
4752                 }
4753         }
4754
4755         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4756                 /*
4757                  * might as well read a page, it is fast enough. If we get
4758                  * an error, we don't need to return it. cifs_write_end will
4759                  * do a sync write instead since PG_uptodate isn't set.
4760                  */
4761                 cifs_readpage_worker(file, page, &page_start);
4762                 put_page(page);
4763                 oncethru = 1;
4764                 goto start;
4765         } else {
4766                 /* we could try using another file handle if there is one -
4767                    but how would we lock it to prevent close of that handle
4768                    racing with this read? In any case
4769                    this will be written out by write_end so is fine */
4770         }
4771 out:
4772         *pagep = page;
4773         return rc;
4774 }
4775
4776 static int cifs_release_page(struct page *page, gfp_t gfp)
4777 {
4778         if (PagePrivate(page))
4779                 return 0;
4780
4781         return cifs_fscache_release_page(page, gfp);
4782 }
4783
4784 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4785                                  unsigned int length)
4786 {
4787         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4788
4789         if (offset == 0 && length == PAGE_SIZE)
4790                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4791 }
4792
4793 static int cifs_launder_page(struct page *page)
4794 {
4795         int rc = 0;
4796         loff_t range_start = page_offset(page);
4797         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4798         struct writeback_control wbc = {
4799                 .sync_mode = WB_SYNC_ALL,
4800                 .nr_to_write = 0,
4801                 .range_start = range_start,
4802                 .range_end = range_end,
4803         };
4804
4805         cifs_dbg(FYI, "Launder page: %p\n", page);
4806
4807         if (clear_page_dirty_for_io(page))
4808                 rc = cifs_writepage_locked(page, &wbc);
4809
4810         cifs_fscache_invalidate_page(page, page->mapping->host);
4811         return rc;
4812 }
4813
4814 void cifs_oplock_break(struct work_struct *work)
4815 {
4816         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4817                                                   oplock_break);
4818         struct inode *inode = d_inode(cfile->dentry);
4819         struct cifsInodeInfo *cinode = CIFS_I(inode);
4820         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4821         struct TCP_Server_Info *server = tcon->ses->server;
4822         int rc = 0;
4823         bool purge_cache = false;
4824         bool is_deferred = false;
4825         struct cifs_deferred_close *dclose;
4826
4827         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4828                         TASK_UNINTERRUPTIBLE);
4829
4830         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4831                                       cfile->oplock_epoch, &purge_cache);
4832
4833         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4834                                                 cifs_has_mand_locks(cinode)) {
4835                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4836                          inode);
4837                 cinode->oplock = 0;
4838         }
4839
4840         if (inode && S_ISREG(inode->i_mode)) {
4841                 if (CIFS_CACHE_READ(cinode))
4842                         break_lease(inode, O_RDONLY);
4843                 else
4844                         break_lease(inode, O_WRONLY);
4845                 rc = filemap_fdatawrite(inode->i_mapping);
4846                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4847                         rc = filemap_fdatawait(inode->i_mapping);
4848                         mapping_set_error(inode->i_mapping, rc);
4849                         cifs_zap_mapping(inode);
4850                 }
4851                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4852                 if (CIFS_CACHE_WRITE(cinode))
4853                         goto oplock_break_ack;
4854         }
4855
4856         rc = cifs_push_locks(cfile);
4857         if (rc)
4858                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4859
4860 oplock_break_ack:
4861         /*
4862          * When oplock break is received and there are no active
4863          * file handles but cached, then schedule deferred close immediately.
4864          * So, new open will not use cached handle.
4865          */
4866         spin_lock(&CIFS_I(inode)->deferred_lock);
4867         is_deferred = cifs_is_deferred_close(cfile, &dclose);
4868         spin_unlock(&CIFS_I(inode)->deferred_lock);
4869         if (is_deferred &&
4870             cfile->deferred_close_scheduled &&
4871             delayed_work_pending(&cfile->deferred)) {
4872                 if (cancel_delayed_work(&cfile->deferred)) {
4873                         _cifsFileInfo_put(cfile, false, false);
4874                         goto oplock_break_done;
4875                 }
4876         }
4877         /*
4878          * releasing stale oplock after recent reconnect of smb session using
4879          * a now incorrect file handle is not a data integrity issue but do
4880          * not bother sending an oplock release if session to server still is
4881          * disconnected since oplock already released by the server
4882          */
4883         if (!cfile->oplock_break_cancelled) {
4884                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4885                                                              cinode);
4886                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4887         }
4888 oplock_break_done:
4889         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4890         cifs_done_oplock_break(cinode);
4891 }
4892
4893 /*
4894  * The presence of cifs_direct_io() in the address space ops vector
4895  * allowes open() O_DIRECT flags which would have failed otherwise.
4896  *
4897  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4898  * so this method should never be called.
4899  *
4900  * Direct IO is not yet supported in the cached mode. 
4901  */
4902 static ssize_t
4903 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4904 {
4905         /*
4906          * FIXME
4907          * Eventually need to support direct IO for non forcedirectio mounts
4908          */
4909         return -EINVAL;
4910 }
4911
4912 static int cifs_swap_activate(struct swap_info_struct *sis,
4913                               struct file *swap_file, sector_t *span)
4914 {
4915         struct cifsFileInfo *cfile = swap_file->private_data;
4916         struct inode *inode = swap_file->f_mapping->host;
4917         unsigned long blocks;
4918         long long isize;
4919
4920         cifs_dbg(FYI, "swap activate\n");
4921
4922         spin_lock(&inode->i_lock);
4923         blocks = inode->i_blocks;
4924         isize = inode->i_size;
4925         spin_unlock(&inode->i_lock);
4926         if (blocks*512 < isize) {
4927                 pr_warn("swap activate: swapfile has holes\n");
4928                 return -EINVAL;
4929         }
4930         *span = sis->pages;
4931
4932         pr_warn_once("Swap support over SMB3 is experimental\n");
4933
4934         /*
4935          * TODO: consider adding ACL (or documenting how) to prevent other
4936          * users (on this or other systems) from reading it
4937          */
4938
4939
4940         /* TODO: add sk_set_memalloc(inet) or similar */
4941
4942         if (cfile)
4943                 cfile->swapfile = true;
4944         /*
4945          * TODO: Since file already open, we can't open with DENY_ALL here
4946          * but we could add call to grab a byte range lock to prevent others
4947          * from reading or writing the file
4948          */
4949
4950         return 0;
4951 }
4952
4953 static void cifs_swap_deactivate(struct file *file)
4954 {
4955         struct cifsFileInfo *cfile = file->private_data;
4956
4957         cifs_dbg(FYI, "swap deactivate\n");
4958
4959         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4960
4961         if (cfile)
4962                 cfile->swapfile = false;
4963
4964         /* do we need to unpin (or unlock) the file */
4965 }
4966
4967 const struct address_space_operations cifs_addr_ops = {
4968         .readpage = cifs_readpage,
4969         .readpages = cifs_readpages,
4970         .writepage = cifs_writepage,
4971         .writepages = cifs_writepages,
4972         .write_begin = cifs_write_begin,
4973         .write_end = cifs_write_end,
4974         .set_page_dirty = __set_page_dirty_nobuffers,
4975         .releasepage = cifs_release_page,
4976         .direct_IO = cifs_direct_io,
4977         .invalidatepage = cifs_invalidate_page,
4978         .launder_page = cifs_launder_page,
4979         /*
4980          * TODO: investigate and if useful we could add an cifs_migratePage
4981          * helper (under an CONFIG_MIGRATION) in the future, and also
4982          * investigate and add an is_dirty_writeback helper if needed
4983          */
4984         .swap_activate = cifs_swap_activate,
4985         .swap_deactivate = cifs_swap_deactivate,
4986 };
4987
4988 /*
4989  * cifs_readpages requires the server to support a buffer large enough to
4990  * contain the header plus one complete page of data.  Otherwise, we need
4991  * to leave cifs_readpages out of the address space operations.
4992  */
4993 const struct address_space_operations cifs_addr_ops_smallbuf = {
4994         .readpage = cifs_readpage,
4995         .writepage = cifs_writepage,
4996         .writepages = cifs_writepages,
4997         .write_begin = cifs_write_begin,
4998         .write_end = cifs_write_end,
4999         .set_page_dirty = __set_page_dirty_nobuffers,
5000         .releasepage = cifs_release_page,
5001         .invalidatepage = cifs_invalidate_page,
5002         .launder_page = cifs_launder_page,
5003 };