// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Mark all open files on tree connections as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->ses->ses_lock);
        if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
                spin_unlock(&tcon->ses->ses_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->ses->ses_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}

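/*
 * Map open(2) access mode flags to the desired access bits sent in the
 * SMB open request.
 */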
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause unnecessary access-denied errors on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
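/*
 * Map open(2) flags to the SMB_O_* flags used by the legacy CIFS POSIX
 * extensions open call.
 */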
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

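/*
 * Map O_CREAT/O_EXCL/O_TRUNC combinations to an SMB create disposition
 * (see the open flag mapping table in cifs_nt_open() below).
 */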
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
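/*
 * Open a file using the legacy CIFS POSIX extensions. On success the
 * caller receives the netfid and oplock; if @pinode is non-NULL, the
 * inode is also looked up (or revalidated) from the returned attributes.
 */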
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

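/*
 * Open a file the traditional ("NT") way: convert the POSIX open flags
 * to an SMB desired access and create disposition, issue the open, then
 * refresh the inode from the server metadata for the new handle.
 */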
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no POSIX flag combination that maps directly
 *      to the FILE_SUPERSEDE disposition (i.e. create whether or not the
 *      file exists). O_CREAT | O_TRUNC is similar, but it truncates an
 *      existing file rather than replacing it with a new file the way
 *      FILE_SUPERSEDE does (which uses the attributes / metadata passed
 *      in on the open call).
 *
 *      O_SYNC is a reasonable match for the CIFS writethrough flag, and
 *      the read/write flags match reasonably. O_LARGEFILE is irrelevant
 *      because largefile support is always used by this client. The
 *      flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC, O_NOFOLLOW and
 *      O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

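/*
 * Return true if any byte-range locks are currently recorded for any
 * handle open against this inode. Callers use this to decide whether a
 * read oplock must be downgraded to None.
 */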
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

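/*
 * Acquire lock_sem for writing without sleeping in down_write() itself:
 * keep retrying the trylock, pausing 10ms between attempts.
 */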
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

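/*
 * Allocate and initialize a cifsFileInfo for a newly opened handle,
 * link it into the inode's and tcon's open-file lists, and hand the
 * server fid and oplock to the cfile via the set_fid op.
 */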
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, offload the final release of @cifs_file to a
 *              worker thread; the close and oplock break paths pass false
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

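/*
 * ->open() for regular files: reuse a cached handle with a pending
 * deferred close when the open flags match; otherwise try a POSIX
 * extensions open where available, falling back to cifs_nt_open().
 */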
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (file->f_flags & O_DIRECT &&
            (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
             file->f_flags & O_APPEND))
                cifs_invalidate_cache(file_inode(file),
                                      FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

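/*
 * Reopen @cfile after its handle was invalidated (e.g. by a reconnect).
 * If @can_flush is true, dirty pages are written out and the inode info
 * is refreshed from the server once the handle is reestablished.
 */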
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Cannot grab the rename sem here: various ops, including some that
         * already hold the rename sem, can end up causing writepage to be
         * called, and if the server was down that means we end up here. We
         * can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * in the reconnect path especially, it is important to
                 * retry hard.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

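/*
 * Delayed-work handler for deferred close: remove the deferred-close
 * record and drop the reference that was held for the deferral.
 */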
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work, so increase the ref count to
                                 * avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

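/*
 * After a reconnect, walk the tcon's open file list and reopen any
 * handles that were invalidated, so persistent handles are restored.
 */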
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

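/*
 * ->release() for directories: close the search handle on the server if
 * it is still live and free any buffered search results.
 */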
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

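/*
 * Allocate a byte-range lock record for the given range, owned by the
 * current thread group.
 */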
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;

        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

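/*
 * Push all cached byte-range locks for @cfile to the server as
 * LOCKING_ANDX requests, packing as many ranges into each request as
 * the server's buffer size allows.
 */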
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

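/*
 * Snapshot of a POSIX lock queued by cifs_push_posix_locks(), filled in
 * under the flc_lock and sent to the server after that lock is dropped.
 */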
1397 struct lock_to_push {
1398         struct list_head llist;
1399         __u64 offset;
1400         __u64 length;
1401         __u32 pid;
1402         __u16 netfid;
1403         __u8 type;
1404 };
1405
1406 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1407 static int
1408 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1409 {
1410         struct inode *inode = d_inode(cfile->dentry);
1411         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1412         struct file_lock *flock;
1413         struct file_lock_context *flctx = inode->i_flctx;
1414         unsigned int count = 0, i;
1415         int rc = 0, xid, type;
1416         struct list_head locks_to_send, *el;
1417         struct lock_to_push *lck, *tmp;
1418         __u64 length;
1419
1420         xid = get_xid();
1421
1422         if (!flctx)
1423                 goto out;
1424
1425         spin_lock(&flctx->flc_lock);
1426         list_for_each(el, &flctx->flc_posix) {
1427                 count++;
1428         }
1429         spin_unlock(&flctx->flc_lock);
1430
1431         INIT_LIST_HEAD(&locks_to_send);
1432
1433         /*
1434          * Allocating count locks is enough because no FL_POSIX locks can be
1435          * added to the list while we are holding cinode->lock_sem that
1436          * protects locking operations of this inode.
1437          */
1438         for (i = 0; i < count; i++) {
1439                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1440                 if (!lck) {
1441                         rc = -ENOMEM;
1442                         goto err_out;
1443                 }
1444                 list_add_tail(&lck->llist, &locks_to_send);
1445         }
1446
1447         el = locks_to_send.next;
1448         spin_lock(&flctx->flc_lock);
1449         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1450                 if (el == &locks_to_send) {
1451                         /*
1452                          * The list ended. We don't have enough allocated
1453                          * structures - something is really wrong.
1454                          */
1455                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1456                         break;
1457                 }
1458                 length = cifs_flock_len(flock);
1459                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1460                         type = CIFS_RDLCK;
1461                 else
1462                         type = CIFS_WRLCK;
1463                 lck = list_entry(el, struct lock_to_push, llist);
1464                 lck->pid = hash_lockowner(flock->fl_owner);
1465                 lck->netfid = cfile->fid.netfid;
1466                 lck->length = length;
1467                 lck->type = type;
1468                 lck->offset = flock->fl_start;
1469         }
1470         spin_unlock(&flctx->flc_lock);
1471
1472         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1473                 int stored_rc;
1474
1475                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1476                                              lck->offset, lck->length, NULL,
1477                                              lck->type, 0);
1478                 if (stored_rc)
1479                         rc = stored_rc;
1480                 list_del(&lck->llist);
1481                 kfree(lck);
1482         }
1483
1484 out:
1485         free_xid(xid);
1486         return rc;
1487 err_out:
1488         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1489                 list_del(&lck->llist);
1490                 kfree(lck);
1491         }
1492         goto out;
1493 }
1494 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
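
/*
 * Note on the allocation pattern in cifs_push_posix_locks() above: the
 * lock_to_push entries are allocated with GFP_KERNEL *between* the two
 * flc_lock sections because kmalloc() may sleep, which is not allowed
 * under a spinlock.  The caller holds cinode->lock_sem, so no FL_POSIX
 * locks can be added between the count pass and the fill pass, and
 * "count" allocations are guaranteed to suffice.
 */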
1495
1496 static int
1497 cifs_push_locks(struct cifsFileInfo *cfile)
1498 {
1499         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1500         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1501         int rc = 0;
1502 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1503         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1504 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1505
1506         /* we are going to update can_cache_brlcks here - need write access */
1507         cifs_down_write(&cinode->lock_sem);
1508         if (!cinode->can_cache_brlcks) {
1509                 up_write(&cinode->lock_sem);
1510                 return rc;
1511         }
1512
1513 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1514         if (cap_unix(tcon->ses) &&
1515             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1516             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1517                 rc = cifs_push_posix_locks(cfile);
1518         else
1519 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1520                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1521
1522         cinode->can_cache_brlcks = false;
1523         up_write(&cinode->lock_sem);
1524         return rc;
1525 }
1526
1527 static void
1528 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1529                 bool *wait_flag, struct TCP_Server_Info *server)
1530 {
1531         if (flock->fl_flags & FL_POSIX)
1532                 cifs_dbg(FYI, "Posix\n");
1533         if (flock->fl_flags & FL_FLOCK)
1534                 cifs_dbg(FYI, "Flock\n");
1535         if (flock->fl_flags & FL_SLEEP) {
1536                 cifs_dbg(FYI, "Blocking lock\n");
1537                 *wait_flag = true;
1538         }
1539         if (flock->fl_flags & FL_ACCESS)
1540                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1541         if (flock->fl_flags & FL_LEASE)
1542                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1543         if (flock->fl_flags &
1544             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1545                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1546                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1547
1548         *type = server->vals->large_lock_type;
1549         if (flock->fl_type == F_WRLCK) {
1550                 cifs_dbg(FYI, "F_WRLCK\n");
1551                 *type |= server->vals->exclusive_lock_type;
1552                 *lock = 1;
1553         } else if (flock->fl_type == F_UNLCK) {
1554                 cifs_dbg(FYI, "F_UNLCK\n");
1555                 *type |= server->vals->unlock_lock_type;
1556                 *unlock = 1;
1557                 /* Check if unlock includes more than one lock range */
1558         } else if (flock->fl_type == F_RDLCK) {
1559                 cifs_dbg(FYI, "F_RDLCK\n");
1560                 *type |= server->vals->shared_lock_type;
1561                 *lock = 1;
1562         } else if (flock->fl_type == F_EXLCK) {
1563                 cifs_dbg(FYI, "F_EXLCK\n");
1564                 *type |= server->vals->exclusive_lock_type;
1565                 *lock = 1;
1566         } else if (flock->fl_type == F_SHLCK) {
1567                 cifs_dbg(FYI, "F_SHLCK\n");
1568                 *type |= server->vals->shared_lock_type;
1569                 *lock = 1;
1570         } else
1571                 cifs_dbg(FYI, "Unknown type of lock\n");
1572 }
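
/*
 * Decoding sketch (illustrative): a blocking exclusive request such as
 * fcntl(F_SETLKW) with F_WRLCK comes out of cifs_read_flock() as
 *
 *	*type      == large_lock_type | exclusive_lock_type
 *	*lock      == 1, *unlock == 0
 *	*wait_flag == true		(FL_SLEEP was set)
 *
 * The *_lock_type values come from server->vals, so the same decoder
 * serves every dialect the server structure describes.
 */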
1573
1574 static int
1575 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1576            bool wait_flag, bool posix_lck, unsigned int xid)
1577 {
1578         int rc = 0;
1579         __u64 length = cifs_flock_len(flock);
1580         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1581         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1582         struct TCP_Server_Info *server = tcon->ses->server;
1583 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1584         __u16 netfid = cfile->fid.netfid;
1585
1586         if (posix_lck) {
1587                 int posix_lock_type;
1588
1589                 rc = cifs_posix_lock_test(file, flock);
1590                 if (!rc)
1591                         return rc;
1592
1593                 if (type & server->vals->shared_lock_type)
1594                         posix_lock_type = CIFS_RDLCK;
1595                 else
1596                         posix_lock_type = CIFS_WRLCK;
1597                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1598                                       hash_lockowner(flock->fl_owner),
1599                                       flock->fl_start, length, flock,
1600                                       posix_lock_type, wait_flag);
1601                 return rc;
1602         }
1603 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1604
1605         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1606         if (!rc)
1607                 return rc;
1608
1609         /* BB we could chain these into one lock request BB */
1610         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1611                                     1, 0, false);
1612         if (rc == 0) {
1613                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1614                                             type, 0, 1, false);
1615                 flock->fl_type = F_UNLCK;
1616                 if (rc != 0)
1617                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1618                                  rc);
1619                 return 0;
1620         }
1621
1622         if (type & server->vals->shared_lock_type) {
1623                 flock->fl_type = F_WRLCK;
1624                 return 0;
1625         }
1626
1627         type &= ~server->vals->exclusive_lock_type;
1628
1629         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1630                                     type | server->vals->shared_lock_type,
1631                                     1, 0, false);
1632         if (rc == 0) {
1633                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1634                         type | server->vals->shared_lock_type, 0, 1, false);
1635                 flock->fl_type = F_RDLCK;
1636                 if (rc != 0)
1637                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1638                                  rc);
1639         } else
1640                 flock->fl_type = F_WRLCK;
1641
1642         return 0;
1643 }
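
/*
 * Note: SMB mandatory byte-range locks have no native "test" request,
 * so the probes in cifs_getlk() take the lock and immediately release
 * it.  If the requested type is granted, the range is free (F_UNLCK).
 * A failed shared probe implies an exclusive holder (F_WRLCK); a failed
 * exclusive probe is retried shared to distinguish a conflicting read
 * lock (F_RDLCK) from a write lock (F_WRLCK).
 */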
1644
1645 void
1646 cifs_move_llist(struct list_head *source, struct list_head *dest)
1647 {
1648         struct list_head *li, *tmp;
1649         list_for_each_safe(li, tmp, source)
1650                 list_move(li, dest);
1651 }
1652
1653 void
1654 cifs_free_llist(struct list_head *llist)
1655 {
1656         struct cifsLockInfo *li, *tmp;
1657         list_for_each_entry_safe(li, tmp, llist, llist) {
1658                 cifs_del_lock_waiters(li);
1659                 list_del(&li->llist);
1660                 kfree(li);
1661         }
1662 }
1663
1664 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1665 int
1666 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1667                   unsigned int xid)
1668 {
1669         int rc = 0, stored_rc;
1670         static const int types[] = {
1671                 LOCKING_ANDX_LARGE_FILES,
1672                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1673         };
1674         unsigned int i;
1675         unsigned int max_num, num, max_buf;
1676         LOCKING_ANDX_RANGE *buf, *cur;
1677         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1678         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1679         struct cifsLockInfo *li, *tmp;
1680         __u64 length = cifs_flock_len(flock);
1681         struct list_head tmp_llist;
1682
1683         INIT_LIST_HEAD(&tmp_llist);
1684
1685         /*
1686          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1687          * and check it before using.
1688          */
1689         max_buf = tcon->ses->server->maxBuf;
1690         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1691                 return -EINVAL;
1692
1693         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1694                      PAGE_SIZE);
1695         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1696                         PAGE_SIZE);
1697         max_num = (max_buf - sizeof(struct smb_hdr)) /
1698                                                 sizeof(LOCKING_ANDX_RANGE);
1699         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1700         if (!buf)
1701                 return -ENOMEM;
1702
1703         cifs_down_write(&cinode->lock_sem);
1704         for (i = 0; i < 2; i++) {
1705                 cur = buf;
1706                 num = 0;
1707                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1708                         if (flock->fl_start > li->offset ||
1709                             (flock->fl_start + length) <
1710                             (li->offset + li->length))
1711                                 continue;
1712                         if (current->tgid != li->pid)
1713                                 continue;
1714                         if (types[i] != li->type)
1715                                 continue;
1716                         if (cinode->can_cache_brlcks) {
1717                                 /*
1718                                  * We can cache brlock requests - simply remove
1719                                  * a lock from the file's list.
1720                                  */
1721                                 list_del(&li->llist);
1722                                 cifs_del_lock_waiters(li);
1723                                 kfree(li);
1724                                 continue;
1725                         }
1726                         cur->Pid = cpu_to_le16(li->pid);
1727                         cur->LengthLow = cpu_to_le32((u32)li->length);
1728                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1729                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1730                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1731                         /*
1732                          * We need to save a lock here to let us add it again to
1733                          * the file's list if the unlock range request fails on
1734                          * the server.
1735                          */
1736                         list_move(&li->llist, &tmp_llist);
1737                         if (++num == max_num) {
1738                                 stored_rc = cifs_lockv(xid, tcon,
1739                                                        cfile->fid.netfid,
1740                                                        li->type, num, 0, buf);
1741                                 if (stored_rc) {
1742                                         /*
1743                                          * We failed on the unlock range
1744                                          * request - add all locks from the tmp
1745                                          * list to the head of the file's list.
1746                                          */
1747                                         cifs_move_llist(&tmp_llist,
1748                                                         &cfile->llist->locks);
1749                                         rc = stored_rc;
1750                                 } else
1751                                         /*
1752                                          * The unlock range request succeeded -
1753                                          * free the tmp list.
1754                                          */
1755                                         cifs_free_llist(&tmp_llist);
1756                                 cur = buf;
1757                                 num = 0;
1758                         } else
1759                                 cur++;
1760                 }
1761                 if (num) {
1762                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1763                                                types[i], num, 0, buf);
1764                         if (stored_rc) {
1765                                 cifs_move_llist(&tmp_llist,
1766                                                 &cfile->llist->locks);
1767                                 rc = stored_rc;
1768                         } else
1769                                 cifs_free_llist(&tmp_llist);
1770                 }
1771         }
1772
1773         up_write(&cinode->lock_sem);
1774         kfree(buf);
1775         return rc;
1776 }
1777 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
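
/*
 * Note on cifs_unlock_range() above: ranges being unlocked are parked
 * on tmp_llist instead of being freed immediately, so that a failed
 * LOCKING_ANDX batch can be spliced back onto the file's lock list via
 * cifs_move_llist(); only once the server acknowledges the batch are
 * the entries freed with cifs_free_llist().
 */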
1778
1779 static int
1780 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1781            bool wait_flag, bool posix_lck, int lock, int unlock,
1782            unsigned int xid)
1783 {
1784         int rc = 0;
1785         __u64 length = cifs_flock_len(flock);
1786         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1787         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1788         struct TCP_Server_Info *server = tcon->ses->server;
1789         struct inode *inode = d_inode(cfile->dentry);
1790
1791 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1792         if (posix_lck) {
1793                 int posix_lock_type;
1794
1795                 rc = cifs_posix_lock_set(file, flock);
1796                 if (rc <= FILE_LOCK_DEFERRED)
1797                         return rc;
1798
1799                 if (type & server->vals->shared_lock_type)
1800                         posix_lock_type = CIFS_RDLCK;
1801                 else
1802                         posix_lock_type = CIFS_WRLCK;
1803
1804                 if (unlock == 1)
1805                         posix_lock_type = CIFS_UNLCK;
1806
1807                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1808                                       hash_lockowner(flock->fl_owner),
1809                                       flock->fl_start, length,
1810                                       NULL, posix_lock_type, wait_flag);
1811                 goto out;
1812         }
1813 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1814         if (lock) {
1815                 struct cifsLockInfo *lock;
1816
1817                 lock = cifs_lock_init(flock->fl_start, length, type,
1818                                       flock->fl_flags);
1819                 if (!lock)
1820                         return -ENOMEM;
1821
1822                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1823                 if (rc < 0) {
1824                         kfree(lock);
1825                         return rc;
1826                 }
1827                 if (!rc)
1828                         goto out;
1829
1830                 /*
1831                  * Windows 7 server can delay breaking lease from read to None
1832                  * if we set a byte-range lock on a file - break it explicitly
1833                  * before sending the lock to the server to be sure the next
1834                  * read won't conflict with non-overlapping locks due to
1835                  * page reading.
1836                  */
1837                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1838                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1839                         cifs_zap_mapping(inode);
1840                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1841                                  inode);
1842                         CIFS_I(inode)->oplock = 0;
1843                 }
1844
1845                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1846                                             type, 1, 0, wait_flag);
1847                 if (rc) {
1848                         kfree(lock);
1849                         return rc;
1850                 }
1851
1852                 cifs_lock_add(cfile, lock);
1853         } else if (unlock)
1854                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1855
1856 out:
1857         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1858                 /*
1859                  * If this is a request to remove all locks because we
1860                  * are closing the file, it doesn't matter if the
1861                  * unlocking failed as both cifs.ko and the SMB server
1862                  * remove the lock on file close
1863                  */
1864                 if (rc) {
1865                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1866                         if (!(flock->fl_flags & FL_CLOSE))
1867                                 return rc;
1868                 }
1869                 rc = locks_lock_file_wait(file, flock);
1870         }
1871         return rc;
1872 }
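
/*
 * Note: for FL_POSIX/FL_FLOCK requests the local VFS lock is only
 * recorded (locks_lock_file_wait()) after the server has accepted the
 * range, keeping client and server state in step.  On FL_CLOSE a
 * failed server-side unlock is deliberately ignored, since both
 * cifs.ko and the server drop byte-range locks when the handle is
 * closed anyway.
 */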
1873
1874 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1875 {
1876         int rc, xid;
1877         int lock = 0, unlock = 0;
1878         bool wait_flag = false;
1879         bool posix_lck = false;
1880         struct cifs_sb_info *cifs_sb;
1881         struct cifs_tcon *tcon;
1882         struct cifsFileInfo *cfile;
1883         __u32 type;
1884
1885         rc = -EACCES;
1886
1887         if (!(fl->fl_flags & FL_FLOCK))
1888                 return -ENOLCK;
1889         xid = get_xid();
1890
1891         cfile = (struct cifsFileInfo *)file->private_data;
1892         tcon = tlink_tcon(cfile->tlink);
1893
1894         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1895                         tcon->ses->server);
1896         cifs_sb = CIFS_FILE_SB(file);
1897
1898         if (cap_unix(tcon->ses) &&
1899             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1900             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1901                 posix_lck = true;
1902
1903         if (!lock && !unlock) {
1904                 /*
1905                  * if no lock or unlock then nothing to do since we do not
1906                  * know what it is
1907                  */
1908                 free_xid(xid);
1909                 return -EOPNOTSUPP;
1910         }
1911
1912         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1913                         xid);
1914         free_xid(xid);
1915         return rc;
1918 }
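
/*
 * Illustrative user-space trigger (hypothetical path and fd, assuming
 * a cifs mount):
 *
 *	int fd = open("/mnt/cifs/file", O_RDWR);
 *	flock(fd, LOCK_EX);		arrives here with FL_FLOCK set
 *
 * LOCK_SH maps to a shared request and LOCK_UN to an unlock, exactly
 * as decoded by cifs_read_flock() above.
 */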
1919
1920 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1921 {
1922         int rc, xid;
1923         int lock = 0, unlock = 0;
1924         bool wait_flag = false;
1925         bool posix_lck = false;
1926         struct cifs_sb_info *cifs_sb;
1927         struct cifs_tcon *tcon;
1928         struct cifsFileInfo *cfile;
1929         __u32 type;
1930
1931         rc = -EACCES;
1932         xid = get_xid();
1933
1934         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
1935                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
1936                  (long long)flock->fl_end);
1937
1938         cfile = (struct cifsFileInfo *)file->private_data;
1939         tcon = tlink_tcon(cfile->tlink);
1940
1941         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1942                         tcon->ses->server);
1943         cifs_sb = CIFS_FILE_SB(file);
1944         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1945
1946         if (cap_unix(tcon->ses) &&
1947             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1948             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1949                 posix_lck = true;
1950         /*
1951          * BB add code here to normalize offset and length to account for
1952          * negative length, which we cannot accept over the wire.
1953          */
1954         if (IS_GETLK(cmd)) {
1955                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1956                 free_xid(xid);
1957                 return rc;
1958         }
1959
1960         if (!lock && !unlock) {
1961                 /*
1962                  * if no lock or unlock then nothing to do since we do not
1963                  * know what it is
1964                  */
1965                 free_xid(xid);
1966                 return -EOPNOTSUPP;
1967         }
1968
1969         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1970                         xid);
1971         free_xid(xid);
1972         return rc;
1973 }
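
/*
 * Illustrative user-space trigger (hypothetical values):
 *
 *	struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET,
 *			    .l_start = 0, .l_len = 4096 };
 *	fcntl(fd, F_SETLKW, &fl);	blocking set -> cifs_setlk()
 *	fcntl(fd, F_GETLK,  &fl);	query        -> cifs_getlk()
 */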
1974
1975 /*
1976  * update the file size (if needed) after a write. Should be called with
1977  * the inode->i_lock held
1978  */
1979 void
1980 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1981                       unsigned int bytes_written)
1982 {
1983         loff_t end_of_write = offset + bytes_written;
1984
1985         if (end_of_write > cifsi->server_eof)
1986                 cifsi->server_eof = end_of_write;
1987 }
1988
1989 static ssize_t
1990 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1991            size_t write_size, loff_t *offset)
1992 {
1993         int rc = 0;
1994         unsigned int bytes_written = 0;
1995         unsigned int total_written;
1996         struct cifs_tcon *tcon;
1997         struct TCP_Server_Info *server;
1998         unsigned int xid;
1999         struct dentry *dentry = open_file->dentry;
2000         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2001         struct cifs_io_parms io_parms = {0};
2002
2003         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2004                  write_size, *offset, dentry);
2005
2006         tcon = tlink_tcon(open_file->tlink);
2007         server = tcon->ses->server;
2008
2009         if (!server->ops->sync_write)
2010                 return -ENOSYS;
2011
2012         xid = get_xid();
2013
2014         for (total_written = 0; write_size > total_written;
2015              total_written += bytes_written) {
2016                 rc = -EAGAIN;
2017                 while (rc == -EAGAIN) {
2018                         struct kvec iov[2];
2019                         unsigned int len;
2020
2021                         if (open_file->invalidHandle) {
2022                                 /* we could deadlock if we called
2023                                    filemap_fdatawait from here, so tell
2024                                    reopen_file not to flush data to the
2025                                    server now */
2026                                 rc = cifs_reopen_file(open_file, false);
2027                                 if (rc != 0)
2028                                         break;
2029                         }
2030
2031                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2032                                   (unsigned int)write_size - total_written);
2033                         /* iov[0] is reserved for smb header */
2034                         iov[1].iov_base = (char *)write_data + total_written;
2035                         iov[1].iov_len = len;
2036                         io_parms.pid = pid;
2037                         io_parms.tcon = tcon;
2038                         io_parms.offset = *offset;
2039                         io_parms.length = len;
2040                         rc = server->ops->sync_write(xid, &open_file->fid,
2041                                         &io_parms, &bytes_written, iov, 1);
2042                 }
2043                 if (rc || (bytes_written == 0)) {
2044                         if (total_written)
2045                                 break;
2046                         else {
2047                                 free_xid(xid);
2048                                 return rc;
2049                         }
2050                 } else {
2051                         spin_lock(&d_inode(dentry)->i_lock);
2052                         cifs_update_eof(cifsi, *offset, bytes_written);
2053                         spin_unlock(&d_inode(dentry)->i_lock);
2054                         *offset += bytes_written;
2055                 }
2056         }
2057
2058         cifs_stats_bytes_written(tcon, total_written);
2059
2060         if (total_written > 0) {
2061                 spin_lock(&d_inode(dentry)->i_lock);
2062                 if (*offset > d_inode(dentry)->i_size) {
2063                         i_size_write(d_inode(dentry), *offset);
2064                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2065                 }
2066                 spin_unlock(&d_inode(dentry)->i_lock);
2067         }
2068         mark_inode_dirty_sync(d_inode(dentry));
2069         free_xid(xid);
2070         return total_written;
2071 }
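
/*
 * Note on the retry loop in cifs_write(): data is sent in chunks no
 * larger than server->ops->wp_retry_size(), and -EAGAIN (typically a
 * reconnect) is absorbed by reopening an invalidated handle and
 * resending.  A short count is returned only when a hard error hits
 * after some data was already written; if nothing was written, the
 * error code itself is returned.
 */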
2072
2073 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2074                                         bool fsuid_only)
2075 {
2076         struct cifsFileInfo *open_file = NULL;
2077         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2078
2079         /* only filter by fsuid on multiuser mounts */
2080         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2081                 fsuid_only = false;
2082
2083         spin_lock(&cifs_inode->open_file_lock);
2084         /* we could simply take the first list entry, since write-only entries
2085            are always at the end of the list, but the first entry might have a
2086            close pending, so we go through the whole list */
2087         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2088                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2089                         continue;
2090                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2091                         if (!open_file->invalidHandle) {
2092                                 /* found a good file */
2093                                 /* lock it so it will not be closed on us */
2094                                 cifsFileInfo_get(open_file);
2095                                 spin_unlock(&cifs_inode->open_file_lock);
2096                                 return open_file;
2097                         } /* else might as well continue, and look for
2098                              another, or simply have the caller reopen it
2099                              again rather than trying to fix this handle */
2100                 } else /* write only file */
2101                         break; /* write only files are last so must be done */
2102         }
2103         spin_unlock(&cifs_inode->open_file_lock);
2104         return NULL;
2105 }
2106
2107 /* Return -EBADF if no handle is found and general rc otherwise */
2108 int
2109 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2110                        struct cifsFileInfo **ret_file)
2111 {
2112         struct cifsFileInfo *open_file, *inv_file = NULL;
2113         struct cifs_sb_info *cifs_sb;
2114         bool any_available = false;
2115         int rc = -EBADF;
2116         unsigned int refind = 0;
2117         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2118         bool with_delete = flags & FIND_WR_WITH_DELETE;
2119         *ret_file = NULL;
2120
2121         /*
2122          * Having a null inode here (because mapping->host was set to zero by
2123          * the VFS or MM) should not happen, but we had reports of an oops (due
2124          * to it being zero) during stress test cases, so we need to check for it
2125          */
2126
2127         if (cifs_inode == NULL) {
2128                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2129                 dump_stack();
2130                 return rc;
2131         }
2132
2133         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2134
2135         /* only filter by fsuid on multiuser mounts */
2136         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2137                 fsuid_only = false;
2138
2139         spin_lock(&cifs_inode->open_file_lock);
2140 refind_writable:
2141         if (refind > MAX_REOPEN_ATT) {
2142                 spin_unlock(&cifs_inode->open_file_lock);
2143                 return rc;
2144         }
2145         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2146                 if (!any_available && open_file->pid != current->tgid)
2147                         continue;
2148                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2149                         continue;
2150                 if (with_delete && !(open_file->fid.access & DELETE))
2151                         continue;
2152                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2153                         if (!open_file->invalidHandle) {
2154                                 /* found a good writable file */
2155                                 cifsFileInfo_get(open_file);
2156                                 spin_unlock(&cifs_inode->open_file_lock);
2157                                 *ret_file = open_file;
2158                                 return 0;
2159                         } else {
2160                                 if (!inv_file)
2161                                         inv_file = open_file;
2162                         }
2163                 }
2164         }
2165         /* couldn't find a usable FH with the same pid, try any available */
2166         if (!any_available) {
2167                 any_available = true;
2168                 goto refind_writable;
2169         }
2170
2171         if (inv_file) {
2172                 any_available = false;
2173                 cifsFileInfo_get(inv_file);
2174         }
2175
2176         spin_unlock(&cifs_inode->open_file_lock);
2177
2178         if (inv_file) {
2179                 rc = cifs_reopen_file(inv_file, false);
2180                 if (!rc) {
2181                         *ret_file = inv_file;
2182                         return 0;
2183                 }
2184
2185                 spin_lock(&cifs_inode->open_file_lock);
2186                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2187                 spin_unlock(&cifs_inode->open_file_lock);
2188                 cifsFileInfo_put(inv_file);
2189                 ++refind;
2190                 inv_file = NULL;
2191                 spin_lock(&cifs_inode->open_file_lock);
2192                 goto refind_writable;
2193         }
2194
2195         return rc;
2196 }
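
/*
 * Search-order note for cifs_get_writable_file(): valid handles opened
 * by the calling thread group are preferred, then any valid writable
 * handle, and only then is an invalidated handle reopened - at most
 * MAX_REOPEN_ATT attempts before giving up with -EBADF.
 */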
2197
2198 struct cifsFileInfo *
2199 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2200 {
2201         struct cifsFileInfo *cfile;
2202         int rc;
2203
2204         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2205         if (rc)
2206                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2207
2208         return cfile;
2209 }
2210
2211 int
2212 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2213                        int flags,
2214                        struct cifsFileInfo **ret_file)
2215 {
2216         struct cifsFileInfo *cfile;
2217         void *page = alloc_dentry_path();
2218
2219         *ret_file = NULL;
2220
2221         spin_lock(&tcon->open_file_lock);
2222         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2223                 struct cifsInodeInfo *cinode;
2224                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2225                 if (IS_ERR(full_path)) {
2226                         spin_unlock(&tcon->open_file_lock);
2227                         free_dentry_path(page);
2228                         return PTR_ERR(full_path);
2229                 }
2230                 if (strcmp(full_path, name))
2231                         continue;
2232
2233                 cinode = CIFS_I(d_inode(cfile->dentry));
2234                 spin_unlock(&tcon->open_file_lock);
2235                 free_dentry_path(page);
2236                 return cifs_get_writable_file(cinode, flags, ret_file);
2237         }
2238
2239         spin_unlock(&tcon->open_file_lock);
2240         free_dentry_path(page);
2241         return -ENOENT;
2242 }
2243
2244 int
2245 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2246                        struct cifsFileInfo **ret_file)
2247 {
2248         struct cifsFileInfo *cfile;
2249         void *page = alloc_dentry_path();
2250
2251         *ret_file = NULL;
2252
2253         spin_lock(&tcon->open_file_lock);
2254         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2255                 struct cifsInodeInfo *cinode;
2256                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2257                 if (IS_ERR(full_path)) {
2258                         spin_unlock(&tcon->open_file_lock);
2259                         free_dentry_path(page);
2260                         return PTR_ERR(full_path);
2261                 }
2262                 if (strcmp(full_path, name))
2263                         continue;
2264
2265                 cinode = CIFS_I(d_inode(cfile->dentry));
2266                 spin_unlock(&tcon->open_file_lock);
2267                 free_dentry_path(page);
2268                 *ret_file = find_readable_file(cinode, 0);
2269                 return *ret_file ? 0 : -ENOENT;
2270         }
2271
2272         spin_unlock(&tcon->open_file_lock);
2273         free_dentry_path(page);
2274         return -ENOENT;
2275 }
2276
2277 void
2278 cifs_writedata_release(struct kref *refcount)
2279 {
2280         struct cifs_writedata *wdata = container_of(refcount,
2281                                         struct cifs_writedata, refcount);
2282 #ifdef CONFIG_CIFS_SMB_DIRECT
2283         if (wdata->mr) {
2284                 smbd_deregister_mr(wdata->mr);
2285                 wdata->mr = NULL;
2286         }
2287 #endif
2288
2289         if (wdata->cfile)
2290                 cifsFileInfo_put(wdata->cfile);
2291
2292         kvfree(wdata->pages);
2293         kfree(wdata);
2294 }
2295
2296 /*
2297  * Write failed with a retryable error. Resend the write request. It's also
2298  * possible that the page was redirtied, so re-clean the page.
2299  */
2300 static void
2301 cifs_writev_requeue(struct cifs_writedata *wdata)
2302 {
2303         int i, rc = 0;
2304         struct inode *inode = d_inode(wdata->cfile->dentry);
2305         struct TCP_Server_Info *server;
2306         unsigned int rest_len;
2307
2308         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2309         i = 0;
2310         rest_len = wdata->bytes;
2311         do {
2312                 struct cifs_writedata *wdata2;
2313                 unsigned int j, nr_pages, wsize, tailsz, cur_len;
2314
2315                 wsize = server->ops->wp_retry_size(inode);
2316                 if (wsize < rest_len) {
2317                         nr_pages = wsize / PAGE_SIZE;
2318                         if (!nr_pages) {
2319                                 rc = -EOPNOTSUPP;
2320                                 break;
2321                         }
2322                         cur_len = nr_pages * PAGE_SIZE;
2323                         tailsz = PAGE_SIZE;
2324                 } else {
2325                         nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
2326                         cur_len = rest_len;
2327                         tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
2328                 }
2329
2330                 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
2331                 if (!wdata2) {
2332                         rc = -ENOMEM;
2333                         break;
2334                 }
2335
2336                 for (j = 0; j < nr_pages; j++) {
2337                         wdata2->pages[j] = wdata->pages[i + j];
2338                         lock_page(wdata2->pages[j]);
2339                         clear_page_dirty_for_io(wdata2->pages[j]);
2340                 }
2341
2342                 wdata2->sync_mode = wdata->sync_mode;
2343                 wdata2->nr_pages = nr_pages;
2344                 wdata2->offset = page_offset(wdata2->pages[0]);
2345                 wdata2->pagesz = PAGE_SIZE;
2346                 wdata2->tailsz = tailsz;
2347                 wdata2->bytes = cur_len;
2348
2349                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2350                                             &wdata2->cfile);
2351                 if (!wdata2->cfile) {
2352                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2353                                  rc);
2354                         if (!is_retryable_error(rc))
2355                                 rc = -EBADF;
2356                 } else {
2357                         wdata2->pid = wdata2->cfile->pid;
2358                         rc = server->ops->async_writev(wdata2,
2359                                                        cifs_writedata_release);
2360                 }
2361
2362                 for (j = 0; j < nr_pages; j++) {
2363                         unlock_page(wdata2->pages[j]);
2364                         if (rc != 0 && !is_retryable_error(rc)) {
2365                                 SetPageError(wdata2->pages[j]);
2366                                 end_page_writeback(wdata2->pages[j]);
2367                                 put_page(wdata2->pages[j]);
2368                         }
2369                 }
2370
2371                 kref_put(&wdata2->refcount, cifs_writedata_release);
2372                 if (rc) {
2373                         if (is_retryable_error(rc))
2374                                 continue;
2375                         i += nr_pages;
2376                         break;
2377                 }
2378
2379                 rest_len -= cur_len;
2380                 i += nr_pages;
2381         } while (i < wdata->nr_pages);
2382
2383         /* cleanup remaining pages from the original wdata */
2384         for (; i < wdata->nr_pages; i++) {
2385                 SetPageError(wdata->pages[i]);
2386                 end_page_writeback(wdata->pages[i]);
2387                 put_page(wdata->pages[i]);
2388         }
2389
2390         if (rc != 0 && !is_retryable_error(rc))
2391                 mapping_set_error(inode->i_mapping, rc);
2392         kref_put(&wdata->refcount, cifs_writedata_release);
2393 }
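
/*
 * Splitting sketch for the requeue above (illustrative numbers): if the
 * original wdata spans 10 pages but wp_retry_size() now allows only 4
 * pages per request, the loop issues chunks of 4, 4 and 2 pages.  For
 * the final, size-limited chunk, tailsz carries the residue, e.g. a
 * rest_len of 6500 bytes over 2 pages gives tailsz = 6500 - 4096 =
 * 2404.
 */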
2394
2395 void
2396 cifs_writev_complete(struct work_struct *work)
2397 {
2398         struct cifs_writedata *wdata = container_of(work,
2399                                                 struct cifs_writedata, work);
2400         struct inode *inode = d_inode(wdata->cfile->dentry);
2401         int i = 0;
2402
2403         if (wdata->result == 0) {
2404                 spin_lock(&inode->i_lock);
2405                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2406                 spin_unlock(&inode->i_lock);
2407                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2408                                          wdata->bytes);
2409         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2410                 return cifs_writev_requeue(wdata);
2411
2412         for (i = 0; i < wdata->nr_pages; i++) {
2413                 struct page *page = wdata->pages[i];
2414
2415                 if (wdata->result == -EAGAIN)
2416                         __set_page_dirty_nobuffers(page);
2417                 else if (wdata->result < 0)
2418                         SetPageError(page);
2419                 end_page_writeback(page);
2420                 cifs_readpage_to_fscache(inode, page);
2421                 put_page(page);
2422         }
2423         if (wdata->result != -EAGAIN)
2424                 mapping_set_error(inode->i_mapping, wdata->result);
2425         kref_put(&wdata->refcount, cifs_writedata_release);
2426 }
2427
2428 struct cifs_writedata *
2429 cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2430 {
2431         struct page **pages =
2432                 kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
2433         if (pages)
2434                 return cifs_writedata_direct_alloc(pages, complete);
2435
2436         return NULL;
2437 }
2438
2439 struct cifs_writedata *
2440 cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
2441 {
2442         struct cifs_writedata *wdata;
2443
2444         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2445         if (wdata != NULL) {
2446                 wdata->pages = pages;
2447                 kref_init(&wdata->refcount);
2448                 INIT_LIST_HEAD(&wdata->list);
2449                 init_completion(&wdata->done);
2450                 INIT_WORK(&wdata->work, complete);
2451         }
2452         return wdata;
2453 }
2454
2456 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2457 {
2458         struct address_space *mapping = page->mapping;
2459         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2460         char *write_data;
2461         int rc = -EFAULT;
2462         int bytes_written = 0;
2463         struct inode *inode;
2464         struct cifsFileInfo *open_file;
2465
2466         if (!mapping || !mapping->host)
2467                 return -EFAULT;
2468
2469         inode = page->mapping->host;
2470
2471         offset += (loff_t)from;
2472         write_data = kmap(page);
2473         write_data += from;
2474
2475         if ((to > PAGE_SIZE) || (from > to)) {
2476                 kunmap(page);
2477                 return -EIO;
2478         }
2479
2480         /* racing with truncate? */
2481         if (offset > mapping->host->i_size) {
2482                 kunmap(page);
2483                 return 0; /* don't care */
2484         }
2485
2486         /* check to make sure that we are not extending the file */
2487         if (mapping->host->i_size - offset < (loff_t)to)
2488                 to = (unsigned)(mapping->host->i_size - offset);
2489
2490         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2491                                     &open_file);
2492         if (!rc) {
2493                 bytes_written = cifs_write(open_file, open_file->pid,
2494                                            write_data, to - from, &offset);
2495                 cifsFileInfo_put(open_file);
2496                 /* Does mm or vfs already set times? */
2497                 inode->i_atime = inode->i_mtime = current_time(inode);
2498                 if ((bytes_written > 0) && (offset))
2499                         rc = 0;
2500                 else if (bytes_written < 0)
2501                         rc = bytes_written;
2502                 else
2503                         rc = -EFAULT;
2504         } else {
2505                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2506                 if (!is_retryable_error(rc))
2507                         rc = -EIO;
2508         }
2509
2510         kunmap(page);
2511         return rc;
2512 }
2513
2514 static struct cifs_writedata *
2515 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2516                           pgoff_t end, pgoff_t *index,
2517                           unsigned int *found_pages)
2518 {
2519         struct cifs_writedata *wdata;
2520
2521         wdata = cifs_writedata_alloc((unsigned int)tofind,
2522                                      cifs_writev_complete);
2523         if (!wdata)
2524                 return NULL;
2525
2526         *found_pages = find_get_pages_range_tag(mapping, index, end,
2527                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2528         return wdata;
2529 }
2530
2531 static unsigned int
2532 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2533                     struct address_space *mapping,
2534                     struct writeback_control *wbc,
2535                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2536 {
2537         unsigned int nr_pages = 0, i;
2538         struct page *page;
2539
2540         for (i = 0; i < found_pages; i++) {
2541                 page = wdata->pages[i];
2542                 /*
2543                  * At this point we hold neither the i_pages lock nor the
2544                  * page lock: the page may be truncated or invalidated
2545                  * (changing page->mapping to NULL), or even swizzled
2546                  * back from swapper_space to tmpfs file mapping
2547                  */
2548
2549                 if (nr_pages == 0)
2550                         lock_page(page);
2551                 else if (!trylock_page(page))
2552                         break;
2553
2554                 if (unlikely(page->mapping != mapping)) {
2555                         unlock_page(page);
2556                         break;
2557                 }
2558
2559                 if (!wbc->range_cyclic && page->index > end) {
2560                         *done = true;
2561                         unlock_page(page);
2562                         break;
2563                 }
2564
2565                 if (*next && (page->index != *next)) {
2566                         /* Not next consecutive page */
2567                         unlock_page(page);
2568                         break;
2569                 }
2570
2571                 if (wbc->sync_mode != WB_SYNC_NONE)
2572                         wait_on_page_writeback(page);
2573
2574                 if (PageWriteback(page) ||
2575                                 !clear_page_dirty_for_io(page)) {
2576                         unlock_page(page);
2577                         break;
2578                 }
2579
2580                 /*
2581                  * This actually clears the dirty bit in the radix tree.
2582                  * See cifs_writepage() for more commentary.
2583                  */
2584                 set_page_writeback(page);
2585                 if (page_offset(page) >= i_size_read(mapping->host)) {
2586                         *done = true;
2587                         unlock_page(page);
2588                         end_page_writeback(page);
2589                         break;
2590                 }
2591
2592                 wdata->pages[i] = page;
2593                 *next = page->index + 1;
2594                 ++nr_pages;
2595         }
2596
2597         /* reset index to refind any pages skipped */
2598         if (nr_pages == 0)
2599                 *index = wdata->pages[0]->index + 1;
2600
2601         /* put any pages we aren't going to use */
2602         for (i = nr_pages; i < found_pages; i++) {
2603                 put_page(wdata->pages[i]);
2604                 wdata->pages[i] = NULL;
2605         }
2606
2607         return nr_pages;
2608 }
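
/*
 * Note: the *next check in wdata_prepare_pages() restricts a single
 * wdata to one contiguous run of dirty pages - the first gap ends the
 * batch - because the resulting async write goes out as a single
 * offset-plus-length request.
 */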
2609
2610 static int
2611 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2612                  struct address_space *mapping, struct writeback_control *wbc)
2613 {
2614         int rc;
2615
2616         wdata->sync_mode = wbc->sync_mode;
2617         wdata->nr_pages = nr_pages;
2618         wdata->offset = page_offset(wdata->pages[0]);
2619         wdata->pagesz = PAGE_SIZE;
2620         wdata->tailsz = min(i_size_read(mapping->host) -
2621                         page_offset(wdata->pages[nr_pages - 1]),
2622                         (loff_t)PAGE_SIZE);
2623         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2624         wdata->pid = wdata->cfile->pid;
2625
2626         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2627         if (rc)
2628                 return rc;
2629
2630         if (wdata->cfile->invalidHandle)
2631                 rc = -EAGAIN;
2632         else
2633                 rc = wdata->server->ops->async_writev(wdata,
2634                                                       cifs_writedata_release);
2635
2636         return rc;
2637 }
2638
2639 static int cifs_writepages(struct address_space *mapping,
2640                            struct writeback_control *wbc)
2641 {
2642         struct inode *inode = mapping->host;
2643         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2644         struct TCP_Server_Info *server;
2645         bool done = false, scanned = false, range_whole = false;
2646         pgoff_t end, index;
2647         struct cifs_writedata *wdata;
2648         struct cifsFileInfo *cfile = NULL;
2649         int rc = 0;
2650         int saved_rc = 0;
2651         unsigned int xid;
2652
2653         /*
2654          * If wsize is smaller than the page cache size, default to writing
2655          * one page at a time via cifs_writepage
2656          */
2657         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2658                 return generic_writepages(mapping, wbc);
2659
2660         xid = get_xid();
2661         if (wbc->range_cyclic) {
2662                 index = mapping->writeback_index; /* Start from prev offset */
2663                 end = -1;
2664         } else {
2665                 index = wbc->range_start >> PAGE_SHIFT;
2666                 end = wbc->range_end >> PAGE_SHIFT;
2667                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2668                         range_whole = true;
2669                 scanned = true;
2670         }
2671         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2672
2673 retry:
2674         while (!done && index <= end) {
2675                 unsigned int i, nr_pages, found_pages, wsize;
2676                 pgoff_t next = 0, tofind, saved_index = index;
2677                 struct cifs_credits credits_on_stack;
2678                 struct cifs_credits *credits = &credits_on_stack;
2679                 int get_file_rc = 0;
2680
2681                 if (cfile)
2682                         cifsFileInfo_put(cfile);
2683
2684                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2685
2686                 /* in case of an error store it to return later */
2687                 if (rc)
2688                         get_file_rc = rc;
2689
2690                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2691                                                    &wsize, credits);
2692                 if (rc != 0) {
2693                         done = true;
2694                         break;
2695                 }
2696
2697                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2698
2699                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2700                                                   &found_pages);
2701                 if (!wdata) {
2702                         rc = -ENOMEM;
2703                         done = true;
2704                         add_credits_and_wake_if(server, credits, 0);
2705                         break;
2706                 }
2707
2708                 if (found_pages == 0) {
2709                         kref_put(&wdata->refcount, cifs_writedata_release);
2710                         add_credits_and_wake_if(server, credits, 0);
2711                         break;
2712                 }
2713
2714                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2715                                                end, &index, &next, &done);
2716
2717                 /* nothing to write? */
2718                 if (nr_pages == 0) {
2719                         kref_put(&wdata->refcount, cifs_writedata_release);
2720                         add_credits_and_wake_if(server, credits, 0);
2721                         continue;
2722                 }
2723
2724                 wdata->credits = credits_on_stack;
2725                 wdata->cfile = cfile;
2726                 wdata->server = server;
2727                 cfile = NULL;
2728
2729                 if (!wdata->cfile) {
2730                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2731                                  get_file_rc);
2732                         if (is_retryable_error(get_file_rc))
2733                                 rc = get_file_rc;
2734                         else
2735                                 rc = -EBADF;
2736                 } else
2737                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2738
2739                 for (i = 0; i < nr_pages; ++i)
2740                         unlock_page(wdata->pages[i]);
2741
2742                 /* send failure -- clean up the mess */
2743                 if (rc != 0) {
2744                         add_credits_and_wake_if(server, &wdata->credits, 0);
2745                         for (i = 0; i < nr_pages; ++i) {
2746                                 if (is_retryable_error(rc))
2747                                         redirty_page_for_writepage(wbc,
2748                                                            wdata->pages[i]);
2749                                 else
2750                                         SetPageError(wdata->pages[i]);
2751                                 end_page_writeback(wdata->pages[i]);
2752                                 put_page(wdata->pages[i]);
2753                         }
2754                         if (!is_retryable_error(rc))
2755                                 mapping_set_error(mapping, rc);
2756                 }
2757                 kref_put(&wdata->refcount, cifs_writedata_release);
2758
2759                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2760                         index = saved_index;
2761                         continue;
2762                 }
2763
2764                 /* Return immediately if we received a signal during writing */
2765                 if (is_interrupt_error(rc)) {
2766                         done = true;
2767                         break;
2768                 }
2769
2770                 if (rc != 0 && saved_rc == 0)
2771                         saved_rc = rc;
2772
2773                 wbc->nr_to_write -= nr_pages;
2774                 if (wbc->nr_to_write <= 0)
2775                         done = true;
2776
2777                 index = next;
2778         }
2779
2780         if (!scanned && !done) {
2781                 /*
2782                  * We hit the last page and there is more work to be done: wrap
2783                  * back to the start of the file
2784                  */
2785                 scanned = true;
2786                 index = 0;
2787                 goto retry;
2788         }
2789
2790         if (saved_rc != 0)
2791                 rc = saved_rc;
2792
2793         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2794                 mapping->writeback_index = index;
2795
2796         if (cfile)
2797                 cifsFileInfo_put(cfile);
2798         free_xid(xid);
2799         /* Indicate that ctime and mtime should be updated as close is deferred */
2800         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2801         return rc;
2802 }
2803
2804 static int
2805 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2806 {
2807         int rc;
2808         unsigned int xid;
2809
2810         xid = get_xid();
2811 /* BB add check for wbc flags */
2812         get_page(page);
2813         if (!PageUptodate(page))
2814                 cifs_dbg(FYI, "ppw - page not up to date\n");
2815
2816         /*
2817          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2818          *
2819          * A writepage() implementation always needs to do either this,
2820          * or re-dirty the page with "redirty_page_for_writepage()" in
2821          * the case of a failure.
2822          *
2823          * Just unlocking the page will cause the radix tree tag-bits
2824          * to fail to update with the state of the page correctly.
2825          */
2826         set_page_writeback(page);
2827 retry_write:
2828         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2829         if (is_retryable_error(rc)) {
2830                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2831                         goto retry_write;
2832                 redirty_page_for_writepage(wbc, page);
2833         } else if (rc != 0) {
2834                 SetPageError(page);
2835                 mapping_set_error(page->mapping, rc);
2836         } else {
2837                 SetPageUptodate(page);
2838         }
2839         end_page_writeback(page);
2840         put_page(page);
2841         free_xid(xid);
2842         return rc;
2843 }
2844
2845 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2846 {
2847         int rc = cifs_writepage_locked(page, wbc);
2848         unlock_page(page);
2849         return rc;
2850 }
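/*
 * Illustrative sketch only, not part of this file: hooks such as
 * cifs_writepage() and cifs_write_end() are reached through the
 * address_space_operations table registered for the inode's mapping.
 * A hypothetical minimal wiring (the real table is defined elsewhere
 * in this file) would look like:
 *
 *	static const struct address_space_operations example_aops = {
 *		.writepage	= cifs_writepage,
 *		.write_end	= cifs_write_end,
 *	};
 */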
2851
2852 static int cifs_write_end(struct file *file, struct address_space *mapping,
2853                         loff_t pos, unsigned len, unsigned copied,
2854                         struct page *page, void *fsdata)
2855 {
2856         int rc;
2857         struct inode *inode = mapping->host;
2858         struct cifsFileInfo *cfile = file->private_data;
2859         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2860         __u32 pid;
2861
2862         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2863                 pid = cfile->pid;
2864         else
2865                 pid = current->tgid;
2866
2867         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2868                  page, pos, copied);
2869
2870         if (PageChecked(page)) {
2871                 if (copied == len)
2872                         SetPageUptodate(page);
2873                 ClearPageChecked(page);
2874         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2875                 SetPageUptodate(page);
2876
2877         if (!PageUptodate(page)) {
2878                 char *page_data;
2879                 unsigned offset = pos & (PAGE_SIZE - 1);
2880                 unsigned int xid;
2881
2882                 xid = get_xid();
2883                 /* this is probably better than directly calling
2884                    partialpage_write since in this function we already know
2885                    the file handle, which we might as well use */
2886                 /* BB check if anything else is missing out of ppw,
2887                    such as updating the last write time */
2888                 page_data = kmap(page);
2889                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2890                 /* if (rc < 0) should we set writebehind rc? */
2891                 kunmap(page);
2892
2893                 free_xid(xid);
2894         } else {
2895                 rc = copied;
2896                 pos += copied;
2897                 set_page_dirty(page);
2898         }
2899
2900         if (rc > 0) {
2901                 spin_lock(&inode->i_lock);
2902                 if (pos > inode->i_size) {
2903                         i_size_write(inode, pos);
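                        /*
                         * i_blocks counts 512-byte sectors; this rounds the
                         * new size up, e.g. pos == 1000 yields i_blocks == 2.
                         */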
2904                         inode->i_blocks = (512 - 1 + pos) >> 9;
2905                 }
2906                 spin_unlock(&inode->i_lock);
2907         }
2908
2909         unlock_page(page);
2910         put_page(page);
2911         /* Indicate that ctime and mtime should be updated as close is deferred */
2912         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2913
2914         return rc;
2915 }
2916
2917 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2918                       int datasync)
2919 {
2920         unsigned int xid;
2921         int rc = 0;
2922         struct cifs_tcon *tcon;
2923         struct TCP_Server_Info *server;
2924         struct cifsFileInfo *smbfile = file->private_data;
2925         struct inode *inode = file_inode(file);
2926         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2927
2928         rc = file_write_and_wait_range(file, start, end);
2929         if (rc) {
2930                 trace_cifs_fsync_err(inode->i_ino, rc);
2931                 return rc;
2932         }
2933
2934         xid = get_xid();
2935
2936         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2937                  file, datasync);
2938
2939         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2940                 rc = cifs_zap_mapping(inode);
2941                 if (rc) {
2942                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2943                         rc = 0; /* don't care about it in fsync */
2944                 }
2945         }
2946
2947         tcon = tlink_tcon(smbfile->tlink);
2948         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2949                 server = tcon->ses->server;
2950                 if (server->ops->flush == NULL) {
2951                         rc = -ENOSYS;
2952                         goto strict_fsync_exit;
2953                 }
2954
2955                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2956                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2957                         if (smbfile) {
2958                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2959                                 cifsFileInfo_put(smbfile);
2960                         } else
2961                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2962                 } else
2963                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2964         }
2965
2966 strict_fsync_exit:
2967         free_xid(xid);
2968         return rc;
2969 }
2970
2971 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2972 {
2973         unsigned int xid;
2974         int rc = 0;
2975         struct cifs_tcon *tcon;
2976         struct TCP_Server_Info *server;
2977         struct cifsFileInfo *smbfile = file->private_data;
2978         struct inode *inode = file_inode(file);
2979         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2980
2981         rc = file_write_and_wait_range(file, start, end);
2982         if (rc) {
2983                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2984                 return rc;
2985         }
2986
2987         xid = get_xid();
2988
2989         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2990                  file, datasync);
2991
2992         tcon = tlink_tcon(smbfile->tlink);
2993         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2994                 server = tcon->ses->server;
2995                 if (server->ops->flush == NULL) {
2996                         rc = -ENOSYS;
2997                         goto fsync_exit;
2998                 }
2999
3000                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3001                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3002                         if (smbfile) {
3003                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3004                                 cifsFileInfo_put(smbfile);
3005                         } else
3006                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3007                 } else
3008                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3009         }
3010
3011 fsync_exit:
3012         free_xid(xid);
3013         return rc;
3014 }
3015
3016 /*
3017  * As the file closes, flush all cached write data for this inode,
3018  * checking for write-behind errors.
3019  */
3020 int cifs_flush(struct file *file, fl_owner_t id)
3021 {
3022         struct inode *inode = file_inode(file);
3023         int rc = 0;
3024
3025         if (file->f_mode & FMODE_WRITE)
3026                 rc = filemap_write_and_wait(inode->i_mapping);
3027
3028         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3029         if (rc) {
3030                 /* get more nuanced writeback errors */
3031                 rc = filemap_check_wb_err(file->f_mapping, 0);
3032                 trace_cifs_flush_err(inode->i_ino, rc);
3033         }
3034         return rc;
3035 }
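/*
 * Illustrative only: because this is the ->flush hook run at close(2)
 * time, a buffered write that later failed on the server surfaces here,
 * so the application sees the error from close(2) rather than losing it
 * silently.
 */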
3036
3037 static int
3038 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
3039 {
3040         int rc = 0;
3041         unsigned long i;
3042
3043         for (i = 0; i < num_pages; i++) {
3044                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3045                 if (!pages[i]) {
3046                         /*
3047                          * save number of pages we have already allocated and
3048                          * return with ENOMEM error
3049                          */
3050                         num_pages = i;
3051                         rc = -ENOMEM;
3052                         break;
3053                 }
3054         }
3055
3056         if (rc) {
3057                 for (i = 0; i < num_pages; i++)
3058                         put_page(pages[i]);
3059         }
3060         return rc;
3061 }
3062
3063 static inline
3064 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
3065 {
3066         size_t num_pages;
3067         size_t clen;
3068
3069         clen = min_t(const size_t, len, wsize);
3070         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
3071
3072         if (cur_len)
3073                 *cur_len = clen;
3074
3075         return num_pages;
3076 }
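/*
 * Worked example, assuming PAGE_SIZE == 4096: wsize == 65536 and
 * len == 200000 give clen == 65536, so *cur_len is set to 65536 and 16
 * pages are reported; a final chunk with len == 5000 would give
 * clen == 5000 and DIV_ROUND_UP(5000, 4096) == 2 pages.
 */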
3077
3078 static void
3079 cifs_uncached_writedata_release(struct kref *refcount)
3080 {
3081         int i;
3082         struct cifs_writedata *wdata = container_of(refcount,
3083                                         struct cifs_writedata, refcount);
3084
3085         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3086         for (i = 0; i < wdata->nr_pages; i++)
3087                 put_page(wdata->pages[i]);
3088         cifs_writedata_release(refcount);
3089 }
3090
3091 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3092
3093 static void
3094 cifs_uncached_writev_complete(struct work_struct *work)
3095 {
3096         struct cifs_writedata *wdata = container_of(work,
3097                                         struct cifs_writedata, work);
3098         struct inode *inode = d_inode(wdata->cfile->dentry);
3099         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3100
3101         spin_lock(&inode->i_lock);
3102         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3103         if (cifsi->server_eof > inode->i_size)
3104                 i_size_write(inode, cifsi->server_eof);
3105         spin_unlock(&inode->i_lock);
3106
3107         complete(&wdata->done);
3108         collect_uncached_write_data(wdata->ctx);
3109         /* the call below can possibly free the last ref to the aio ctx */
3110         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3111 }
3112
3113 static int
3114 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
3115                       size_t *len, unsigned long *num_pages)
3116 {
3117         size_t save_len, copied, bytes, cur_len = *len;
3118         unsigned long i, nr_pages = *num_pages;
3119
3120         save_len = cur_len;
3121         for (i = 0; i < nr_pages; i++) {
3122                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
3123                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
3124                 cur_len -= copied;
3125                 /*
3126                  * If we didn't copy as much as we expected, then that
3127                  * may mean we trod into an unmapped area. Stop copying
3128                  * at that point. On the next pass through the big
3129                  * loop, we'll likely end up getting a zero-length
3130                  * write and bailing out of it.
3131                  */
3132                 if (copied < bytes)
3133                         break;
3134         }
3135         cur_len = save_len - cur_len;
3136         *len = cur_len;
3137
3138         /*
3139          * If we have no data to send, then that probably means that
3140          * the copy above failed altogether. That's most likely because
3141          * the address in the iovec was bogus. Return -EFAULT and let
3142          * the caller free anything we allocated and bail out.
3143          */
3144         if (!cur_len)
3145                 return -EFAULT;
3146
3147         /*
3148          * i + 1 now represents the number of pages we actually used in
3149          * the copy phase above.
3150          */
3151         *num_pages = i + 1;
3152         return 0;
3153 }
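/*
 * Worked example, assuming PAGE_SIZE == 4096: with nr_pages == 3 and
 * *len == 9000, a clean pass copies 4096 + 4096 + 808 bytes. If the
 * second copy comes up short (say 100 of 4096 bytes because the source
 * address is unmapped), the loop stops with *len == 4196 and
 * *num_pages == 2; if even the first copy yields nothing, the function
 * returns -EFAULT instead.
 */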
3154
3155 static int
3156 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3157         struct cifs_aio_ctx *ctx)
3158 {
3159         unsigned int wsize;
3160         struct cifs_credits credits;
3161         int rc;
3162         struct TCP_Server_Info *server = wdata->server;
3163
3164         do {
3165                 if (wdata->cfile->invalidHandle) {
3166                         rc = cifs_reopen_file(wdata->cfile, false);
3167                         if (rc == -EAGAIN)
3168                                 continue;
3169                         else if (rc)
3170                                 break;
3171                 }
3172
3173
3174                 /*
3175                  * Wait for credits to resend this wdata.
3176                  * Note: we are attempting to resend the whole wdata, not in
3177                  * segments.
3178                  */
3179                 do {
3180                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3181                                                 &wsize, &credits);
3182                         if (rc)
3183                                 goto fail;
3184
3185                         if (wsize < wdata->bytes) {
3186                                 add_credits_and_wake_if(server, &credits, 0);
3187                                 msleep(1000);
3188                         }
3189                 } while (wsize < wdata->bytes);
3190                 wdata->credits = credits;
3191
3192                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3193
3194                 if (!rc) {
3195                         if (wdata->cfile->invalidHandle)
3196                                 rc = -EAGAIN;
3197                         else {
3198 #ifdef CONFIG_CIFS_SMB_DIRECT
3199                                 if (wdata->mr) {
3200                                         wdata->mr->need_invalidate = true;
3201                                         smbd_deregister_mr(wdata->mr);
3202                                         wdata->mr = NULL;
3203                                 }
3204 #endif
3205                                 rc = server->ops->async_writev(wdata,
3206                                         cifs_uncached_writedata_release);
3207                         }
3208                 }
3209
3210                 /* If the write was successfully sent, we are done */
3211                 if (!rc) {
3212                         list_add_tail(&wdata->list, wdata_list);
3213                         return 0;
3214                 }
3215
3216                 /* Roll back credits and retry if needed */
3217                 add_credits_and_wake_if(server, &wdata->credits, 0);
3218         } while (rc == -EAGAIN);
3219
3220 fail:
3221         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3222         return rc;
3223 }
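/*
 * Note on the credit handling above: the resend path insists on enough
 * credits to cover the whole wdata before sending, backing off for a
 * second whenever the server grants less, and hands credits back via
 * add_credits_and_wake_if() on any failure so other waiters can make
 * progress.
 */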
3224
3225 static int
3226 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
3227                      struct cifsFileInfo *open_file,
3228                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3229                      struct cifs_aio_ctx *ctx)
3230 {
3231         int rc = 0;
3232         size_t cur_len;
3233         unsigned long nr_pages, num_pages, i;
3234         struct cifs_writedata *wdata;
3235         struct iov_iter saved_from = *from;
3236         loff_t saved_offset = offset;
3237         pid_t pid;
3238         struct TCP_Server_Info *server;
3239         struct page **pagevec;
3240         size_t start;
3241         unsigned int xid;
3242
3243         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3244                 pid = open_file->pid;
3245         else
3246                 pid = current->tgid;
3247
3248         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3249         xid = get_xid();
3250
3251         do {
3252                 unsigned int wsize;
3253                 struct cifs_credits credits_on_stack;
3254                 struct cifs_credits *credits = &credits_on_stack;
3255
3256                 if (open_file->invalidHandle) {
3257                         rc = cifs_reopen_file(open_file, false);
3258                         if (rc == -EAGAIN)
3259                                 continue;
3260                         else if (rc)
3261                                 break;
3262                 }
3263
3264                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3265                                                    &wsize, credits);
3266                 if (rc)
3267                         break;
3268
3269                 cur_len = min_t(const size_t, len, wsize);
3270
3271                 if (ctx->direct_io) {
3272                         ssize_t result;
3273
3274                         result = iov_iter_get_pages_alloc2(
3275                                 from, &pagevec, cur_len, &start);
3276                         if (result < 0) {
3277                                 cifs_dbg(VFS,
3278                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3279                                          result, iov_iter_type(from),
3280                                          from->iov_offset, from->count);
3281                                 dump_stack();
3282
3283                                 rc = result;
3284                                 add_credits_and_wake_if(server, credits, 0);
3285                                 break;
3286                         }
3287                         cur_len = (size_t)result;
3288
3289                         nr_pages =
3290                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3291
3292                         wdata = cifs_writedata_direct_alloc(pagevec,
3293                                              cifs_uncached_writev_complete);
3294                         if (!wdata) {
3295                                 rc = -ENOMEM;
3296                                 add_credits_and_wake_if(server, credits, 0);
3297                                 break;
3298                         }
3299
3300
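                        /*
                         * Worked example, assuming PAGE_SIZE == 4096: with
                         * start == 512 and cur_len == 10000, nr_pages == 3;
                         * page 0 carries 3584 bytes, page 1 carries 4096, and
                         * tailsz == 10000 - 3584 - 4096 == 2320.
                         */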
3301                         wdata->page_offset = start;
3302                         wdata->tailsz =
3303                                 nr_pages > 1 ?
3304                                         cur_len - (PAGE_SIZE - start) -
3305                                         (nr_pages - 2) * PAGE_SIZE :
3306                                         cur_len;
3307                 } else {
3308                         nr_pages = get_numpages(wsize, len, &cur_len);
3309                         wdata = cifs_writedata_alloc(nr_pages,
3310                                              cifs_uncached_writev_complete);
3311                         if (!wdata) {
3312                                 rc = -ENOMEM;
3313                                 add_credits_and_wake_if(server, credits, 0);
3314                                 break;
3315                         }
3316
3317                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3318                         if (rc) {
3319                                 kvfree(wdata->pages);
3320                                 kfree(wdata);
3321                                 add_credits_and_wake_if(server, credits, 0);
3322                                 break;
3323                         }
3324
3325                         num_pages = nr_pages;
3326                         rc = wdata_fill_from_iovec(
3327                                 wdata, from, &cur_len, &num_pages);
3328                         if (rc) {
3329                                 for (i = 0; i < nr_pages; i++)
3330                                         put_page(wdata->pages[i]);
3331                                 kvfree(wdata->pages);
3332                                 kfree(wdata);
3333                                 add_credits_and_wake_if(server, credits, 0);
3334                                 break;
3335                         }
3336
3337                         /*
3338                          * Bring nr_pages down to the number of pages we
3339                          * actually used, and free any pages that we didn't use.
3340                          */
3341                         for ( ; nr_pages > num_pages; nr_pages--)
3342                                 put_page(wdata->pages[nr_pages - 1]);
3343
3344                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3345                 }
3346
3347                 wdata->sync_mode = WB_SYNC_ALL;
3348                 wdata->nr_pages = nr_pages;
3349                 wdata->offset = (__u64)offset;
3350                 wdata->cfile = cifsFileInfo_get(open_file);
3351                 wdata->server = server;
3352                 wdata->pid = pid;
3353                 wdata->bytes = cur_len;
3354                 wdata->pagesz = PAGE_SIZE;
3355                 wdata->credits = credits_on_stack;
3356                 wdata->ctx = ctx;
3357                 kref_get(&ctx->refcount);
3358
3359                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3360
3361                 if (!rc) {
3362                         if (wdata->cfile->invalidHandle)
3363                                 rc = -EAGAIN;
3364                         else
3365                                 rc = server->ops->async_writev(wdata,
3366                                         cifs_uncached_writedata_release);
3367                 }
3368
3369                 if (rc) {
3370                         add_credits_and_wake_if(server, &wdata->credits, 0);
3371                         kref_put(&wdata->refcount,
3372                                  cifs_uncached_writedata_release);
3373                         if (rc == -EAGAIN) {
3374                                 *from = saved_from;
3375                                 iov_iter_advance(from, offset - saved_offset);
3376                                 continue;
3377                         }
3378                         break;
3379                 }
3380
3381                 list_add_tail(&wdata->list, wdata_list);
3382                 offset += cur_len;
3383                 len -= cur_len;
3384         } while (len > 0);
3385
3386         free_xid(xid);
3387         return rc;
3388 }
3389
3390 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3391 {
3392         struct cifs_writedata *wdata, *tmp;
3393         struct cifs_tcon *tcon;
3394         struct cifs_sb_info *cifs_sb;
3395         struct dentry *dentry = ctx->cfile->dentry;
3396         ssize_t rc;
3397
3398         tcon = tlink_tcon(ctx->cfile->tlink);
3399         cifs_sb = CIFS_SB(dentry->d_sb);
3400
3401         mutex_lock(&ctx->aio_mutex);
3402
3403         if (list_empty(&ctx->list)) {
3404                 mutex_unlock(&ctx->aio_mutex);
3405                 return;
3406         }
3407
3408         rc = ctx->rc;
3409         /*
3410          * Wait for and collect replies for any successful sends in order of
3411          * increasing offset. Once an error is hit, return without waiting
3412          * for any more replies.
3413          */
3414 restart_loop:
3415         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3416                 if (!rc) {
3417                         if (!try_wait_for_completion(&wdata->done)) {
3418                                 mutex_unlock(&ctx->aio_mutex);
3419                                 return;
3420                         }
3421
3422                         if (wdata->result)
3423                                 rc = wdata->result;
3424                         else
3425                                 ctx->total_len += wdata->bytes;
3426
3427                         /* resend call if it's a retryable error */
3428                         if (rc == -EAGAIN) {
3429                                 struct list_head tmp_list;
3430                                 struct iov_iter tmp_from = ctx->iter;
3431
3432                                 INIT_LIST_HEAD(&tmp_list);
3433                                 list_del_init(&wdata->list);
3434
3435                                 if (ctx->direct_io)
3436                                         rc = cifs_resend_wdata(
3437                                                 wdata, &tmp_list, ctx);
3438                                 else {
3439                                         iov_iter_advance(&tmp_from,
3440                                                  wdata->offset - ctx->pos);
3441
3442                                         rc = cifs_write_from_iter(wdata->offset,
3443                                                 wdata->bytes, &tmp_from,
3444                                                 ctx->cfile, cifs_sb, &tmp_list,
3445                                                 ctx);
3446
3447                                         kref_put(&wdata->refcount,
3448                                                 cifs_uncached_writedata_release);
3449                                 }
3450
3451                                 list_splice(&tmp_list, &ctx->list);
3452                                 goto restart_loop;
3453                         }
3454                 }
3455                 list_del_init(&wdata->list);
3456                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3457         }
3458
3459         cifs_stats_bytes_written(tcon, ctx->total_len);
3460         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3461
3462         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3463
3464         mutex_unlock(&ctx->aio_mutex);
3465
3466         if (ctx->iocb && ctx->iocb->ki_complete)
3467                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3468         else
3469                 complete(&ctx->done);
3470 }
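/*
 * Note on the collection pattern above: collect_uncached_write_data()
 * is invoked from each wdata's completion work, serialized by
 * aio_mutex. try_wait_for_completion() lets an invocation back out
 * without blocking when an earlier wdata has not completed yet; the
 * completion that observes the final wdata finishes the walk and
 * signals the ctx (ki_complete() for async callers, ctx->done for
 * synchronous ones).
 */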
3471
3472 static ssize_t __cifs_writev(
3473         struct kiocb *iocb, struct iov_iter *from, bool direct)
3474 {
3475         struct file *file = iocb->ki_filp;
3476         ssize_t total_written = 0;
3477         struct cifsFileInfo *cfile;
3478         struct cifs_tcon *tcon;
3479         struct cifs_sb_info *cifs_sb;
3480         struct cifs_aio_ctx *ctx;
3481         struct iov_iter saved_from = *from;
3482         size_t len = iov_iter_count(from);
3483         int rc;
3484
3485         /*
3486          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3487          * In this case, fall back to the non-direct write function.
3488          * This could be improved by getting the pages directly from the ITER_KVEC.
3489          */
3490         if (direct && iov_iter_is_kvec(from)) {
3491                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3492                 direct = false;
3493         }
3494
3495         rc = generic_write_checks(iocb, from);
3496         if (rc <= 0)
3497                 return rc;
3498
3499         cifs_sb = CIFS_FILE_SB(file);
3500         cfile = file->private_data;
3501         tcon = tlink_tcon(cfile->tlink);
3502
3503         if (!tcon->ses->server->ops->async_writev)
3504                 return -ENOSYS;
3505
3506         ctx = cifs_aio_ctx_alloc();
3507         if (!ctx)
3508                 return -ENOMEM;
3509
3510         ctx->cfile = cifsFileInfo_get(cfile);
3511
3512         if (!is_sync_kiocb(iocb))
3513                 ctx->iocb = iocb;
3514
3515         ctx->pos = iocb->ki_pos;
3516
3517         if (direct) {
3518                 ctx->direct_io = true;
3519                 ctx->iter = *from;
3520                 ctx->len = len;
3521         } else {
3522                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3523                 if (rc) {
3524                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3525                         return rc;
3526                 }
3527         }
3528
3529         /* grab a lock here because the write response handlers can access ctx */
3530         mutex_lock(&ctx->aio_mutex);
3531
3532         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3533                                   cfile, cifs_sb, &ctx->list, ctx);
3534
3535         /*
3536          * If at least one write was successfully sent, then discard any rc
3537          * value from the later writes. If the other writes succeed, then
3538          * we'll end up returning whatever was written. If they fail, then
3539          * we'll get a new rc value from them.
3540          */
3541         if (!list_empty(&ctx->list))
3542                 rc = 0;
3543
3544         mutex_unlock(&ctx->aio_mutex);
3545
3546         if (rc) {
3547                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3548                 return rc;
3549         }
3550
3551         if (!is_sync_kiocb(iocb)) {
3552                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3553                 return -EIOCBQUEUED;
3554         }
3555
3556         rc = wait_for_completion_killable(&ctx->done);
3557         if (rc) {
3558                 mutex_lock(&ctx->aio_mutex);
3559                 ctx->rc = rc = -EINTR;
3560                 total_written = ctx->total_len;
3561                 mutex_unlock(&ctx->aio_mutex);
3562         } else {
3563                 rc = ctx->rc;
3564                 total_written = ctx->total_len;
3565         }
3566
3567         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3568
3569         if (unlikely(!total_written))
3570                 return rc;
3571
3572         iocb->ki_pos += total_written;
3573         return total_written;
3574 }
3575
3576 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3577 {
3578         return __cifs_writev(iocb, from, true);
3579 }
3580
3581 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3582 {
3583         return __cifs_writev(iocb, from, false);
3584 }
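/*
 * Illustrative sketch only: cifs_direct_writev() and cifs_user_writev()
 * above are .write_iter style entry points; a hypothetical
 * file_operations wiring (the real tables live in cifsfs.c) would be:
 *
 *	const struct file_operations example_fops = {
 *		.write_iter	= cifs_user_writev,
 *	};
 */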
3585
3586 static ssize_t
3587 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3588 {
3589         struct file *file = iocb->ki_filp;
3590         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3591         struct inode *inode = file->f_mapping->host;
3592         struct cifsInodeInfo *cinode = CIFS_I(inode);
3593         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3594         ssize_t rc;
3595
3596         inode_lock(inode);
3597         /*
3598          * We need to hold the semaphore to be sure nobody modifies the
3599          * lock list with a brlock that prevents writing.
3600          */
3601         down_read(&cinode->lock_sem);
3602
3603         rc = generic_write_checks(iocb, from);
3604         if (rc <= 0)
3605                 goto out;
3606
3607         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3608                                      server->vals->exclusive_lock_type, 0,
3609                                      NULL, CIFS_WRITE_OP))
3610                 rc = __generic_file_write_iter(iocb, from);
3611         else
3612                 rc = -EACCES;
3613 out:
3614         up_read(&cinode->lock_sem);
3615         inode_unlock(inode);
3616
3617         if (rc > 0)
3618                 rc = generic_write_sync(iocb, rc);
3619         return rc;
3620 }
3621
3622 ssize_t
3623 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3624 {
3625         struct inode *inode = file_inode(iocb->ki_filp);
3626         struct cifsInodeInfo *cinode = CIFS_I(inode);
3627         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3628         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3629                                                 iocb->ki_filp->private_data;
3630         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3631         ssize_t written;
3632
3633         written = cifs_get_writer(cinode);
3634         if (written)
3635                 return written;
3636
3637         if (CIFS_CACHE_WRITE(cinode)) {
3638                 if (cap_unix(tcon->ses) &&
3639                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3640                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3641                         written = generic_file_write_iter(iocb, from);
3642                         goto out;
3643                 }
3644                 written = cifs_writev(iocb, from);
3645                 goto out;
3646         }
3647         /*
3648          * For non-oplocked files in strict cache mode we need to write the data
3649          * to the server exactly from pos to pos+len-1 rather than flush all
3650          * affected pages because that may cause an error with mandatory locks on
3651          * these pages but not on the region from pos to pos+len-1.
3652          */
3653         written = cifs_user_writev(iocb, from);
3654         if (CIFS_CACHE_READ(cinode)) {
3655                 /*
3656                  * We have read level caching and we have just sent a write
3657                  * request to the server thus making data in the cache stale.
3658                  * Zap the cache and set oplock/lease level to NONE to avoid
3659                  * reading stale data from the cache. All subsequent read
3660                  * operations will read new data from the server.
3661                  */
3662                 cifs_zap_mapping(inode);
3663                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3664                          inode);
3665                 cinode->oplock = 0;
3666         }
3667 out:
3668         cifs_put_writer(cinode);
3669         return written;
3670 }
3671
3672 static struct cifs_readdata *
3673 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3674 {
3675         struct cifs_readdata *rdata;
3676
3677         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3678         if (rdata != NULL) {
3679                 rdata->pages = pages;
3680                 kref_init(&rdata->refcount);
3681                 INIT_LIST_HEAD(&rdata->list);
3682                 init_completion(&rdata->done);
3683                 INIT_WORK(&rdata->work, complete);
3684         }
3685
3686         return rdata;
3687 }
3688
3689 static struct cifs_readdata *
3690 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3691 {
3692         struct page **pages =
3693                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3694         struct cifs_readdata *ret = NULL;
3695
3696         if (pages) {
3697                 ret = cifs_readdata_direct_alloc(pages, complete);
3698                 if (!ret)
3699                         kfree(pages);
3700         }
3701
3702         return ret;
3703 }
3704
3705 void
3706 cifs_readdata_release(struct kref *refcount)
3707 {
3708         struct cifs_readdata *rdata = container_of(refcount,
3709                                         struct cifs_readdata, refcount);
3710 #ifdef CONFIG_CIFS_SMB_DIRECT
3711         if (rdata->mr) {
3712                 smbd_deregister_mr(rdata->mr);
3713                 rdata->mr = NULL;
3714         }
3715 #endif
3716         if (rdata->cfile)
3717                 cifsFileInfo_put(rdata->cfile);
3718
3719         kvfree(rdata->pages);
3720         kfree(rdata);
3721 }
3722
3723 static int
3724 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3725 {
3726         int rc = 0;
3727         struct page *page;
3728         unsigned int i;
3729
3730         for (i = 0; i < nr_pages; i++) {
3731                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3732                 if (!page) {
3733                         rc = -ENOMEM;
3734                         break;
3735                 }
3736                 rdata->pages[i] = page;
3737         }
3738
3739         if (rc) {
3740                 unsigned int nr_page_failed = i;
3741
3742                 for (i = 0; i < nr_page_failed; i++) {
3743                         put_page(rdata->pages[i]);
3744                         rdata->pages[i] = NULL;
3745                 }
3746         }
3747         return rc;
3748 }
3749
3750 static void
3751 cifs_uncached_readdata_release(struct kref *refcount)
3752 {
3753         struct cifs_readdata *rdata = container_of(refcount,
3754                                         struct cifs_readdata, refcount);
3755         unsigned int i;
3756
3757         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3758         for (i = 0; i < rdata->nr_pages; i++) {
3759                 put_page(rdata->pages[i]);
3760         }
3761         cifs_readdata_release(refcount);
3762 }
3763
3764 /**
3765  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3766  * @rdata:      the readdata response with list of pages holding data
3767  * @iter:       destination for our data
3768  *
3769  * This function copies data from a list of pages in a readdata response into
3770  * an array of iovecs. It will first calculate where the data should go
3771  * based on the info in the readdata and then copy the data into that spot.
3772  */
3773 static int
3774 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3775 {
3776         size_t remaining = rdata->got_bytes;
3777         unsigned int i;
3778
3779         for (i = 0; i < rdata->nr_pages; i++) {
3780                 struct page *page = rdata->pages[i];
3781                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3782                 size_t written;
3783
3784                 if (unlikely(iov_iter_is_pipe(iter))) {
3785                         void *addr = kmap_atomic(page);
3786
3787                         written = copy_to_iter(addr, copy, iter);
3788                         kunmap_atomic(addr);
3789                 } else
3790                         written = copy_page_to_iter(page, 0, copy, iter);
3791                 remaining -= written;
3792                 if (written < copy && iov_iter_count(iter) > 0)
3793                         break;
3794         }
3795         return remaining ? -EFAULT : 0;
3796 }
3797
3798 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3799
3800 static void
3801 cifs_uncached_readv_complete(struct work_struct *work)
3802 {
3803         struct cifs_readdata *rdata = container_of(work,
3804                                                 struct cifs_readdata, work);
3805
3806         complete(&rdata->done);
3807         collect_uncached_read_data(rdata->ctx);
3808         /* the call below can possibly free the last ref to the aio ctx */
3809         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3810 }
3811
3812 static int
3813 uncached_fill_pages(struct TCP_Server_Info *server,
3814                     struct cifs_readdata *rdata, struct iov_iter *iter,
3815                     unsigned int len)
3816 {
3817         int result = 0;
3818         unsigned int i;
3819         unsigned int nr_pages = rdata->nr_pages;
3820         unsigned int page_offset = rdata->page_offset;
3821
3822         rdata->got_bytes = 0;
3823         rdata->tailsz = PAGE_SIZE;
3824         for (i = 0; i < nr_pages; i++) {
3825                 struct page *page = rdata->pages[i];
3826                 size_t n;
3827                 unsigned int segment_size = rdata->pagesz;
3828
3829                 if (i == 0)
3830                         segment_size -= page_offset;
3831                 else
3832                         page_offset = 0;
3833
3834
3835                 if (len <= 0) {
3836                         /* no need to hold page hostage */
3837                         rdata->pages[i] = NULL;
3838                         rdata->nr_pages--;
3839                         put_page(page);
3840                         continue;
3841                 }
3842
3843                 n = len;
3844                 if (len >= segment_size)
3845                         /* enough data to fill the page */
3846                         n = segment_size;
3847                 else
3848                         rdata->tailsz = len;
3849                 len -= n;
3850
3851                 if (iter)
3852                         result = copy_page_from_iter(
3853                                         page, page_offset, n, iter);
3854 #ifdef CONFIG_CIFS_SMB_DIRECT
3855                 else if (rdata->mr)
3856                         result = n;
3857 #endif
3858                 else
3859                         result = cifs_read_page_from_socket(
3860                                         server, page, page_offset, n);
3861                 if (result < 0)
3862                         break;
3863
3864                 rdata->got_bytes += result;
3865         }
3866
3867         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3868                                                 rdata->got_bytes : result;
3869 }
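/*
 * Worked example, assuming rdata->pagesz == PAGE_SIZE == 4096: with
 * page_offset == 512 and len == 5000, page 0 receives 3584 bytes and
 * page 1 the remaining 1416, so tailsz == 1416; any further pages are
 * released and dropped from rdata once len is exhausted.
 */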
3870
3871 static int
3872 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3873                               struct cifs_readdata *rdata, unsigned int len)
3874 {
3875         return uncached_fill_pages(server, rdata, NULL, len);
3876 }
3877
3878 static int
3879 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3880                               struct cifs_readdata *rdata,
3881                               struct iov_iter *iter)
3882 {
3883         return uncached_fill_pages(server, rdata, iter, iter->count);
3884 }
3885
3886 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3887                         struct list_head *rdata_list,
3888                         struct cifs_aio_ctx *ctx)
3889 {
3890         unsigned int rsize;
3891         struct cifs_credits credits;
3892         int rc;
3893         struct TCP_Server_Info *server;
3894
3895         /* XXX: should we pick a new channel here? */
3896         server = rdata->server;
3897
3898         do {
3899                 if (rdata->cfile->invalidHandle) {
3900                         rc = cifs_reopen_file(rdata->cfile, true);
3901                         if (rc == -EAGAIN)
3902                                 continue;
3903                         else if (rc)
3904                                 break;
3905                 }
3906
3907                 /*
3908                  * Wait for credits to resend this rdata.
3909                  * Note: we are attempting to resend the whole rdata, not in
3910                  * segments.
3911                  */
3912                 do {
3913                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3914                                                 &rsize, &credits);
3915
3916                         if (rc)
3917                                 goto fail;
3918
3919                         if (rsize < rdata->bytes) {
3920                                 add_credits_and_wake_if(server, &credits, 0);
3921                                 msleep(1000);
3922                         }
3923                 } while (rsize < rdata->bytes);
3924                 rdata->credits = credits;
3925
3926                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3927                 if (!rc) {
3928                         if (rdata->cfile->invalidHandle)
3929                                 rc = -EAGAIN;
3930                         else {
3931 #ifdef CONFIG_CIFS_SMB_DIRECT
3932                                 if (rdata->mr) {
3933                                         rdata->mr->need_invalidate = true;
3934                                         smbd_deregister_mr(rdata->mr);
3935                                         rdata->mr = NULL;
3936                                 }
3937 #endif
3938                                 rc = server->ops->async_readv(rdata);
3939                         }
3940                 }
3941
3942                 /* If the read was successfully sent, we are done */
3943                 if (!rc) {
3944                         /* Add to aio pending list */
3945                         list_add_tail(&rdata->list, rdata_list);
3946                         return 0;
3947                 }
3948
3949                 /* Roll back credits and retry if needed */
3950                 add_credits_and_wake_if(server, &rdata->credits, 0);
3951         } while (rc == -EAGAIN);
3952
3953 fail:
3954         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3955         return rc;
3956 }
3957
3958 static int
3959 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3960                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3961                      struct cifs_aio_ctx *ctx)
3962 {
3963         struct cifs_readdata *rdata;
3964         unsigned int npages, rsize;
3965         struct cifs_credits credits_on_stack;
3966         struct cifs_credits *credits = &credits_on_stack;
3967         size_t cur_len;
3968         int rc;
3969         pid_t pid;
3970         struct TCP_Server_Info *server;
3971         struct page **pagevec;
3972         size_t start;
3973         struct iov_iter direct_iov = ctx->iter;
3974
3975         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3976
3977         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3978                 pid = open_file->pid;
3979         else
3980                 pid = current->tgid;
3981
3982         if (ctx->direct_io)
3983                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3984
3985         do {
3986                 if (open_file->invalidHandle) {
3987                         rc = cifs_reopen_file(open_file, true);
3988                         if (rc == -EAGAIN)
3989                                 continue;
3990                         else if (rc)
3991                                 break;
3992                 }
3993
3994                 if (cifs_sb->ctx->rsize == 0)
3995                         cifs_sb->ctx->rsize =
3996                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3997                                                              cifs_sb->ctx);
3998
3999                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4000                                                    &rsize, credits);
4001                 if (rc)
4002                         break;
4003
4004                 cur_len = min_t(const size_t, len, rsize);
4005
4006                 if (ctx->direct_io) {
4007                         ssize_t result;
4008
4009                         result = iov_iter_get_pages_alloc2(
4010                                         &direct_iov, &pagevec,
4011                                         cur_len, &start);
4012                         if (result < 0) {
4013                                 cifs_dbg(VFS,
4014                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
4015                                          result, iov_iter_type(&direct_iov),
4016                                          direct_iov.iov_offset,
4017                                          direct_iov.count);
4018                                 dump_stack();
4019
4020                                 rc = result;
4021                                 add_credits_and_wake_if(server, credits, 0);
4022                                 break;
4023                         }
4024                         cur_len = (size_t)result;
4025
4026                         rdata = cifs_readdata_direct_alloc(
4027                                         pagevec, cifs_uncached_readv_complete);
4028                         if (!rdata) {
4029                                 add_credits_and_wake_if(server, credits, 0);
4030                                 rc = -ENOMEM;
4031                                 break;
4032                         }
4033
4034                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
4035                         rdata->page_offset = start;
4036                         rdata->tailsz = npages > 1 ?
4037                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
4038                                 cur_len;
4039
4040                 } else {
4041
4042                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
4043                         /* allocate a readdata struct */
4044                         rdata = cifs_readdata_alloc(npages,
4045                                             cifs_uncached_readv_complete);
4046                         if (!rdata) {
4047                                 add_credits_and_wake_if(server, credits, 0);
4048                                 rc = -ENOMEM;
4049                                 break;
4050                         }
4051
4052                         rc = cifs_read_allocate_pages(rdata, npages);
4053                         if (rc) {
4054                                 kvfree(rdata->pages);
4055                                 kfree(rdata);
4056                                 add_credits_and_wake_if(server, credits, 0);
4057                                 break;
4058                         }
4059
4060                         rdata->tailsz = PAGE_SIZE;
4061                 }
4062
4063                 rdata->server = server;
4064                 rdata->cfile = cifsFileInfo_get(open_file);
4065                 rdata->nr_pages = npages;
4066                 rdata->offset = offset;
4067                 rdata->bytes = cur_len;
4068                 rdata->pid = pid;
4069                 rdata->pagesz = PAGE_SIZE;
4070                 rdata->read_into_pages = cifs_uncached_read_into_pages;
4071                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
4072                 rdata->credits = credits_on_stack;
4073                 rdata->ctx = ctx;
4074                 kref_get(&ctx->refcount);
4075
4076                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4077
4078                 if (!rc) {
4079                         if (rdata->cfile->invalidHandle)
4080                                 rc = -EAGAIN;
4081                         else
4082                                 rc = server->ops->async_readv(rdata);
4083                 }
4084
4085                 if (rc) {
4086                         add_credits_and_wake_if(server, &rdata->credits, 0);
4087                         kref_put(&rdata->refcount,
4088                                 cifs_uncached_readdata_release);
4089                         if (rc == -EAGAIN) {
4090                                 iov_iter_revert(&direct_iov, cur_len);
4091                                 continue;
4092                         }
4093                         break;
4094                 }
4095
4096                 list_add_tail(&rdata->list, rdata_list);
4097                 offset += cur_len;
4098                 len -= cur_len;
4099         } while (len > 0);
4100
4101         return rc;
4102 }
4103
4104 static void
4105 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4106 {
4107         struct cifs_readdata *rdata, *tmp;
4108         struct iov_iter *to = &ctx->iter;
4109         struct cifs_sb_info *cifs_sb;
4110         int rc;
4111
4112         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4113
4114         mutex_lock(&ctx->aio_mutex);
4115
4116         if (list_empty(&ctx->list)) {
4117                 mutex_unlock(&ctx->aio_mutex);
4118                 return;
4119         }
4120
4121         rc = ctx->rc;
4122         /* the loop below should proceed in the order of increasing offsets */
4123 again:
4124         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4125                 if (!rc) {
4126                         if (!try_wait_for_completion(&rdata->done)) {
4127                                 mutex_unlock(&ctx->aio_mutex);
4128                                 return;
4129                         }
4130
4131                         if (rdata->result == -EAGAIN) {
4132                                 /* resend call if it's a retryable error */
4133                                 struct list_head tmp_list;
4134                                 unsigned int got_bytes = rdata->got_bytes;
4135
4136                                 list_del_init(&rdata->list);
4137                                 INIT_LIST_HEAD(&tmp_list);
4138
4139                                 /*
4140                                  * We got part of the data and then a reconnect
4141                                  * happened -- fill the buffer and continue
4142                                  * reading.
4143                                  */
4144                                 if (got_bytes && got_bytes < rdata->bytes) {
4145                                         rc = 0;
4146                                         if (!ctx->direct_io)
4147                                                 rc = cifs_readdata_to_iov(rdata, to);
4148                                         if (rc) {
4149                                                 kref_put(&rdata->refcount,
4150                                                         cifs_uncached_readdata_release);
4151                                                 continue;
4152                                         }
4153                                 }
4154
4155                                 if (ctx->direct_io) {
4156                                         /*
4157                                          * Re-use rdata as this is a
4158                                          * direct I/O
4159                                          */
4160                                         rc = cifs_resend_rdata(
4161                                                 rdata,
4162                                                 &tmp_list, ctx);
4163                                 } else {
4164                                         rc = cifs_send_async_read(
4165                                                 rdata->offset + got_bytes,
4166                                                 rdata->bytes - got_bytes,
4167                                                 rdata->cfile, cifs_sb,
4168                                                 &tmp_list, ctx);
4169
4170                                         kref_put(&rdata->refcount,
4171                                                 cifs_uncached_readdata_release);
4172                                 }
4173
4174                                 list_splice(&tmp_list, &ctx->list);
4175
4176                                 goto again;
4177                         } else if (rdata->result)
4178                                 rc = rdata->result;
4179                         else if (!ctx->direct_io)
4180                                 rc = cifs_readdata_to_iov(rdata, to);
4181
4182                         /* if there was a short read -- discard anything left */
4183                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4184                                 rc = -ENODATA;
4185
4186                         ctx->total_len += rdata->got_bytes;
4187                 }
4188                 list_del_init(&rdata->list);
4189                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4190         }
4191
4192         if (!ctx->direct_io)
4193                 ctx->total_len = ctx->len - iov_iter_count(to);
4194
4195         /* mask nodata case */
4196         if (rc == -ENODATA)
4197                 rc = 0;
4198
4199         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4200
4201         mutex_unlock(&ctx->aio_mutex);
4202
4203         if (ctx->iocb && ctx->iocb->ki_complete)
4204                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4205         else
4206                 complete(&ctx->done);
4207 }
4208
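/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv().
 * Allocates a cifs_aio_ctx, splits the request into asynchronous reads via
 * cifs_send_async_read(), and then either returns -EIOCBQUEUED (async iocb)
 * or waits for collect_uncached_read_data() to gather the results.
 *
 * Rough call flow for the synchronous case:
 *
 *	ctx = cifs_aio_ctx_alloc();
 *	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
 *	wait_for_completion_killable(&ctx->done);
 *	total_read = ctx->total_len;
 */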
4209 static ssize_t __cifs_readv(
4210         struct kiocb *iocb, struct iov_iter *to, bool direct)
4211 {
4212         size_t len;
4213         struct file *file = iocb->ki_filp;
4214         struct cifs_sb_info *cifs_sb;
4215         struct cifsFileInfo *cfile;
4216         struct cifs_tcon *tcon;
4217         ssize_t rc, total_read = 0;
4218         loff_t offset = iocb->ki_pos;
4219         struct cifs_aio_ctx *ctx;
4220
4221         /*
4222          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
4223          * fall back to the data-copy read path. This could be improved
4224          * by getting pages directly in ITER_KVEC.
4225          */
4226         if (direct && iov_iter_is_kvec(to)) {
4227                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
4228                 direct = false;
4229         }
4230
4231         len = iov_iter_count(to);
4232         if (!len)
4233                 return 0;
4234
4235         cifs_sb = CIFS_FILE_SB(file);
4236         cfile = file->private_data;
4237         tcon = tlink_tcon(cfile->tlink);
4238
4239         if (!tcon->ses->server->ops->async_readv)
4240                 return -ENOSYS;
4241
4242         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4243                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4244
4245         ctx = cifs_aio_ctx_alloc();
4246         if (!ctx)
4247                 return -ENOMEM;
4248
4249         ctx->cfile = cifsFileInfo_get(cfile);
4250
4251         if (!is_sync_kiocb(iocb))
4252                 ctx->iocb = iocb;
4253
4254         if (user_backed_iter(to))
4255                 ctx->should_dirty = true;
4256
4257         if (direct) {
4258                 ctx->pos = offset;
4259                 ctx->direct_io = true;
4260                 ctx->iter = *to;
4261                 ctx->len = len;
4262         } else {
4263                 rc = setup_aio_ctx_iter(ctx, to, READ);
4264                 if (rc) {
4265                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4266                         return rc;
4267                 }
4268                 len = ctx->len;
4269         }
4270
4271         /* grab a lock here since read response handlers can access ctx */
4272         mutex_lock(&ctx->aio_mutex);
4273
4274         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4275
4276         /* if at least one read request was sent successfully, reset rc */
4277         if (!list_empty(&ctx->list))
4278                 rc = 0;
4279
4280         mutex_unlock(&ctx->aio_mutex);
4281
4282         if (rc) {
4283                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4284                 return rc;
4285         }
4286
4287         if (!is_sync_kiocb(iocb)) {
4288                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4289                 return -EIOCBQUEUED;
4290         }
4291
4292         rc = wait_for_completion_killable(&ctx->done);
4293         if (rc) {
4294                 mutex_lock(&ctx->aio_mutex);
4295                 ctx->rc = rc = -EINTR;
4296                 total_read = ctx->total_len;
4297                 mutex_unlock(&ctx->aio_mutex);
4298         } else {
4299                 rc = ctx->rc;
4300                 total_read = ctx->total_len;
4301         }
4302
4303         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4304
4305         if (total_read) {
4306                 iocb->ki_pos += total_read;
4307                 return total_read;
4308         }
4309         return rc;
4310 }
4311
4312 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4313 {
4314         return __cifs_readv(iocb, to, true);
4315 }
4316
4317 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4318 {
4319         return __cifs_readv(iocb, to, false);
4320 }
4321
4322 ssize_t
4323 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4324 {
4325         struct inode *inode = file_inode(iocb->ki_filp);
4326         struct cifsInodeInfo *cinode = CIFS_I(inode);
4327         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4328         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4329                                                 iocb->ki_filp->private_data;
4330         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4331         int rc = -EACCES;
4332
4333         /*
4334          * In strict cache mode we need to read from the server all the time
4335          * if we don't have a level II oplock, because the server can delay
4336          * the mtime change and so we can't decide whether to invalidate the
4337          * inode. Reading pages can also fail if there are mandatory locks
4338          * on pages affected by this read but not on the region from pos to
4339          * pos+len-1.
4340          */
4341         if (!CIFS_CACHE_READ(cinode))
4342                 return cifs_user_readv(iocb, to);
4343
4344         if (cap_unix(tcon->ses) &&
4345             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4346             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4347                 return generic_file_read_iter(iocb, to);
4348
4349         /*
4350          * We need to hold the sem to be sure nobody modifies the lock
4351          * list with a brlock that prevents reading.
4352          */
4353         down_read(&cinode->lock_sem);
4354         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4355                                      tcon->ses->server->vals->shared_lock_type,
4356                                      0, NULL, CIFS_READ_OP))
4357                 rc = generic_file_read_iter(iocb, to);
4358         up_read(&cinode->lock_sem);
4359         return rc;
4360 }
4361
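/*
 * Legacy synchronous read path (used, for example, by cifs_readpage_worker).
 * Issues server->ops->sync_read() calls of at most min(rsize, CIFSMaxBufSize)
 * bytes each, reopening an invalidated handle and retrying on -EAGAIN, until
 * read_size bytes have been read or the server returns an error or EOF.
 */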
4362 static ssize_t
4363 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4364 {
4365         int rc = -EACCES;
4366         unsigned int bytes_read = 0;
4367         unsigned int total_read;
4368         unsigned int current_read_size;
4369         unsigned int rsize;
4370         struct cifs_sb_info *cifs_sb;
4371         struct cifs_tcon *tcon;
4372         struct TCP_Server_Info *server;
4373         unsigned int xid;
4374         char *cur_offset;
4375         struct cifsFileInfo *open_file;
4376         struct cifs_io_parms io_parms = {0};
4377         int buf_type = CIFS_NO_BUFFER;
4378         __u32 pid;
4379
4380         xid = get_xid();
4381         cifs_sb = CIFS_FILE_SB(file);
4382
4383         /* FIXME: set up handlers for larger reads and/or convert to async */
4384         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4385
4386         if (file->private_data == NULL) {
4387                 rc = -EBADF;
4388                 free_xid(xid);
4389                 return rc;
4390         }
4391         open_file = file->private_data;
4392         tcon = tlink_tcon(open_file->tlink);
4393         server = cifs_pick_channel(tcon->ses);
4394
4395         if (!server->ops->sync_read) {
4396                 free_xid(xid);
4397                 return -ENOSYS;
4398         }
4399
4400         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4401                 pid = open_file->pid;
4402         else
4403                 pid = current->tgid;
4404
4405         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4406                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4407
4408         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4409              total_read += bytes_read, cur_offset += bytes_read) {
4410                 do {
4411                         current_read_size = min_t(uint, read_size - total_read,
4412                                                   rsize);
4413                         /*
4414                          * For Windows ME and 9x we do not want to request
4415                          * more than was negotiated, since the server will
4416                          * refuse the read otherwise.
4417                          */
4418                         if (!(tcon->ses->capabilities &
4419                                 tcon->ses->server->vals->cap_large_files)) {
4420                                 current_read_size = min_t(uint,
4421                                         current_read_size, CIFSMaxBufSize);
4422                         }
4423                         if (open_file->invalidHandle) {
4424                                 rc = cifs_reopen_file(open_file, true);
4425                                 if (rc != 0)
4426                                         break;
4427                         }
4428                         io_parms.pid = pid;
4429                         io_parms.tcon = tcon;
4430                         io_parms.offset = *offset;
4431                         io_parms.length = current_read_size;
4432                         io_parms.server = server;
4433                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4434                                                     &bytes_read, &cur_offset,
4435                                                     &buf_type);
4436                 } while (rc == -EAGAIN);
4437
4438                 if (rc || (bytes_read == 0)) {
4439                         if (total_read) {
4440                                 break;
4441                         } else {
4442                                 free_xid(xid);
4443                                 return rc;
4444                         }
4445                 } else {
4446                         cifs_stats_bytes_read(tcon, total_read);
4447                         *offset += bytes_read;
4448                 }
4449         }
4450         free_xid(xid);
4451         return total_read;
4452 }
4453
4454 /*
4455  * If the page is mmap'ed into a process' page tables, then we need to make
4456  * sure that it doesn't change while being written back.
4457  */
4458 static vm_fault_t
4459 cifs_page_mkwrite(struct vm_fault *vmf)
4460 {
4461         struct page *page = vmf->page;
4462
4463         /* Wait for the page to be written to the cache before we allow it to
4464          * be modified.  We then assume the entire page will need writing back.
4465          */
4466 #ifdef CONFIG_CIFS_FSCACHE
4467         if (PageFsCache(page) &&
4468             wait_on_page_fscache_killable(page) < 0)
4469                 return VM_FAULT_RETRY;
4470 #endif
4471
4472         wait_on_page_writeback(page);
4473
4474         if (lock_page_killable(page) < 0)
4475                 return VM_FAULT_RETRY;
4476         return VM_FAULT_LOCKED;
4477 }
4478
4479 static const struct vm_operations_struct cifs_file_vm_ops = {
4480         .fault = filemap_fault,
4481         .map_pages = filemap_map_pages,
4482         .page_mkwrite = cifs_page_mkwrite,
4483 };
4484
4485 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4486 {
4487         int xid, rc = 0;
4488         struct inode *inode = file_inode(file);
4489
4490         xid = get_xid();
4491
4492         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4493                 rc = cifs_zap_mapping(inode);
4494         if (!rc)
4495                 rc = generic_file_mmap(file, vma);
4496         if (!rc)
4497                 vma->vm_ops = &cifs_file_vm_ops;
4498
4499         free_xid(xid);
4500         return rc;
4501 }
4502
4503 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4504 {
4505         int rc, xid;
4506
4507         xid = get_xid();
4508
4509         rc = cifs_revalidate_file(file);
4510         if (rc)
4511                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4512                          rc);
4513         if (!rc)
4514                 rc = generic_file_mmap(file, vma);
4515         if (!rc)
4516                 vma->vm_ops = &cifs_file_vm_ops;
4517
4518         free_xid(xid);
4519         return rc;
4520 }
4521
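/*
 * Completion work for an asynchronous readahead request: mark each page
 * uptodate if the read succeeded (or returned partial data before an
 * -EAGAIN), otherwise flag it with an error; successfully read pages are
 * also handed to fscache.  Every page is unlocked and released, and the
 * rdata reference taken for the work item is dropped.
 */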
4522 static void
4523 cifs_readv_complete(struct work_struct *work)
4524 {
4525         unsigned int i, got_bytes;
4526         struct cifs_readdata *rdata = container_of(work,
4527                                                 struct cifs_readdata, work);
4528
4529         got_bytes = rdata->got_bytes;
4530         for (i = 0; i < rdata->nr_pages; i++) {
4531                 struct page *page = rdata->pages[i];
4532
4533                 if (rdata->result == 0 ||
4534                     (rdata->result == -EAGAIN && got_bytes)) {
4535                         flush_dcache_page(page);
4536                         SetPageUptodate(page);
4537                 } else
4538                         SetPageError(page);
4539
4540                 if (rdata->result == 0 ||
4541                     (rdata->result == -EAGAIN && got_bytes))
4542                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4543
4544                 unlock_page(page);
4545
4546                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4547
4548                 put_page(page);
4549                 rdata->pages[i] = NULL;
4550         }
4551         kref_put(&rdata->refcount, cifs_readdata_release);
4552 }
4553
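/*
 * Fill rdata->pages with the received data: from an iov_iter when the
 * caller already has the payload in memory, directly for smbdirect (the
 * RDMA transfer has already placed the data, so rdata->mr is set), or
 * straight from the socket otherwise.  A partially filled page has its
 * tail zeroed; pages beyond the data are zero-filled and marked uptodate
 * if they lie past the server's EOF (to stop the VFS re-requesting them),
 * or simply released.
 */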
4554 static int
4555 readpages_fill_pages(struct TCP_Server_Info *server,
4556                      struct cifs_readdata *rdata, struct iov_iter *iter,
4557                      unsigned int len)
4558 {
4559         int result = 0;
4560         unsigned int i;
4561         u64 eof;
4562         pgoff_t eof_index;
4563         unsigned int nr_pages = rdata->nr_pages;
4564         unsigned int page_offset = rdata->page_offset;
4565
4566         /* determine the eof that the server (probably) has */
4567         eof = CIFS_I(rdata->mapping->host)->server_eof;
4568         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4569         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4570
4571         rdata->got_bytes = 0;
4572         rdata->tailsz = PAGE_SIZE;
4573         for (i = 0; i < nr_pages; i++) {
4574                 struct page *page = rdata->pages[i];
4575                 unsigned int to_read = rdata->pagesz;
4576                 size_t n;
4577
4578                 if (i == 0)
4579                         to_read -= page_offset;
4580                 else
4581                         page_offset = 0;
4582
4583                 n = to_read;
4584
4585                 if (len >= to_read) {
4586                         len -= to_read;
4587                 } else if (len > 0) {
4588                         /* enough for partial page, fill and zero the rest */
4589                         zero_user(page, len + page_offset, to_read - len);
4590                         n = rdata->tailsz = len;
4591                         len = 0;
4592                 } else if (page->index > eof_index) {
4593                         /*
4594                          * The VFS will not try to do readahead past the
4595                          * i_size, but it's possible that we have outstanding
4596                          * writes with gaps in the middle and the i_size hasn't
4597                          * caught up yet. Populate those with zeroed out pages
4598                          * to prevent the VFS from repeatedly attempting to
4599                          * fill them until the writes are flushed.
4600                          */
4601                         zero_user(page, 0, PAGE_SIZE);
4602                         flush_dcache_page(page);
4603                         SetPageUptodate(page);
4604                         unlock_page(page);
4605                         put_page(page);
4606                         rdata->pages[i] = NULL;
4607                         rdata->nr_pages--;
4608                         continue;
4609                 } else {
4610                         /* no need to hold page hostage */
4611                         unlock_page(page);
4612                         put_page(page);
4613                         rdata->pages[i] = NULL;
4614                         rdata->nr_pages--;
4615                         continue;
4616                 }
4617
4618                 if (iter)
4619                         result = copy_page_from_iter(
4620                                         page, page_offset, n, iter);
4621 #ifdef CONFIG_CIFS_SMB_DIRECT
4622                 else if (rdata->mr)
4623                         result = n;
4624 #endif
4625                 else
4626                         result = cifs_read_page_from_socket(
4627                                         server, page, page_offset, n);
4628                 if (result < 0)
4629                         break;
4630
4631                 rdata->got_bytes += result;
4632         }
4633
4634         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4635                                                 rdata->got_bytes : result;
4636 }
4637
4638 static int
4639 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4640                                struct cifs_readdata *rdata, unsigned int len)
4641 {
4642         return readpages_fill_pages(server, rdata, NULL, len);
4643 }
4644
4645 static int
4646 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4647                                struct cifs_readdata *rdata,
4648                                struct iov_iter *iter)
4649 {
4650         return readpages_fill_pages(server, rdata, iter, iter->count);
4651 }
4652
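/*
 * Readahead: any folios already present in fscache are read from the
 * cache (tracked via cifs_fscache_query_occupancy()); the rest of the
 * window is chopped into rsize-sized chunks, each sent to the server as
 * an asynchronous read once credits have been obtained.  Completion is
 * handled by cifs_readv_complete().
 */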
4653 static void cifs_readahead(struct readahead_control *ractl)
4654 {
4655         int rc;
4656         struct cifsFileInfo *open_file = ractl->file->private_data;
4657         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4658         struct TCP_Server_Info *server;
4659         pid_t pid;
4660         unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4661         pgoff_t next_cached = ULONG_MAX;
4662         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4663                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4664         bool check_cache = caching;
4665
4666         xid = get_xid();
4667
4668         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4669                 pid = open_file->pid;
4670         else
4671                 pid = current->tgid;
4672
4673         rc = 0;
4674         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4675
4676         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4677                  __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4678
4679         /*
4680          * Chop the readahead request up into rsize-sized read requests.
4681          */
4682         while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4683                 unsigned int i, got, rsize;
4684                 struct page *page;
4685                 struct cifs_readdata *rdata;
4686                 struct cifs_credits credits_on_stack;
4687                 struct cifs_credits *credits = &credits_on_stack;
4688                 pgoff_t index = readahead_index(ractl) + last_batch_size;
4689
4690                 /*
4691                  * Find out if we have anything cached in the range of
4692                  * interest, and if so, where the next chunk of cached data is.
4693                  */
4694                 if (caching) {
4695                         if (check_cache) {
4696                                 rc = cifs_fscache_query_occupancy(
4697                                         ractl->mapping->host, index, nr_pages,
4698                                         &next_cached, &cache_nr_pages);
4699                                 if (rc < 0)
4700                                         caching = false;
4701                                 check_cache = false;
4702                         }
4703
4704                         if (index == next_cached) {
4705                                 /*
4706                                  * TODO: Send a whole batch of pages to be read
4707                                  * by the cache.
4708                                  */
4709                                 struct folio *folio = readahead_folio(ractl);
4710
4711                                 last_batch_size = folio_nr_pages(folio);
4712                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4713                                                                &folio->page) < 0) {
4714                                         /*
4715                                          * TODO: Deal with cache read failure
4716                                          * here, but for the moment, delegate
4717                                          * that to readpage.
4718                                          */
4719                                         caching = false;
4720                                 }
4721                                 folio_unlock(folio);
4722                                 next_cached++;
4723                                 cache_nr_pages--;
4724                                 if (cache_nr_pages == 0)
4725                                         check_cache = true;
4726                                 continue;
4727                         }
4728                 }
4729
4730                 if (open_file->invalidHandle) {
4731                         rc = cifs_reopen_file(open_file, true);
4732                         if (rc) {
4733                                 if (rc == -EAGAIN)
4734                                         continue;
4735                                 break;
4736                         }
4737                 }
4738
4739                 if (cifs_sb->ctx->rsize == 0)
4740                         cifs_sb->ctx->rsize =
4741                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4742                                                              cifs_sb->ctx);
4743
4744                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4745                                                    &rsize, credits);
4746                 if (rc)
4747                         break;
4748                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4749                 nr_pages = min_t(size_t, nr_pages, next_cached - index);
4750
4751                 /*
4752                  * Give up immediately if rsize is too small to read an entire
4753                  * page. The VFS will fall back to readpage. We should never
4754                  * reach this point, however, since we set ra_pages to 0 when
4755                  * the rsize is smaller than a cache page.
4756                  */
4757                 if (unlikely(!nr_pages)) {
4758                         add_credits_and_wake_if(server, credits, 0);
4759                         break;
4760                 }
4761
4762                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4763                 if (!rdata) {
4764                         /* best to give up if we're out of mem */
4765                         add_credits_and_wake_if(server, credits, 0);
4766                         break;
4767                 }
4768
4769                 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4770                 if (got != nr_pages) {
4771                         pr_warn("__readahead_batch() returned %u/%u\n",
4772                                 got, nr_pages);
4773                         nr_pages = got;
4774                 }
4775
4776                 rdata->nr_pages = nr_pages;
4777                 rdata->bytes    = readahead_batch_length(ractl);
4778                 rdata->cfile    = cifsFileInfo_get(open_file);
4779                 rdata->server   = server;
4780                 rdata->mapping  = ractl->mapping;
4781                 rdata->offset   = readahead_pos(ractl);
4782                 rdata->pid      = pid;
4783                 rdata->pagesz   = PAGE_SIZE;
4784                 rdata->tailsz   = PAGE_SIZE;
4785                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4786                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4787                 rdata->credits  = credits_on_stack;
4788
4789                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4790                 if (!rc) {
4791                         if (rdata->cfile->invalidHandle)
4792                                 rc = -EAGAIN;
4793                         else
4794                                 rc = server->ops->async_readv(rdata);
4795                 }
4796
4797                 if (rc) {
4798                         add_credits_and_wake_if(server, &rdata->credits, 0);
4799                         for (i = 0; i < rdata->nr_pages; i++) {
4800                                 page = rdata->pages[i];
4801                                 unlock_page(page);
4802                                 put_page(page);
4803                         }
4804                         /* Fall back to readpage in error/reconnect cases */
4805                         kref_put(&rdata->refcount, cifs_readdata_release);
4806                         break;
4807                 }
4808
4809                 kref_put(&rdata->refcount, cifs_readdata_release);
4810                 last_batch_size = nr_pages;
4811         }
4812
4813         free_xid(xid);
4814 }
4815
4816 /*
4817  * cifs_readpage_worker must be called with the page pinned
4818  */
4819 static int cifs_readpage_worker(struct file *file, struct page *page,
4820         loff_t *poffset)
4821 {
4822         char *read_data;
4823         int rc;
4824
4825         /* Is the page cached? */
4826         rc = cifs_readpage_from_fscache(file_inode(file), page);
4827         if (rc == 0)
4828                 goto read_complete;
4829
4830         read_data = kmap(page);
4831         /* for reads over a certain size we could initiate async readahead */
4832
4833         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4834
4835         if (rc < 0)
4836                 goto io_error;
4837         else
4838                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4839
4840         /* we do not want atime to be less than mtime, it broke some apps */
4841         file_inode(file)->i_atime = current_time(file_inode(file));
4842         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4843                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4844         else
4845                 file_inode(file)->i_atime = current_time(file_inode(file));
4846
4847         if (PAGE_SIZE > rc)
4848                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4849
4850         flush_dcache_page(page);
4851         SetPageUptodate(page);
4852
4853         /* send this page to the cache */
4854         cifs_readpage_to_fscache(file_inode(file), page);
4855
4856         rc = 0;
4857
4858 io_error:
4859         kunmap(page);
4860         unlock_page(page);
4861
4862 read_complete:
4863         return rc;
4864 }
4865
4866 static int cifs_read_folio(struct file *file, struct folio *folio)
4867 {
4868         struct page *page = &folio->page;
4869         loff_t offset = page_file_offset(page);
4870         int rc = -EACCES;
4871         unsigned int xid;
4872
4873         xid = get_xid();
4874
4875         if (file->private_data == NULL) {
4876                 rc = -EBADF;
4877                 free_xid(xid);
4878                 return rc;
4879         }
4880
4881         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4882                  page, (int)offset, (int)offset);
4883
4884         rc = cifs_readpage_worker(file, page, &offset);
4885
4886         free_xid(xid);
4887         return rc;
4888 }
4889
4890 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4891 {
4892         struct cifsFileInfo *open_file;
4893
4894         spin_lock(&cifs_inode->open_file_lock);
4895         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4896                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4897                         spin_unlock(&cifs_inode->open_file_lock);
4898                         return 1;
4899                 }
4900         }
4901         spin_unlock(&cifs_inode->open_file_lock);
4902         return 0;
4903 }
4904
4905 /* We do not want to update the file size from the server for inodes
4906    open for write - to avoid races with writepage extending the file.
4907    In the future we could consider allowing refreshing the inode only
4908    on increases in the file size, but this is tricky to do without
4909    racing with writebehind page caching in the current Linux kernel
4910    design */
4911 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4912 {
4913         if (!cifsInode)
4914                 return true;
4915
4916         if (is_inode_writable(cifsInode)) {
4917                 /* This inode is open for write at least once */
4918                 struct cifs_sb_info *cifs_sb;
4919
4920                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4921                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4922                         /* since there is no page cache to corrupt on
4923                            direct I/O, we can change the size safely */
4924                         return true;
4925                 }
4926
4927                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4928                         return true;
4929
4930                 return false;
4931         } else
4932                 return true;
4933 }
4934
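/*
 * Prepare a page for a buffered write.  The read from the server is
 * skipped when the page is already uptodate, when the write covers the
 * whole page, or (with a read oplock) when the page lies beyond or
 * straddles EOF and the non-written parts can simply be zeroed.  Otherwise
 * the page is read in once via cifs_readpage_worker().
 */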
4935 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4936                         loff_t pos, unsigned len,
4937                         struct page **pagep, void **fsdata)
4938 {
4939         int oncethru = 0;
4940         pgoff_t index = pos >> PAGE_SHIFT;
4941         loff_t offset = pos & (PAGE_SIZE - 1);
4942         loff_t page_start = pos & PAGE_MASK;
4943         loff_t i_size;
4944         struct page *page;
4945         int rc = 0;
4946
4947         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4948
4949 start:
4950         page = grab_cache_page_write_begin(mapping, index);
4951         if (!page) {
4952                 rc = -ENOMEM;
4953                 goto out;
4954         }
4955
4956         if (PageUptodate(page))
4957                 goto out;
4958
4959         /*
4960          * If we write a full page it will be up to date, no need to read from
4961          * the server. If the write is short, we'll end up doing a sync write
4962          * instead.
4963          */
4964         if (len == PAGE_SIZE)
4965                 goto out;
4966
4967         /*
4968          * optimize away the read when we have an oplock, and we're not
4969          * expecting to use any of the data we'd be reading in. That
4970          * is, when the page lies beyond the EOF, or straddles the EOF
4971          * and the write will cover all of the existing data.
4972          */
4973         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4974                 i_size = i_size_read(mapping->host);
4975                 if (page_start >= i_size ||
4976                     (offset == 0 && (pos + len) >= i_size)) {
4977                         zero_user_segments(page, 0, offset,
4978                                            offset + len,
4979                                            PAGE_SIZE);
4980                         /*
4981                          * PageChecked means that the parts of the page
4982                          * to which we're not writing are considered up
4983                          * to date. Once the data is copied to the
4984                          * page, it can be set uptodate.
4985                          */
4986                         SetPageChecked(page);
4987                         goto out;
4988                 }
4989         }
4990
4991         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4992                 /*
4993                  * might as well read a page, it is fast enough. If we get
4994                  * an error, we don't need to return it. cifs_write_end will
4995                  * do a sync write instead since PG_uptodate isn't set.
4996                  */
4997                 cifs_readpage_worker(file, page, &page_start);
4998                 put_page(page);
4999                 oncethru = 1;
5000                 goto start;
5001         } else {
5002                 /* we could try using another file handle if there is one -
5003                    but how would we lock it to prevent a close of that handle
5004                    racing with this read? In any case this page will be
5005                    written out by write_end, so it is fine */
5006         }
5007 out:
5008         *pagep = page;
5009         return rc;
5010 }
5011
5012 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
5013 {
5014         if (folio_test_private(folio))
5015                 return false;
5016         if (folio_test_fscache(folio)) {
5017                 if (current_is_kswapd() || !(gfp & __GFP_FS))
5018                         return false;
5019                 folio_wait_fscache(folio);
5020         }
5021         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
5022         return true;
5023 }
5024
5025 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
5026                                  size_t length)
5027 {
5028         folio_wait_fscache(folio);
5029 }
5030
5031 static int cifs_launder_folio(struct folio *folio)
5032 {
5033         int rc = 0;
5034         loff_t range_start = folio_pos(folio);
5035         loff_t range_end = range_start + folio_size(folio);
5036         struct writeback_control wbc = {
5037                 .sync_mode = WB_SYNC_ALL,
5038                 .nr_to_write = 0,
5039                 .range_start = range_start,
5040                 .range_end = range_end,
5041         };
5042
5043         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5044
5045         if (folio_clear_dirty_for_io(folio))
5046                 rc = cifs_writepage_locked(&folio->page, &wbc);
5047
5048         folio_wait_fscache(folio);
5049         return rc;
5050 }
5051
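/*
 * Work item run when the server sends an oplock/lease break: wait for any
 * in-flight writers, downgrade the cached oplock level, break the matching
 * VFS lease, write back dirty pages (zapping the page cache if read caching
 * was lost), push cached byte-range locks to the server, and finally
 * acknowledge the break unless it was cancelled.
 */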
5052 void cifs_oplock_break(struct work_struct *work)
5053 {
5054         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5055                                                   oplock_break);
5056         struct inode *inode = d_inode(cfile->dentry);
5057         struct cifsInodeInfo *cinode = CIFS_I(inode);
5058         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
5059         struct TCP_Server_Info *server = tcon->ses->server;
5060         int rc = 0;
5061         bool purge_cache = false;
5062
5063         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5064                         TASK_UNINTERRUPTIBLE);
5065
5066         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5067                                       cfile->oplock_epoch, &purge_cache);
5068
5069         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5070                                                 cifs_has_mand_locks(cinode)) {
5071                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5072                          inode);
5073                 cinode->oplock = 0;
5074         }
5075
5076         if (inode && S_ISREG(inode->i_mode)) {
5077                 if (CIFS_CACHE_READ(cinode))
5078                         break_lease(inode, O_RDONLY);
5079                 else
5080                         break_lease(inode, O_WRONLY);
5081                 rc = filemap_fdatawrite(inode->i_mapping);
5082                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5083                         rc = filemap_fdatawait(inode->i_mapping);
5084                         mapping_set_error(inode->i_mapping, rc);
5085                         cifs_zap_mapping(inode);
5086                 }
5087                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5088                 if (CIFS_CACHE_WRITE(cinode))
5089                         goto oplock_break_ack;
5090         }
5091
5092         rc = cifs_push_locks(cfile);
5093         if (rc)
5094                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5095
5096 oplock_break_ack:
5097         /*
5098          * Releasing a stale oplock after a recent reconnect of the smb
5099          * session using a now incorrect file handle is not a data integrity
5100          * issue, but don't bother sending an oplock release if the session
5101          * is still disconnected, since the server already released the oplock.
5102          */
5103         if (!cfile->oplock_break_cancelled) {
5104                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
5105                                                              cinode);
5106                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5107         }
5108
5109         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5110         cifs_done_oplock_break(cinode);
5111 }
5112
5113 /*
5114  * The presence of cifs_direct_io() in the address space ops vector
5115  * allows open() with the O_DIRECT flag, which would otherwise fail.
5116  *
5117  * In the non-cached mode (mount with cache=none), we shunt off direct
5118  * read and write requests, so this method should never be called.
5119  *
5120  * Direct I/O is not yet supported in the cached mode.
5121  */
5122 static ssize_t
5123 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5124 {
5125         /*
5126          * FIXME
5127          * Eventually need to support direct IO for non forcedirectio mounts
5128          */
5129         return -EINVAL;
5130 }
5131
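/*
 * Activate a swapfile on cifs: require that the address space supports
 * ->swap_rw and that the file has no holes (i_blocks must cover i_size),
 * then mark the open file as a swapfile and register a single swap extent
 * covering the whole file.
 */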
5132 static int cifs_swap_activate(struct swap_info_struct *sis,
5133                               struct file *swap_file, sector_t *span)
5134 {
5135         struct cifsFileInfo *cfile = swap_file->private_data;
5136         struct inode *inode = swap_file->f_mapping->host;
5137         unsigned long blocks;
5138         long long isize;
5139
5140         cifs_dbg(FYI, "swap activate\n");
5141
5142         if (!swap_file->f_mapping->a_ops->swap_rw)
5143                 /* Cannot support swap */
5144                 return -EINVAL;
5145
5146         spin_lock(&inode->i_lock);
5147         blocks = inode->i_blocks;
5148         isize = inode->i_size;
5149         spin_unlock(&inode->i_lock);
5150         if (blocks * 512 < isize) {
5151                 pr_warn("swap activate: swapfile has holes\n");
5152                 return -EINVAL;
5153         }
5154         *span = sis->pages;
5155
5156         pr_warn_once("Swap support over SMB3 is experimental\n");
5157
5158         /*
5159          * TODO: consider adding ACL (or documenting how) to prevent other
5160          * users (on this or other systems) from reading it
5161          */
5162
5163
5164         /* TODO: add sk_set_memalloc(inet) or similar */
5165
5166         if (cfile)
5167                 cfile->swapfile = true;
5168         /*
5169          * TODO: Since file already open, we can't open with DENY_ALL here
5170          * but we could add call to grab a byte range lock to prevent others
5171          * from reading or writing the file
5172          */
5173
5174         sis->flags |= SWP_FS_OPS;
5175         return add_swap_extent(sis, 0, sis->max, 0);
5176 }
5177
5178 static void cifs_swap_deactivate(struct file *file)
5179 {
5180         struct cifsFileInfo *cfile = file->private_data;
5181
5182         cifs_dbg(FYI, "swap deactivate\n");
5183
5184         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5185
5186         if (cfile)
5187                 cfile->swapfile = false;
5188
5189         /* do we need to unpin (or unlock) the file? */
5190 }
5191
5192 /*
5193  * Mark a page as having been made dirty and thus needing writeback.  We also
5194  * need to pin the cache object to write back to.
5195  */
5196 #ifdef CONFIG_CIFS_FSCACHE
5197 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5198 {
5199         return fscache_dirty_folio(mapping, folio,
5200                                         cifs_inode_cookie(mapping->host));
5201 }
5202 #else
5203 #define cifs_dirty_folio filemap_dirty_folio
5204 #endif
5205
5206 const struct address_space_operations cifs_addr_ops = {
5207         .read_folio = cifs_read_folio,
5208         .readahead = cifs_readahead,
5209         .writepage = cifs_writepage,
5210         .writepages = cifs_writepages,
5211         .write_begin = cifs_write_begin,
5212         .write_end = cifs_write_end,
5213         .dirty_folio = cifs_dirty_folio,
5214         .release_folio = cifs_release_folio,
5215         .direct_IO = cifs_direct_io,
5216         .invalidate_folio = cifs_invalidate_folio,
5217         .launder_folio = cifs_launder_folio,
5218         /*
5219          * TODO: investigate and, if useful, add a cifs_migratePage
5220          * helper (under CONFIG_MIGRATION) in the future, and also
5221          * investigate and add an is_dirty_writeback helper if needed
5222          */
5223         .swap_activate = cifs_swap_activate,
5224         .swap_deactivate = cifs_swap_deactivate,
5225 };
5226
5227 /*
5228  * cifs_readahead requires the server to support a buffer large enough to
5229  * contain the header plus one complete page of data.  Otherwise, we need
5230  * to leave cifs_readahead out of the address space operations.
5231  */
5232 const struct address_space_operations cifs_addr_ops_smallbuf = {
5233         .read_folio = cifs_read_folio,
5234         .writepage = cifs_writepage,
5235         .writepages = cifs_writepages,
5236         .write_begin = cifs_write_begin,
5237         .write_end = cifs_write_end,
5238         .dirty_folio = cifs_dirty_folio,
5239         .release_folio = cifs_release_folio,
5240         .invalidate_folio = cifs_invalidate_folio,
5241         .launder_folio = cifs_launder_folio,
5242 };
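
/*
 * Which of the two tables gets installed is decided when the inode's
 * operations are set up (see cifs_set_ops() in inode.c): if the server's
 * negotiated read size cannot hold a full page plus the SMB header, the
 * smallbuf variant without ->readahead is used.
 */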