cifs: account for primary channel in the interface list
[platform/kernel/linux-rpi.git] / fs / smb / client / file.c
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45         struct address_space *mapping = inode->i_mapping;
46         struct folio *folio;
47         pgoff_t end;
48
49         XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50
51         rcu_read_lock();
52
53         end = (start + len - 1) / PAGE_SIZE;
54         xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55                 if (xas_retry(&xas, folio))
56                         continue;
57                 xas_pause(&xas);
58                 rcu_read_unlock();
59                 folio_lock(folio);
60                 folio_clear_dirty_for_io(folio);
61                 folio_unlock(folio);
62                 rcu_read_lock();
63         }
64
65         rcu_read_unlock();
66 }
67
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73         struct address_space *mapping = inode->i_mapping;
74         struct folio *folio;
75         pgoff_t end;
76
77         XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78
79         if (!len)
80                 return;
81
82         rcu_read_lock();
83
84         end = (start + len - 1) / PAGE_SIZE;
85         xas_for_each(&xas, folio, end) {
86                 if (xas_retry(&xas, folio))
87                         continue;
88                 if (!folio_test_writeback(folio)) {
89                         WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90                                   len, start, folio_index(folio), end);
91                         continue;
92                 }
93
94                 folio_detach_private(folio);
95                 folio_end_writeback(folio);
96         }
97
98         rcu_read_unlock();
99 }
100
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106         struct address_space *mapping = inode->i_mapping;
107         struct folio *folio;
108         pgoff_t end;
109
110         XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111
112         if (!len)
113                 return;
114
115         rcu_read_lock();
116
117         end = (start + len - 1) / PAGE_SIZE;
118         xas_for_each(&xas, folio, end) {
119                 if (xas_retry(&xas, folio))
120                         continue;
121                 if (!folio_test_writeback(folio)) {
122                         WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123                                   len, start, folio_index(folio), end);
124                         continue;
125                 }
126
127                 folio_set_error(folio);
128                 folio_end_writeback(folio);
129         }
130
131         rcu_read_unlock();
132 }
133
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139         struct address_space *mapping = inode->i_mapping;
140         struct folio *folio;
141         pgoff_t end;
142
143         XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144
145         if (!len)
146                 return;
147
148         rcu_read_lock();
149
150         end = (start + len - 1) / PAGE_SIZE;
151         xas_for_each(&xas, folio, end) {
152                 if (!folio_test_writeback(folio)) {
153                         WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154                                   len, start, folio_index(folio), end);
155                         continue;
156                 }
157
158                 filemap_dirty_folio(folio->mapping, folio);
159                 folio_end_writeback(folio);
160         }
161
162         rcu_read_unlock();
163 }
164
165 /*
166  * Mark as invalid, all open files on tree connections since they
167  * were closed when session to server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172         struct cifsFileInfo *open_file = NULL;
173         struct list_head *tmp;
174         struct list_head *tmp1;
175
176         /* only send once per connect */
177         spin_lock(&tcon->tc_lock);
178         if (tcon->status != TID_NEED_RECON) {
179                 spin_unlock(&tcon->tc_lock);
180                 return;
181         }
182         tcon->status = TID_IN_FILES_INVALIDATE;
183         spin_unlock(&tcon->tc_lock);
184
185         /* list all files open on tree connection and mark them invalid */
186         spin_lock(&tcon->open_file_lock);
187         list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
188                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
189                 open_file->invalidHandle = true;
190                 open_file->oplock_break_cancelled = true;
191         }
192         spin_unlock(&tcon->open_file_lock);
193
194         invalidate_all_cached_dirs(tcon);
195         spin_lock(&tcon->tc_lock);
196         if (tcon->status == TID_IN_FILES_INVALIDATE)
197                 tcon->status = TID_NEED_TCON;
198         spin_unlock(&tcon->tc_lock);
199
200         /*
201          * BB Add call to invalidate_inodes(sb) for all superblocks mounted
202          * to this tcon.
203          */
204 }
205
206 static inline int cifs_convert_flags(unsigned int flags)
207 {
208         if ((flags & O_ACCMODE) == O_RDONLY)
209                 return GENERIC_READ;
210         else if ((flags & O_ACCMODE) == O_WRONLY)
211                 return GENERIC_WRITE;
212         else if ((flags & O_ACCMODE) == O_RDWR) {
213                 /* GENERIC_ALL is too much permission to request
214                    can cause unnecessary access denied on create */
215                 /* return GENERIC_ALL; */
216                 return (GENERIC_READ | GENERIC_WRITE);
217         }
218
219         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
220                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
221                 FILE_READ_DATA);
222 }
223
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Translate open(2) @flags into the SMB_O_* flag set.
 *
 * O_EXCL is only honoured together with O_CREAT; on its own it is ignored
 * and a debug message is logged.
 */
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	const bool creat = flags & O_CREAT;
	const bool excl = flags & O_EXCL;
	u32 smb_flags;

	switch (flags & O_ACCMODE) {
	case O_RDONLY:
		smb_flags = SMB_O_RDONLY;
		break;
	case O_WRONLY:
		smb_flags = SMB_O_WRONLY;
		break;
	case O_RDWR:
		smb_flags = SMB_O_RDWR;
		break;
	default:
		smb_flags = 0;
		break;
	}

	if (creat)
		smb_flags |= SMB_O_CREAT;

	if (excl) {
		if (creat)
			smb_flags |= SMB_O_EXCL;
		else
			cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
				 current->comm, current->tgid);
	}

	if (flags & O_TRUNC)
		smb_flags |= SMB_O_TRUNC;

	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		smb_flags |= SMB_O_SYNC;

	if (flags & O_DIRECTORY)
		smb_flags |= SMB_O_DIRECTORY;

	if (flags & O_NOFOLLOW)
		smb_flags |= SMB_O_NOFOLLOW;

	if (flags & O_DIRECT)
		smb_flags |= SMB_O_DIRECT;

	return smb_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
259
260 static inline int cifs_get_disposition(unsigned int flags)
261 {
262         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
263                 return FILE_CREATE;
264         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
265                 return FILE_OVERWRITE_IF;
266         else if ((flags & O_CREAT) == O_CREAT)
267                 return FILE_OPEN_IF;
268         else if ((flags & O_TRUNC) == O_TRUNC)
269                 return FILE_OVERWRITE;
270         else
271                 return FILE_OPEN;
272 }
273
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Open @full_path with CIFSPOSIXCreate().
 *
 * @mode is masked with the current umask and @f_flags is converted with
 * cifs_posix_convert_flags() before the call; @pnetfid and @poplock are
 * handed to CIFSPOSIXCreate() as output locations.
 *
 * When the call succeeds, the response carries attributes (Type != -1) and
 * @pinode is non-NULL, the attributes are applied: if *@pinode is NULL a
 * new inode is obtained via cifs_iget(), otherwise the existing inode is
 * revalidated and updated.
 *
 * Returns 0 on success or a negative errno (-ENOMEM if an allocation or
 * cifs_iget() fails).
 */
int cifs_posix_open(const char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	FILE_UNIX_BASIC_INFO *unix_info;
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;
	__u32 posix_flags;
	int rc;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	unix_info = kzalloc(sizeof(*unix_info), GFP_KERNEL);
	if (!unix_info)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto out_free;
	}
	tcon = tlink_tcon(tlink);

	mode &= ~current_umask();
	posix_flags = cifs_posix_convert_flags(f_flags);

	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, unix_info,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	/*
	 * Stop here if the open failed, if no attributes came back (open ok,
	 * caller does qpathinfo) or if the caller does not need the info.
	 */
	if (rc || unix_info->Type == cpu_to_le32(-1) || !pinode)
		goto out_free;

	cifs_unix_basic_to_fattr(&fattr, unix_info, cifs_sb);

	if (*pinode) {
		/* existing inode: refresh it from the returned attributes */
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr);
		goto out_free;
	}

	/* get new inode and set it up */
	cifs_fill_uniqueid(sb, &fattr);
	*pinode = cifs_iget(sb, &fattr);
	if (!*pinode)
		rc = -ENOMEM;

out_free:
	kfree(unix_info);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
337
338 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
339                         struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
340                         struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
341 {
342         int rc;
343         int desired_access;
344         int disposition;
345         int create_options = CREATE_NOT_DIR;
346         struct TCP_Server_Info *server = tcon->ses->server;
347         struct cifs_open_parms oparms;
348
349         if (!server->ops->open)
350                 return -ENOSYS;
351
352         desired_access = cifs_convert_flags(f_flags);
353
354 /*********************************************************************
355  *  open flag mapping table:
356  *
357  *      POSIX Flag            CIFS Disposition
358  *      ----------            ----------------
359  *      O_CREAT               FILE_OPEN_IF
360  *      O_CREAT | O_EXCL      FILE_CREATE
361  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
362  *      O_TRUNC               FILE_OVERWRITE
363  *      none of the above     FILE_OPEN
364  *
365  *      Note that there is not a direct match between disposition
366  *      FILE_SUPERSEDE (ie create whether or not file exists although
367  *      O_CREAT | O_TRUNC is similar but truncates the existing
368  *      file rather than creating a new file as FILE_SUPERSEDE does
369  *      (which uses the attributes / metadata passed in on open call)
370  *?
371  *?  O_SYNC is a reasonable match to CIFS writethrough flag
372  *?  and the read write flags match reasonably.  O_LARGEFILE
373  *?  is irrelevant because largefile support is always used
374  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
375  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
376  *********************************************************************/
377
378         disposition = cifs_get_disposition(f_flags);
379
380         /* BB pass O_SYNC flag through on file attributes .. BB */
381
382         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
383         if (f_flags & O_SYNC)
384                 create_options |= CREATE_WRITE_THROUGH;
385
386         if (f_flags & O_DIRECT)
387                 create_options |= CREATE_NO_BUFFER;
388
389         oparms = (struct cifs_open_parms) {
390                 .tcon = tcon,
391                 .cifs_sb = cifs_sb,
392                 .desired_access = desired_access,
393                 .create_options = cifs_create_options(cifs_sb, create_options),
394                 .disposition = disposition,
395                 .path = full_path,
396                 .fid = fid,
397         };
398
399         rc = server->ops->open(xid, &oparms, oplock, buf);
400         if (rc)
401                 return rc;
402
403         /* TODO: Add support for calling posix query info but with passing in fid */
404         if (tcon->unix_ext)
405                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
406                                               xid);
407         else
408                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
409                                          xid, fid);
410
411         if (rc) {
412                 server->ops->close(xid, tcon, fid);
413                 if (rc == -ESTALE)
414                         rc = -EOPENSTALE;
415         }
416
417         return rc;
418 }
419
420 static bool
421 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
422 {
423         struct cifs_fid_locks *cur;
424         bool has_locks = false;
425
426         down_read(&cinode->lock_sem);
427         list_for_each_entry(cur, &cinode->llist, llist) {
428                 if (!list_empty(&cur->locks)) {
429                         has_locks = true;
430                         break;
431                 }
432         }
433         up_read(&cinode->lock_sem);
434         return has_locks;
435 }
436
/*
 * Acquire @sem for writing by polling: retry down_write_trylock() and
 * sleep 10ms between attempts until it succeeds.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
443
/*
 * Work handler for cifsFileInfo::put.  Declared ahead of its definition
 * because cifs_new_fileinfo() passes it to INIT_WORK().
 */
static void cifsFileInfo_put_work(struct work_struct *work);
445
446 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
447                                        struct tcon_link *tlink, __u32 oplock,
448                                        const char *symlink_target)
449 {
450         struct dentry *dentry = file_dentry(file);
451         struct inode *inode = d_inode(dentry);
452         struct cifsInodeInfo *cinode = CIFS_I(inode);
453         struct cifsFileInfo *cfile;
454         struct cifs_fid_locks *fdlocks;
455         struct cifs_tcon *tcon = tlink_tcon(tlink);
456         struct TCP_Server_Info *server = tcon->ses->server;
457
458         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
459         if (cfile == NULL)
460                 return cfile;
461
462         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
463         if (!fdlocks) {
464                 kfree(cfile);
465                 return NULL;
466         }
467
468         if (symlink_target) {
469                 cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
470                 if (!cfile->symlink_target) {
471                         kfree(fdlocks);
472                         kfree(cfile);
473                         return NULL;
474                 }
475         }
476
477         INIT_LIST_HEAD(&fdlocks->locks);
478         fdlocks->cfile = cfile;
479         cfile->llist = fdlocks;
480
481         cfile->count = 1;
482         cfile->pid = current->tgid;
483         cfile->uid = current_fsuid();
484         cfile->dentry = dget(dentry);
485         cfile->f_flags = file->f_flags;
486         cfile->invalidHandle = false;
487         cfile->deferred_close_scheduled = false;
488         cfile->tlink = cifs_get_tlink(tlink);
489         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
490         INIT_WORK(&cfile->put, cifsFileInfo_put_work);
491         INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
492         mutex_init(&cfile->fh_mutex);
493         spin_lock_init(&cfile->file_info_lock);
494
495         cifs_sb_active(inode->i_sb);
496
497         /*
498          * If the server returned a read oplock and we have mandatory brlocks,
499          * set oplock level to None.
500          */
501         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
502                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
503                 oplock = 0;
504         }
505
506         cifs_down_write(&cinode->lock_sem);
507         list_add(&fdlocks->llist, &cinode->llist);
508         up_write(&cinode->lock_sem);
509
510         spin_lock(&tcon->open_file_lock);
511         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
512                 oplock = fid->pending_open->oplock;
513         list_del(&fid->pending_open->olist);
514
515         fid->purge_cache = false;
516         server->ops->set_fid(cfile, fid, oplock);
517
518         list_add(&cfile->tlist, &tcon->openFileList);
519         atomic_inc(&tcon->num_local_opens);
520
521         /* if readable file instance put first in list*/
522         spin_lock(&cinode->open_file_lock);
523         if (file->f_mode & FMODE_READ)
524                 list_add(&cfile->flist, &cinode->openFileList);
525         else
526                 list_add_tail(&cfile->flist, &cinode->openFileList);
527         spin_unlock(&cinode->open_file_lock);
528         spin_unlock(&tcon->open_file_lock);
529
530         if (fid->purge_cache)
531                 cifs_zap_mapping(inode);
532
533         file->private_data = cfile;
534         return cfile;
535 }
536
537 struct cifsFileInfo *
538 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
539 {
540         spin_lock(&cifs_file->file_info_lock);
541         cifsFileInfo_get_locked(cifs_file);
542         spin_unlock(&cifs_file->file_info_lock);
543         return cifs_file;
544 }
545
546 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
547 {
548         struct inode *inode = d_inode(cifs_file->dentry);
549         struct cifsInodeInfo *cifsi = CIFS_I(inode);
550         struct cifsLockInfo *li, *tmp;
551         struct super_block *sb = inode->i_sb;
552
553         /*
554          * Delete any outstanding lock records. We'll lose them when the file
555          * is closed anyway.
556          */
557         cifs_down_write(&cifsi->lock_sem);
558         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
559                 list_del(&li->llist);
560                 cifs_del_lock_waiters(li);
561                 kfree(li);
562         }
563         list_del(&cifs_file->llist->llist);
564         kfree(cifs_file->llist);
565         up_write(&cifsi->lock_sem);
566
567         cifs_put_tlink(cifs_file->tlink);
568         dput(cifs_file->dentry);
569         cifs_sb_deactive(sb);
570         kfree(cifs_file->symlink_target);
571         kfree(cifs_file);
572 }
573
574 static void cifsFileInfo_put_work(struct work_struct *work)
575 {
576         struct cifsFileInfo *cifs_file = container_of(work,
577                         struct cifsFileInfo, put);
578
579         cifsFileInfo_put_final(cifs_file);
580 }
581
582 /**
583  * cifsFileInfo_put - release a reference of file priv data
584  *
585  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
586  *
587  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
588  */
589 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
590 {
591         _cifsFileInfo_put(cifs_file, true, true);
592 }
593
594 /**
595  * _cifsFileInfo_put - release a reference of file priv data
596  *
597  * This may involve closing the filehandle @cifs_file out on the
598  * server. Must be called without holding tcon->open_file_lock,
599  * cinode->open_file_lock and cifs_file->file_info_lock.
600  *
601  * If @wait_for_oplock_handler is true and we are releasing the last
602  * reference, wait for any running oplock break handler of the file
603  * and cancel any pending one.
604  *
605  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
606  * @wait_oplock_handler: must be false if called from oplock_break_handler
607  * @offload:    not offloaded on close and oplock breaks
608  *
609  */
610 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
611                        bool wait_oplock_handler, bool offload)
612 {
613         struct inode *inode = d_inode(cifs_file->dentry);
614         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
615         struct TCP_Server_Info *server = tcon->ses->server;
616         struct cifsInodeInfo *cifsi = CIFS_I(inode);
617         struct super_block *sb = inode->i_sb;
618         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
619         struct cifs_fid fid = {};
620         struct cifs_pending_open open;
621         bool oplock_break_cancelled;
622
623         spin_lock(&tcon->open_file_lock);
624         spin_lock(&cifsi->open_file_lock);
625         spin_lock(&cifs_file->file_info_lock);
626         if (--cifs_file->count > 0) {
627                 spin_unlock(&cifs_file->file_info_lock);
628                 spin_unlock(&cifsi->open_file_lock);
629                 spin_unlock(&tcon->open_file_lock);
630                 return;
631         }
632         spin_unlock(&cifs_file->file_info_lock);
633
634         if (server->ops->get_lease_key)
635                 server->ops->get_lease_key(inode, &fid);
636
637         /* store open in pending opens to make sure we don't miss lease break */
638         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
639
640         /* remove it from the lists */
641         list_del(&cifs_file->flist);
642         list_del(&cifs_file->tlist);
643         atomic_dec(&tcon->num_local_opens);
644
645         if (list_empty(&cifsi->openFileList)) {
646                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
647                          d_inode(cifs_file->dentry));
648                 /*
649                  * In strict cache mode we need invalidate mapping on the last
650                  * close  because it may cause a error when we open this file
651                  * again and get at least level II oplock.
652                  */
653                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
654                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
655                 cifs_set_oplock_level(cifsi, 0);
656         }
657
658         spin_unlock(&cifsi->open_file_lock);
659         spin_unlock(&tcon->open_file_lock);
660
661         oplock_break_cancelled = wait_oplock_handler ?
662                 cancel_work_sync(&cifs_file->oplock_break) : false;
663
664         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
665                 struct TCP_Server_Info *server = tcon->ses->server;
666                 unsigned int xid;
667
668                 xid = get_xid();
669                 if (server->ops->close_getattr)
670                         server->ops->close_getattr(xid, tcon, cifs_file);
671                 else if (server->ops->close)
672                         server->ops->close(xid, tcon, &cifs_file->fid);
673                 _free_xid(xid);
674         }
675
676         if (oplock_break_cancelled)
677                 cifs_done_oplock_break(cifsi);
678
679         cifs_del_pending_open(&open);
680
681         if (offload)
682                 queue_work(fileinfo_put_wq, &cifs_file->put);
683         else
684                 cifsFileInfo_put_final(cifs_file);
685 }
686
687 int cifs_open(struct inode *inode, struct file *file)
688
689 {
690         int rc = -EACCES;
691         unsigned int xid;
692         __u32 oplock;
693         struct cifs_sb_info *cifs_sb;
694         struct TCP_Server_Info *server;
695         struct cifs_tcon *tcon;
696         struct tcon_link *tlink;
697         struct cifsFileInfo *cfile = NULL;
698         void *page;
699         const char *full_path;
700         bool posix_open_ok = false;
701         struct cifs_fid fid = {};
702         struct cifs_pending_open open;
703         struct cifs_open_info_data data = {};
704
705         xid = get_xid();
706
707         cifs_sb = CIFS_SB(inode->i_sb);
708         if (unlikely(cifs_forced_shutdown(cifs_sb))) {
709                 free_xid(xid);
710                 return -EIO;
711         }
712
713         tlink = cifs_sb_tlink(cifs_sb);
714         if (IS_ERR(tlink)) {
715                 free_xid(xid);
716                 return PTR_ERR(tlink);
717         }
718         tcon = tlink_tcon(tlink);
719         server = tcon->ses->server;
720
721         page = alloc_dentry_path();
722         full_path = build_path_from_dentry(file_dentry(file), page);
723         if (IS_ERR(full_path)) {
724                 rc = PTR_ERR(full_path);
725                 goto out;
726         }
727
728         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
729                  inode, file->f_flags, full_path);
730
731         if (file->f_flags & O_DIRECT &&
732             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
733                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
734                         file->f_op = &cifs_file_direct_nobrl_ops;
735                 else
736                         file->f_op = &cifs_file_direct_ops;
737         }
738
739         /* Get the cached handle as SMB2 close is deferred */
740         rc = cifs_get_readable_path(tcon, full_path, &cfile);
741         if (rc == 0) {
742                 if (file->f_flags == cfile->f_flags) {
743                         file->private_data = cfile;
744                         spin_lock(&CIFS_I(inode)->deferred_lock);
745                         cifs_del_deferred_close(cfile);
746                         spin_unlock(&CIFS_I(inode)->deferred_lock);
747                         goto use_cache;
748                 } else {
749                         _cifsFileInfo_put(cfile, true, false);
750                 }
751         }
752
753         if (server->oplocks)
754                 oplock = REQ_OPLOCK;
755         else
756                 oplock = 0;
757
758 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
759         if (!tcon->broken_posix_open && tcon->unix_ext &&
760             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
761                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
762                 /* can not refresh inode info since size could be stale */
763                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
764                                 cifs_sb->ctx->file_mode /* ignored */,
765                                 file->f_flags, &oplock, &fid.netfid, xid);
766                 if (rc == 0) {
767                         cifs_dbg(FYI, "posix open succeeded\n");
768                         posix_open_ok = true;
769                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
770                         if (tcon->ses->serverNOS)
771                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
772                                          tcon->ses->ip_addr,
773                                          tcon->ses->serverNOS);
774                         tcon->broken_posix_open = true;
775                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
776                          (rc != -EOPNOTSUPP)) /* path not found or net err */
777                         goto out;
778                 /*
779                  * Else fallthrough to retry open the old way on network i/o
780                  * or DFS errors.
781                  */
782         }
783 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
784
785         if (server->ops->get_lease_key)
786                 server->ops->get_lease_key(inode, &fid);
787
788         cifs_add_pending_open(&fid, tlink, &open);
789
790         if (!posix_open_ok) {
791                 if (server->ops->get_lease_key)
792                         server->ops->get_lease_key(inode, &fid);
793
794                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
795                                   xid, &data);
796                 if (rc) {
797                         cifs_del_pending_open(&open);
798                         goto out;
799                 }
800         }
801
802         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
803         if (cfile == NULL) {
804                 if (server->ops->close)
805                         server->ops->close(xid, tcon, &fid);
806                 cifs_del_pending_open(&open);
807                 rc = -ENOMEM;
808                 goto out;
809         }
810
811 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
812         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
813                 /*
814                  * Time to set mode which we can not set earlier due to
815                  * problems creating new read-only files.
816                  */
817                 struct cifs_unix_set_info_args args = {
818                         .mode   = inode->i_mode,
819                         .uid    = INVALID_UID, /* no change */
820                         .gid    = INVALID_GID, /* no change */
821                         .ctime  = NO_CHANGE_64,
822                         .atime  = NO_CHANGE_64,
823                         .mtime  = NO_CHANGE_64,
824                         .device = 0,
825                 };
826                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
827                                        cfile->pid);
828         }
829 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
830
831 use_cache:
832         fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
833                            file->f_mode & FMODE_WRITE);
834         if (file->f_flags & O_DIRECT &&
835             (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
836              file->f_flags & O_APPEND))
837                 cifs_invalidate_cache(file_inode(file),
838                                       FSCACHE_INVAL_DIO_WRITE);
839
840 out:
841         free_dentry_path(page);
842         free_xid(xid);
843         cifs_put_tlink(tlink);
844         cifs_free_open_info(&data);
845         return rc;
846 }
847
848 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
849 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
850 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
851
852 /*
853  * Try to reacquire byte range locks that were released when session
854  * to server was lost.
855  */
856 static int
857 cifs_relock_file(struct cifsFileInfo *cfile)
858 {
859         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
860         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
861         int rc = 0;
862 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
863         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
864 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
865
866         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
867         if (cinode->can_cache_brlcks) {
868                 /* can cache locks - no need to relock */
869                 up_read(&cinode->lock_sem);
870                 return rc;
871         }
872
873 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
874         if (cap_unix(tcon->ses) &&
875             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
876             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
877                 rc = cifs_push_posix_locks(cfile);
878         else
879 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
880                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
881
882         up_read(&cinode->lock_sem);
883         return rc;
884 }
885
/*
 * Reopen the server handle behind @cfile if it has been marked invalid.
 *
 * @cfile:     open file whose handle should be re-established
 * @can_flush: when true, dirty pages are written back and the inode
 *             information is re-fetched from the server after the reopen
 *
 * Returns 0 immediately when cfile->invalidHandle is not set. Otherwise
 * returns the error from building the path or from the open call, or, when
 * @can_flush is set, the result of the inode-info refresh that follows a
 * successful reopen.
 *
 * cfile->fh_mutex is taken on entry and released on every path before any
 * flushing or relocking is done.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        /* handle is still valid - nothing to reopen */
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab rename sem here because various ops, including those
         * that already have the rename sem can end up causing writepage to get
         * called and if the server was down that means we end up here, and we
         * can never tell if the caller already has the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        /* request an oplock only when the server has oplocks enabled */
        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        /*
                         * oparms has not been filled in on this path; only
                         * .reconnect is read after reopen_success, to force
                         * the relock below.
                         */
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * fallthrough to retry open the old way on errors, especially
                 * in the reconnect path it is important to retry hard
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        /* optional per-dialect hook; called with the inode and this handle's fid */
        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = &cfile->fid,
                .reconnect = true,
        };

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        /*
         * NOTE(review): oparms.reconnect was set to true above, so this test
         * relies on ops->open clearing it - presumably when the durable
         * handle could not be reconnected; confirm in the open implementation.
         */
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        /* handle is usable again; fh_mutex is dropped before flushing */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                /*
                 * A non-interrupt writeback result is recorded on the mapping;
                 * rc itself is overwritten by the inode refresh below.
                 */
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        /* byte-range locks are pushed again; the relock result is not propagated */
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}
1057
1058 void smb2_deferred_work_close(struct work_struct *work)
1059 {
1060         struct cifsFileInfo *cfile = container_of(work,
1061                         struct cifsFileInfo, deferred.work);
1062
1063         spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1064         cifs_del_deferred_close(cfile);
1065         cfile->deferred_close_scheduled = false;
1066         spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1067         _cifsFileInfo_put(cfile, true, false);
1068 }
1069
/*
 * Close handler for regular files; always returns 0.
 *
 * Releases the fscache cookie use for this open and, when a cifsFileInfo is
 * attached to @file, detaches it and either:
 *  - defers the close by (re)arming cfile->deferred on deferredclose_wq, or
 *  - drops the reference right away with _cifsFileInfo_put().
 *
 * The close is deferred only when all of the following hold: a non-zero
 * closetimeo is configured, the inode holds CIFS_CACHE_RHW_FLG, a lease was
 * granted, CIFS_INO_CLOSE_ON_LOCK is not set, and the deferred-close record
 * could be allocated.
 */
int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                /* allocated up front; a failed allocation just disables deferral */
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
                    && cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        /* attributes were modified: stamp mtime/ctime locally */
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_mtime = inode_set_ctime_current(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        /* NOTE(review): dclose is handed to cifs_add_deferred_close(); presumably it owns it from here - confirm */
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work queues new work.
                                 * So, Increase the ref count to avoid use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                /* no put here: this open's reference stays with the queued work */
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        /* not deferring: drop the reference now and discard the unused record */
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}
1120
1121 void
1122 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1123 {
1124         struct cifsFileInfo *open_file, *tmp;
1125         struct list_head tmp_list;
1126
1127         if (!tcon->use_persistent || !tcon->need_reopen_files)
1128                 return;
1129
1130         tcon->need_reopen_files = false;
1131
1132         cifs_dbg(FYI, "Reopen persistent handles\n");
1133         INIT_LIST_HEAD(&tmp_list);
1134
1135         /* list all files open on tree connection, reopen resilient handles  */
1136         spin_lock(&tcon->open_file_lock);
1137         list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1138                 if (!open_file->invalidHandle)
1139                         continue;
1140                 cifsFileInfo_get(open_file);
1141                 list_add_tail(&open_file->rlist, &tmp_list);
1142         }
1143         spin_unlock(&tcon->open_file_lock);
1144
1145         list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1146                 if (cifs_reopen_file(open_file, false /* do not flush */))
1147                         tcon->need_reopen_files = true;
1148                 list_del_init(&open_file->rlist);
1149                 cifsFileInfo_put(open_file);
1150         }
1151 }
1152
1153 int cifs_closedir(struct inode *inode, struct file *file)
1154 {
1155         int rc = 0;
1156         unsigned int xid;
1157         struct cifsFileInfo *cfile = file->private_data;
1158         struct cifs_tcon *tcon;
1159         struct TCP_Server_Info *server;
1160         char *buf;
1161
1162         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1163
1164         if (cfile == NULL)
1165                 return rc;
1166
1167         xid = get_xid();
1168         tcon = tlink_tcon(cfile->tlink);
1169         server = tcon->ses->server;
1170
1171         cifs_dbg(FYI, "Freeing private data in close dir\n");
1172         spin_lock(&cfile->file_info_lock);
1173         if (server->ops->dir_needs_close(cfile)) {
1174                 cfile->invalidHandle = true;
1175                 spin_unlock(&cfile->file_info_lock);
1176                 if (server->ops->close_dir)
1177                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1178                 else
1179                         rc = -ENOSYS;
1180                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1181                 /* not much we can do if it fails anyway, ignore rc */
1182                 rc = 0;
1183         } else
1184                 spin_unlock(&cfile->file_info_lock);
1185
1186         buf = cfile->srch_inf.ntwrk_buf_start;
1187         if (buf) {
1188                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1189                 cfile->srch_inf.ntwrk_buf_start = NULL;
1190                 if (cfile->srch_inf.smallBuf)
1191                         cifs_small_buf_release(buf);
1192                 else
1193                         cifs_buf_release(buf);
1194         }
1195
1196         cifs_put_tlink(cfile->tlink);
1197         kfree(file->private_data);
1198         file->private_data = NULL;
1199         /* BB can we lock the filestruct while this is going on? */
1200         free_xid(xid);
1201         return rc;
1202 }
1203
1204 static struct cifsLockInfo *
1205 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1206 {
1207         struct cifsLockInfo *lock =
1208                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1209         if (!lock)
1210                 return lock;
1211         lock->offset = offset;
1212         lock->length = length;
1213         lock->type = type;
1214         lock->pid = current->tgid;
1215         lock->flags = flags;
1216         INIT_LIST_HEAD(&lock->blist);
1217         init_waitqueue_head(&lock->block_q);
1218         return lock;
1219 }
1220
1221 void
1222 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1223 {
1224         struct cifsLockInfo *li, *tmp;
1225         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1226                 list_del_init(&li->blist);
1227                 wake_up(&li->block_q);
1228         }
1229 }
1230
1231 #define CIFS_LOCK_OP    0
1232 #define CIFS_READ_OP    1
1233 #define CIFS_WRITE_OP   2
1234
/*
 * Search the locks recorded for one file handle (@fdlocks) for a lock that
 * conflicts with the range starting at @offset and spanning @length bytes.
 *
 * @type, @flags: type and flags of the lock being requested
 * @cfile:        handle on whose behalf the check is made
 * @conf_lock:    if non-NULL, receives the first conflicting lock
 * @rw_check:     CIFS_LOCK_OP (0) for a lock request, CIFS_READ_OP (1) or
 *                CIFS_WRITE_OP (2) when checking a read or write
 *
 * Returns true as soon as a conflicting lock is found, false otherwise.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                /* ranges do not overlap - no conflict with this lock */
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                /* read/write by the lock's own tgid through the same fid */
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                /*
                 * A shared request is compatible with a lock of the same
                 * type, and with a lock held by the same tgid on the same fid.
                 */
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                /* two FL_OFDLCK locks taken through the same fid do not conflict */
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
1271
1272 bool
1273 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1274                         __u8 type, __u16 flags,
1275                         struct cifsLockInfo **conf_lock, int rw_check)
1276 {
1277         bool rc = false;
1278         struct cifs_fid_locks *cur;
1279         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1280
1281         list_for_each_entry(cur, &cinode->llist, llist) {
1282                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1283                                                  flags, cfile, conf_lock,
1284                                                  rw_check);
1285                 if (rc)
1286                         break;
1287         }
1288
1289         return rc;
1290 }
1291
1292 /*
1293  * Check if there is another lock that prevents us to set the lock (mandatory
1294  * style). If such a lock exists, update the flock structure with its
1295  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1296  * or leave it the same if we can't. Returns 0 if we don't need to request to
1297  * the server or 1 otherwise.
1298  */
1299 static int
1300 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1301                __u8 type, struct file_lock *flock)
1302 {
1303         int rc = 0;
1304         struct cifsLockInfo *conf_lock;
1305         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1306         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1307         bool exist;
1308
1309         down_read(&cinode->lock_sem);
1310
1311         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1312                                         flock->fl_flags, &conf_lock,
1313                                         CIFS_LOCK_OP);
1314         if (exist) {
1315                 flock->fl_start = conf_lock->offset;
1316                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1317                 flock->fl_pid = conf_lock->pid;
1318                 if (conf_lock->type & server->vals->shared_lock_type)
1319                         flock->fl_type = F_RDLCK;
1320                 else
1321                         flock->fl_type = F_WRLCK;
1322         } else if (!cinode->can_cache_brlcks)
1323                 rc = 1;
1324         else
1325                 flock->fl_type = F_UNLCK;
1326
1327         up_read(&cinode->lock_sem);
1328         return rc;
1329 }
1330
1331 static void
1332 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1333 {
1334         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1335         cifs_down_write(&cinode->lock_sem);
1336         list_add_tail(&lock->llist, &cfile->llist->locks);
1337         up_write(&cinode->lock_sem);
1338 }
1339
1340 /*
1341  * Set the byte-range lock (mandatory style). Returns:
1342  * 1) 0, if we set the lock and don't need to request to the server;
1343  * 2) 1, if no locks prevent us but we need to request to the server;
1344  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1345  */
1346 static int
1347 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1348                  bool wait)
1349 {
1350         struct cifsLockInfo *conf_lock;
1351         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1352         bool exist;
1353         int rc = 0;
1354
1355 try_again:
1356         exist = false;
1357         cifs_down_write(&cinode->lock_sem);
1358
1359         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1360                                         lock->type, lock->flags, &conf_lock,
1361                                         CIFS_LOCK_OP);
1362         if (!exist && cinode->can_cache_brlcks) {
1363                 list_add_tail(&lock->llist, &cfile->llist->locks);
1364                 up_write(&cinode->lock_sem);
1365                 return rc;
1366         }
1367
1368         if (!exist)
1369                 rc = 1;
1370         else if (!wait)
1371                 rc = -EACCES;
1372         else {
1373                 list_add_tail(&lock->blist, &conf_lock->blist);
1374                 up_write(&cinode->lock_sem);
1375                 rc = wait_event_interruptible(lock->block_q,
1376                                         (lock->blist.prev == &lock->blist) &&
1377                                         (lock->blist.next == &lock->blist));
1378                 if (!rc)
1379                         goto try_again;
1380                 cifs_down_write(&cinode->lock_sem);
1381                 list_del_init(&lock->blist);
1382         }
1383
1384         up_write(&cinode->lock_sem);
1385         return rc;
1386 }
1387
1388 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1389 /*
1390  * Check if there is another lock that prevents us to set the lock (posix
1391  * style). If such a lock exists, update the flock structure with its
1392  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1393  * or leave it the same if we can't. Returns 0 if we don't need to request to
1394  * the server or 1 otherwise.
1395  */
1396 static int
1397 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1398 {
1399         int rc = 0;
1400         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1401         unsigned char saved_type = flock->fl_type;
1402
1403         if ((flock->fl_flags & FL_POSIX) == 0)
1404                 return 1;
1405
1406         down_read(&cinode->lock_sem);
1407         posix_test_lock(file, flock);
1408
1409         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1410                 flock->fl_type = saved_type;
1411                 rc = 1;
1412         }
1413
1414         up_read(&cinode->lock_sem);
1415         return rc;
1416 }
1417
1418 /*
1419  * Set the byte-range lock (posix style). Returns:
1420  * 1) <0, if the error occurs while setting the lock;
1421  * 2) 0, if we set the lock and don't need to request to the server;
1422  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1423  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1424  */
1425 static int
1426 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1427 {
1428         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1429         int rc = FILE_LOCK_DEFERRED + 1;
1430
1431         if ((flock->fl_flags & FL_POSIX) == 0)
1432                 return rc;
1433
1434         cifs_down_write(&cinode->lock_sem);
1435         if (!cinode->can_cache_brlcks) {
1436                 up_write(&cinode->lock_sem);
1437                 return rc;
1438         }
1439
1440         rc = posix_lock_file(file, flock, NULL);
1441         up_write(&cinode->lock_sem);
1442         return rc;
1443 }
1444
1445 int
1446 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1447 {
1448         unsigned int xid;
1449         int rc = 0, stored_rc;
1450         struct cifsLockInfo *li, *tmp;
1451         struct cifs_tcon *tcon;
1452         unsigned int num, max_num, max_buf;
1453         LOCKING_ANDX_RANGE *buf, *cur;
1454         static const int types[] = {
1455                 LOCKING_ANDX_LARGE_FILES,
1456                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1457         };
1458         int i;
1459
1460         xid = get_xid();
1461         tcon = tlink_tcon(cfile->tlink);
1462
1463         /*
1464          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1465          * and check it before using.
1466          */
1467         max_buf = tcon->ses->server->maxBuf;
1468         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1469                 free_xid(xid);
1470                 return -EINVAL;
1471         }
1472
1473         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1474                      PAGE_SIZE);
1475         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1476                         PAGE_SIZE);
1477         max_num = (max_buf - sizeof(struct smb_hdr)) /
1478                                                 sizeof(LOCKING_ANDX_RANGE);
1479         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1480         if (!buf) {
1481                 free_xid(xid);
1482                 return -ENOMEM;
1483         }
1484
1485         for (i = 0; i < 2; i++) {
1486                 cur = buf;
1487                 num = 0;
1488                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1489                         if (li->type != types[i])
1490                                 continue;
1491                         cur->Pid = cpu_to_le16(li->pid);
1492                         cur->LengthLow = cpu_to_le32((u32)li->length);
1493                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1494                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1495                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1496                         if (++num == max_num) {
1497                                 stored_rc = cifs_lockv(xid, tcon,
1498                                                        cfile->fid.netfid,
1499                                                        (__u8)li->type, 0, num,
1500                                                        buf);
1501                                 if (stored_rc)
1502                                         rc = stored_rc;
1503                                 cur = buf;
1504                                 num = 0;
1505                         } else
1506                                 cur++;
1507                 }
1508
1509                 if (num) {
1510                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1511                                                (__u8)types[i], 0, num, buf);
1512                         if (stored_rc)
1513                                 rc = stored_rc;
1514                 }
1515         }
1516
1517         kfree(buf);
1518         free_xid(xid);
1519         return rc;
1520 }
1521
1522 static __u32
1523 hash_lockowner(fl_owner_t owner)
1524 {
1525         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1526 }
1527 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1528
/*
 * Snapshot of one posix lock, taken by cifs_push_posix_locks() while the
 * lock context spinlock is held so that the lock can be sent to the server
 * after the spinlock has been dropped.
 */
struct lock_to_push {
        struct list_head llist; /* linkage on the local list of locks to send */
        __u64 offset;           /* start of the range (flock->fl_start) */
        __u64 length;           /* range length as computed by cifs_flock_len() */
        __u32 pid;              /* hashed lock owner (hash_lockowner()) */
        __u16 netfid;           /* file id of the handle the lock is pushed on */
        __u8 type;              /* CIFS_RDLCK or CIFS_WRLCK */
};
1537
1538 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1539 static int
1540 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1541 {
1542         struct inode *inode = d_inode(cfile->dentry);
1543         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1544         struct file_lock *flock;
1545         struct file_lock_context *flctx = locks_inode_context(inode);
1546         unsigned int count = 0, i;
1547         int rc = 0, xid, type;
1548         struct list_head locks_to_send, *el;
1549         struct lock_to_push *lck, *tmp;
1550         __u64 length;
1551
1552         xid = get_xid();
1553
1554         if (!flctx)
1555                 goto out;
1556
1557         spin_lock(&flctx->flc_lock);
1558         list_for_each(el, &flctx->flc_posix) {
1559                 count++;
1560         }
1561         spin_unlock(&flctx->flc_lock);
1562
1563         INIT_LIST_HEAD(&locks_to_send);
1564
1565         /*
1566          * Allocating count locks is enough because no FL_POSIX locks can be
1567          * added to the list while we are holding cinode->lock_sem that
1568          * protects locking operations of this inode.
1569          */
1570         for (i = 0; i < count; i++) {
1571                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1572                 if (!lck) {
1573                         rc = -ENOMEM;
1574                         goto err_out;
1575                 }
1576                 list_add_tail(&lck->llist, &locks_to_send);
1577         }
1578
1579         el = locks_to_send.next;
1580         spin_lock(&flctx->flc_lock);
1581         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1582                 if (el == &locks_to_send) {
1583                         /*
1584                          * The list ended. We don't have enough allocated
1585                          * structures - something is really wrong.
1586                          */
1587                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1588                         break;
1589                 }
1590                 length = cifs_flock_len(flock);
1591                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1592                         type = CIFS_RDLCK;
1593                 else
1594                         type = CIFS_WRLCK;
1595                 lck = list_entry(el, struct lock_to_push, llist);
1596                 lck->pid = hash_lockowner(flock->fl_owner);
1597                 lck->netfid = cfile->fid.netfid;
1598                 lck->length = length;
1599                 lck->type = type;
1600                 lck->offset = flock->fl_start;
1601         }
1602         spin_unlock(&flctx->flc_lock);
1603
1604         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1605                 int stored_rc;
1606
1607                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1608                                              lck->offset, lck->length, NULL,
1609                                              lck->type, 0);
1610                 if (stored_rc)
1611                         rc = stored_rc;
1612                 list_del(&lck->llist);
1613                 kfree(lck);
1614         }
1615
1616 out:
1617         free_xid(xid);
1618         return rc;
1619 err_out:
1620         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1621                 list_del(&lck->llist);
1622                 kfree(lck);
1623         }
1624         goto out;
1625 }
1626 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1627
1628 static int
1629 cifs_push_locks(struct cifsFileInfo *cfile)
1630 {
1631         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1632         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1633         int rc = 0;
1634 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1635         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1636 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1637
1638         /* we are going to update can_cache_brlcks here - need a write access */
1639         cifs_down_write(&cinode->lock_sem);
1640         if (!cinode->can_cache_brlcks) {
1641                 up_write(&cinode->lock_sem);
1642                 return rc;
1643         }
1644
1645 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1646         if (cap_unix(tcon->ses) &&
1647             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1648             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1649                 rc = cifs_push_posix_locks(cfile);
1650         else
1651 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1652                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1653
1654         cinode->can_cache_brlcks = false;
1655         up_write(&cinode->lock_sem);
1656         return rc;
1657 }
1658
1659 static void
1660 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1661                 bool *wait_flag, struct TCP_Server_Info *server)
1662 {
1663         if (flock->fl_flags & FL_POSIX)
1664                 cifs_dbg(FYI, "Posix\n");
1665         if (flock->fl_flags & FL_FLOCK)
1666                 cifs_dbg(FYI, "Flock\n");
1667         if (flock->fl_flags & FL_SLEEP) {
1668                 cifs_dbg(FYI, "Blocking lock\n");
1669                 *wait_flag = true;
1670         }
1671         if (flock->fl_flags & FL_ACCESS)
1672                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1673         if (flock->fl_flags & FL_LEASE)
1674                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1675         if (flock->fl_flags &
1676             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1677                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1678                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1679
1680         *type = server->vals->large_lock_type;
1681         if (flock->fl_type == F_WRLCK) {
1682                 cifs_dbg(FYI, "F_WRLCK\n");
1683                 *type |= server->vals->exclusive_lock_type;
1684                 *lock = 1;
1685         } else if (flock->fl_type == F_UNLCK) {
1686                 cifs_dbg(FYI, "F_UNLCK\n");
1687                 *type |= server->vals->unlock_lock_type;
1688                 *unlock = 1;
1689                 /* Check if unlock includes more than one lock range */
1690         } else if (flock->fl_type == F_RDLCK) {
1691                 cifs_dbg(FYI, "F_RDLCK\n");
1692                 *type |= server->vals->shared_lock_type;
1693                 *lock = 1;
1694         } else if (flock->fl_type == F_EXLCK) {
1695                 cifs_dbg(FYI, "F_EXLCK\n");
1696                 *type |= server->vals->exclusive_lock_type;
1697                 *lock = 1;
1698         } else if (flock->fl_type == F_SHLCK) {
1699                 cifs_dbg(FYI, "F_SHLCK\n");
1700                 *type |= server->vals->shared_lock_type;
1701                 *lock = 1;
1702         } else
1703                 cifs_dbg(FYI, "Unknown type of lock\n");
1704 }
1705
1706 static int
1707 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1708            bool wait_flag, bool posix_lck, unsigned int xid)
1709 {
1710         int rc = 0;
1711         __u64 length = cifs_flock_len(flock);
1712         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1713         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1714         struct TCP_Server_Info *server = tcon->ses->server;
1715 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1716         __u16 netfid = cfile->fid.netfid;
1717
1718         if (posix_lck) {
1719                 int posix_lock_type;
1720
1721                 rc = cifs_posix_lock_test(file, flock);
1722                 if (!rc)
1723                         return rc;
1724
1725                 if (type & server->vals->shared_lock_type)
1726                         posix_lock_type = CIFS_RDLCK;
1727                 else
1728                         posix_lock_type = CIFS_WRLCK;
1729                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1730                                       hash_lockowner(flock->fl_owner),
1731                                       flock->fl_start, length, flock,
1732                                       posix_lock_type, wait_flag);
1733                 return rc;
1734         }
1735 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1736
1737         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1738         if (!rc)
1739                 return rc;
1740
1741         /* BB we could chain these into one lock request BB */
1742         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1743                                     1, 0, false);
1744         if (rc == 0) {
1745                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1746                                             type, 0, 1, false);
1747                 flock->fl_type = F_UNLCK;
1748                 if (rc != 0)
1749                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1750                                  rc);
1751                 return 0;
1752         }
1753
1754         if (type & server->vals->shared_lock_type) {
1755                 flock->fl_type = F_WRLCK;
1756                 return 0;
1757         }
1758
1759         type &= ~server->vals->exclusive_lock_type;
1760
1761         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1762                                     type | server->vals->shared_lock_type,
1763                                     1, 0, false);
1764         if (rc == 0) {
1765                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1766                         type | server->vals->shared_lock_type, 0, 1, false);
1767                 flock->fl_type = F_RDLCK;
1768                 if (rc != 0)
1769                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1770                                  rc);
1771         } else
1772                 flock->fl_type = F_WRLCK;
1773
1774         return 0;
1775 }
1776
1777 void
1778 cifs_move_llist(struct list_head *source, struct list_head *dest)
1779 {
1780         struct list_head *li, *tmp;
1781         list_for_each_safe(li, tmp, source)
1782                 list_move(li, dest);
1783 }
1784
1785 void
1786 cifs_free_llist(struct list_head *llist)
1787 {
1788         struct cifsLockInfo *li, *tmp;
1789         list_for_each_entry_safe(li, tmp, llist, llist) {
1790                 cifs_del_lock_waiters(li);
1791                 list_del(&li->llist);
1792                 kfree(li);
1793         }
1794 }
1795
1796 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1797 int
1798 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1799                   unsigned int xid)
1800 {
1801         int rc = 0, stored_rc;
1802         static const int types[] = {
1803                 LOCKING_ANDX_LARGE_FILES,
1804                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1805         };
1806         unsigned int i;
1807         unsigned int max_num, num, max_buf;
1808         LOCKING_ANDX_RANGE *buf, *cur;
1809         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1810         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1811         struct cifsLockInfo *li, *tmp;
1812         __u64 length = cifs_flock_len(flock);
1813         struct list_head tmp_llist;
1814
1815         INIT_LIST_HEAD(&tmp_llist);
1816
1817         /*
1818          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1819          * and check it before using.
1820          */
1821         max_buf = tcon->ses->server->maxBuf;
1822         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1823                 return -EINVAL;
1824
1825         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1826                      PAGE_SIZE);
1827         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1828                         PAGE_SIZE);
1829         max_num = (max_buf - sizeof(struct smb_hdr)) /
1830                                                 sizeof(LOCKING_ANDX_RANGE);
1831         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1832         if (!buf)
1833                 return -ENOMEM;
1834
1835         cifs_down_write(&cinode->lock_sem);
1836         for (i = 0; i < 2; i++) {
1837                 cur = buf;
1838                 num = 0;
1839                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1840                         if (flock->fl_start > li->offset ||
1841                             (flock->fl_start + length) <
1842                             (li->offset + li->length))
1843                                 continue;
1844                         if (current->tgid != li->pid)
1845                                 continue;
1846                         if (types[i] != li->type)
1847                                 continue;
1848                         if (cinode->can_cache_brlcks) {
1849                                 /*
1850                                  * We can cache brlock requests - simply remove
1851                                  * a lock from the file's list.
1852                                  */
1853                                 list_del(&li->llist);
1854                                 cifs_del_lock_waiters(li);
1855                                 kfree(li);
1856                                 continue;
1857                         }
1858                         cur->Pid = cpu_to_le16(li->pid);
1859                         cur->LengthLow = cpu_to_le32((u32)li->length);
1860                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1861                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1862                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1863                         /*
1864                          * We need to save a lock here to let us add it again to
1865                          * the file's list if the unlock range request fails on
1866                          * the server.
1867                          */
1868                         list_move(&li->llist, &tmp_llist);
1869                         if (++num == max_num) {
1870                                 stored_rc = cifs_lockv(xid, tcon,
1871                                                        cfile->fid.netfid,
1872                                                        li->type, num, 0, buf);
1873                                 if (stored_rc) {
1874                                         /*
1875                                          * We failed on the unlock range
1876                                          * request - add all locks from the tmp
1877                                          * list to the head of the file's list.
1878                                          */
1879                                         cifs_move_llist(&tmp_llist,
1880                                                         &cfile->llist->locks);
1881                                         rc = stored_rc;
1882                                 } else
1883                                         /*
1884                                          * The unlock range request succeed -
1885                                          * free the tmp list.
1886                                          */
1887                                         cifs_free_llist(&tmp_llist);
1888                                 cur = buf;
1889                                 num = 0;
1890                         } else
1891                                 cur++;
1892                 }
1893                 if (num) {
1894                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1895                                                types[i], num, 0, buf);
1896                         if (stored_rc) {
1897                                 cifs_move_llist(&tmp_llist,
1898                                                 &cfile->llist->locks);
1899                                 rc = stored_rc;
1900                         } else
1901                                 cifs_free_llist(&tmp_llist);
1902                 }
1903         }
1904
1905         up_write(&cinode->lock_sem);
1906         kfree(buf);
1907         return rc;
1908 }
1909 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1910
1911 static int
1912 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1913            bool wait_flag, bool posix_lck, int lock, int unlock,
1914            unsigned int xid)
1915 {
1916         int rc = 0;
1917         __u64 length = cifs_flock_len(flock);
1918         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1919         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1920         struct TCP_Server_Info *server = tcon->ses->server;
1921         struct inode *inode = d_inode(cfile->dentry);
1922
1923 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1924         if (posix_lck) {
1925                 int posix_lock_type;
1926
1927                 rc = cifs_posix_lock_set(file, flock);
1928                 if (rc <= FILE_LOCK_DEFERRED)
1929                         return rc;
1930
1931                 if (type & server->vals->shared_lock_type)
1932                         posix_lock_type = CIFS_RDLCK;
1933                 else
1934                         posix_lock_type = CIFS_WRLCK;
1935
1936                 if (unlock == 1)
1937                         posix_lock_type = CIFS_UNLCK;
1938
1939                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1940                                       hash_lockowner(flock->fl_owner),
1941                                       flock->fl_start, length,
1942                                       NULL, posix_lock_type, wait_flag);
1943                 goto out;
1944         }
1945 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1946         if (lock) {
1947                 struct cifsLockInfo *lock;
1948
1949                 lock = cifs_lock_init(flock->fl_start, length, type,
1950                                       flock->fl_flags);
1951                 if (!lock)
1952                         return -ENOMEM;
1953
1954                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1955                 if (rc < 0) {
1956                         kfree(lock);
1957                         return rc;
1958                 }
1959                 if (!rc)
1960                         goto out;
1961
1962                 /*
1963                  * Windows 7 server can delay breaking lease from read to None
1964                  * if we set a byte-range lock on a file - break it explicitly
1965                  * before sending the lock to the server to be sure the next
1966                  * read won't conflict with non-overlapted locks due to
1967                  * pagereading.
1968                  */
1969                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1970                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1971                         cifs_zap_mapping(inode);
1972                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1973                                  inode);
1974                         CIFS_I(inode)->oplock = 0;
1975                 }
1976
1977                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1978                                             type, 1, 0, wait_flag);
1979                 if (rc) {
1980                         kfree(lock);
1981                         return rc;
1982                 }
1983
1984                 cifs_lock_add(cfile, lock);
1985         } else if (unlock)
1986                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1987
1988 out:
1989         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1990                 /*
1991                  * If this is a request to remove all locks because we
1992                  * are closing the file, it doesn't matter if the
1993                  * unlocking failed as both cifs.ko and the SMB server
1994                  * remove the lock on file close
1995                  */
1996                 if (rc) {
1997                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1998                         if (!(flock->fl_flags & FL_CLOSE))
1999                                 return rc;
2000                 }
2001                 rc = locks_lock_file_wait(file, flock);
2002         }
2003         return rc;
2004 }
2005
2006 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2007 {
2008         int rc, xid;
2009         int lock = 0, unlock = 0;
2010         bool wait_flag = false;
2011         bool posix_lck = false;
2012         struct cifs_sb_info *cifs_sb;
2013         struct cifs_tcon *tcon;
2014         struct cifsFileInfo *cfile;
2015         __u32 type;
2016
2017         xid = get_xid();
2018
2019         if (!(fl->fl_flags & FL_FLOCK)) {
2020                 rc = -ENOLCK;
2021                 free_xid(xid);
2022                 return rc;
2023         }
2024
2025         cfile = (struct cifsFileInfo *)file->private_data;
2026         tcon = tlink_tcon(cfile->tlink);
2027
2028         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2029                         tcon->ses->server);
2030         cifs_sb = CIFS_FILE_SB(file);
2031
2032         if (cap_unix(tcon->ses) &&
2033             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2034             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2035                 posix_lck = true;
2036
2037         if (!lock && !unlock) {
2038                 /*
2039                  * if no lock or unlock then nothing to do since we do not
2040                  * know what it is
2041                  */
2042                 rc = -EOPNOTSUPP;
2043                 free_xid(xid);
2044                 return rc;
2045         }
2046
2047         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2048                         xid);
2049         free_xid(xid);
2050         return rc;
2051
2052
2053 }
2054
2055 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2056 {
2057         int rc, xid;
2058         int lock = 0, unlock = 0;
2059         bool wait_flag = false;
2060         bool posix_lck = false;
2061         struct cifs_sb_info *cifs_sb;
2062         struct cifs_tcon *tcon;
2063         struct cifsFileInfo *cfile;
2064         __u32 type;
2065
2066         rc = -EACCES;
2067         xid = get_xid();
2068
2069         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2070                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2071                  (long long)flock->fl_end);
2072
2073         cfile = (struct cifsFileInfo *)file->private_data;
2074         tcon = tlink_tcon(cfile->tlink);
2075
2076         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2077                         tcon->ses->server);
2078         cifs_sb = CIFS_FILE_SB(file);
2079         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2080
2081         if (cap_unix(tcon->ses) &&
2082             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2083             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2084                 posix_lck = true;
2085         /*
2086          * BB add code here to normalize offset and length to account for
2087          * negative length which we can not accept over the wire.
2088          */
2089         if (IS_GETLK(cmd)) {
2090                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2091                 free_xid(xid);
2092                 return rc;
2093         }
2094
2095         if (!lock && !unlock) {
2096                 /*
2097                  * if no lock or unlock then nothing to do since we do not
2098                  * know what it is
2099                  */
2100                 free_xid(xid);
2101                 return -EOPNOTSUPP;
2102         }
2103
2104         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2105                         xid);
2106         free_xid(xid);
2107         return rc;
2108 }
2109
2110 /*
2111  * update the file size (if needed) after a write. Should be called with
2112  * the inode->i_lock held
2113  */
2114 void
2115 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2116                       unsigned int bytes_written)
2117 {
2118         loff_t end_of_write = offset + bytes_written;
2119
2120         if (end_of_write > cifsi->server_eof)
2121                 cifsi->server_eof = end_of_write;
2122 }
2123
2124 static ssize_t
2125 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2126            size_t write_size, loff_t *offset)
2127 {
2128         int rc = 0;
2129         unsigned int bytes_written = 0;
2130         unsigned int total_written;
2131         struct cifs_tcon *tcon;
2132         struct TCP_Server_Info *server;
2133         unsigned int xid;
2134         struct dentry *dentry = open_file->dentry;
2135         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2136         struct cifs_io_parms io_parms = {0};
2137
2138         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2139                  write_size, *offset, dentry);
2140
2141         tcon = tlink_tcon(open_file->tlink);
2142         server = tcon->ses->server;
2143
2144         if (!server->ops->sync_write)
2145                 return -ENOSYS;
2146
2147         xid = get_xid();
2148
2149         for (total_written = 0; write_size > total_written;
2150              total_written += bytes_written) {
2151                 rc = -EAGAIN;
2152                 while (rc == -EAGAIN) {
2153                         struct kvec iov[2];
2154                         unsigned int len;
2155
2156                         if (open_file->invalidHandle) {
2157                                 /* we could deadlock if we called
2158                                    filemap_fdatawait from here so tell
2159                                    reopen_file not to flush data to
2160                                    server now */
2161                                 rc = cifs_reopen_file(open_file, false);
2162                                 if (rc != 0)
2163                                         break;
2164                         }
2165
2166                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2167                                   (unsigned int)write_size - total_written);
2168                         /* iov[0] is reserved for smb header */
2169                         iov[1].iov_base = (char *)write_data + total_written;
2170                         iov[1].iov_len = len;
2171                         io_parms.pid = pid;
2172                         io_parms.tcon = tcon;
2173                         io_parms.offset = *offset;
2174                         io_parms.length = len;
2175                         rc = server->ops->sync_write(xid, &open_file->fid,
2176                                         &io_parms, &bytes_written, iov, 1);
2177                 }
2178                 if (rc || (bytes_written == 0)) {
2179                         if (total_written)
2180                                 break;
2181                         else {
2182                                 free_xid(xid);
2183                                 return rc;
2184                         }
2185                 } else {
2186                         spin_lock(&d_inode(dentry)->i_lock);
2187                         cifs_update_eof(cifsi, *offset, bytes_written);
2188                         spin_unlock(&d_inode(dentry)->i_lock);
2189                         *offset += bytes_written;
2190                 }
2191         }
2192
2193         cifs_stats_bytes_written(tcon, total_written);
2194
2195         if (total_written > 0) {
2196                 spin_lock(&d_inode(dentry)->i_lock);
2197                 if (*offset > d_inode(dentry)->i_size) {
2198                         i_size_write(d_inode(dentry), *offset);
2199                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2200                 }
2201                 spin_unlock(&d_inode(dentry)->i_lock);
2202         }
2203         mark_inode_dirty_sync(d_inode(dentry));
2204         free_xid(xid);
2205         return total_written;
2206 }
2207
2208 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2209                                         bool fsuid_only)
2210 {
2211         struct cifsFileInfo *open_file = NULL;
2212         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2213
2214         /* only filter by fsuid on multiuser mounts */
2215         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2216                 fsuid_only = false;
2217
2218         spin_lock(&cifs_inode->open_file_lock);
2219         /* we could simply get the first_list_entry since write-only entries
2220            are always at the end of the list but since the first entry might
2221            have a close pending, we go through the whole list */
2222         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2223                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2224                         continue;
2225                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2226                         if ((!open_file->invalidHandle)) {
2227                                 /* found a good file */
2228                                 /* lock it so it will not be closed on us */
2229                                 cifsFileInfo_get(open_file);
2230                                 spin_unlock(&cifs_inode->open_file_lock);
2231                                 return open_file;
2232                         } /* else might as well continue, and look for
2233                              another, or simply have the caller reopen it
2234                              again rather than trying to fix this handle */
2235                 } else /* write only file */
2236                         break; /* write only files are last so must be done */
2237         }
2238         spin_unlock(&cifs_inode->open_file_lock);
2239         return NULL;
2240 }
2241
2242 /* Return -EBADF if no handle is found and general rc otherwise */
2243 int
2244 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2245                        struct cifsFileInfo **ret_file)
2246 {
2247         struct cifsFileInfo *open_file, *inv_file = NULL;
2248         struct cifs_sb_info *cifs_sb;
2249         bool any_available = false;
2250         int rc = -EBADF;
2251         unsigned int refind = 0;
2252         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2253         bool with_delete = flags & FIND_WR_WITH_DELETE;
2254         *ret_file = NULL;
2255
2256         /*
2257          * Having a null inode here (because mapping->host was set to zero by
2258          * the VFS or MM) should not happen but we had reports of on oops (due
2259          * to it being zero) during stress testcases so we need to check for it
2260          */
2261
2262         if (cifs_inode == NULL) {
2263                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2264                 dump_stack();
2265                 return rc;
2266         }
2267
2268         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2269
2270         /* only filter by fsuid on multiuser mounts */
2271         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2272                 fsuid_only = false;
2273
2274         spin_lock(&cifs_inode->open_file_lock);
2275 refind_writable:
2276         if (refind > MAX_REOPEN_ATT) {
2277                 spin_unlock(&cifs_inode->open_file_lock);
2278                 return rc;
2279         }
2280         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2281                 if (!any_available && open_file->pid != current->tgid)
2282                         continue;
2283                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2284                         continue;
2285                 if (with_delete && !(open_file->fid.access & DELETE))
2286                         continue;
2287                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2288                         if (!open_file->invalidHandle) {
2289                                 /* found a good writable file */
2290                                 cifsFileInfo_get(open_file);
2291                                 spin_unlock(&cifs_inode->open_file_lock);
2292                                 *ret_file = open_file;
2293                                 return 0;
2294                         } else {
2295                                 if (!inv_file)
2296                                         inv_file = open_file;
2297                         }
2298                 }
2299         }
2300         /* couldn't find useable FH with same pid, try any available */
2301         if (!any_available) {
2302                 any_available = true;
2303                 goto refind_writable;
2304         }
2305
2306         if (inv_file) {
2307                 any_available = false;
2308                 cifsFileInfo_get(inv_file);
2309         }
2310
2311         spin_unlock(&cifs_inode->open_file_lock);
2312
2313         if (inv_file) {
2314                 rc = cifs_reopen_file(inv_file, false);
2315                 if (!rc) {
2316                         *ret_file = inv_file;
2317                         return 0;
2318                 }
2319
2320                 spin_lock(&cifs_inode->open_file_lock);
2321                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2322                 spin_unlock(&cifs_inode->open_file_lock);
2323                 cifsFileInfo_put(inv_file);
2324                 ++refind;
2325                 inv_file = NULL;
2326                 spin_lock(&cifs_inode->open_file_lock);
2327                 goto refind_writable;
2328         }
2329
2330         return rc;
2331 }
2332
2333 struct cifsFileInfo *
2334 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2335 {
2336         struct cifsFileInfo *cfile;
2337         int rc;
2338
2339         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2340         if (rc)
2341                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2342
2343         return cfile;
2344 }
2345
2346 int
2347 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2348                        int flags,
2349                        struct cifsFileInfo **ret_file)
2350 {
2351         struct cifsFileInfo *cfile;
2352         void *page = alloc_dentry_path();
2353
2354         *ret_file = NULL;
2355
2356         spin_lock(&tcon->open_file_lock);
2357         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2358                 struct cifsInodeInfo *cinode;
2359                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2360                 if (IS_ERR(full_path)) {
2361                         spin_unlock(&tcon->open_file_lock);
2362                         free_dentry_path(page);
2363                         return PTR_ERR(full_path);
2364                 }
2365                 if (strcmp(full_path, name))
2366                         continue;
2367
2368                 cinode = CIFS_I(d_inode(cfile->dentry));
2369                 spin_unlock(&tcon->open_file_lock);
2370                 free_dentry_path(page);
2371                 return cifs_get_writable_file(cinode, flags, ret_file);
2372         }
2373
2374         spin_unlock(&tcon->open_file_lock);
2375         free_dentry_path(page);
2376         return -ENOENT;
2377 }
2378
2379 int
2380 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2381                        struct cifsFileInfo **ret_file)
2382 {
2383         struct cifsFileInfo *cfile;
2384         void *page = alloc_dentry_path();
2385
2386         *ret_file = NULL;
2387
2388         spin_lock(&tcon->open_file_lock);
2389         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2390                 struct cifsInodeInfo *cinode;
2391                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2392                 if (IS_ERR(full_path)) {
2393                         spin_unlock(&tcon->open_file_lock);
2394                         free_dentry_path(page);
2395                         return PTR_ERR(full_path);
2396                 }
2397                 if (strcmp(full_path, name))
2398                         continue;
2399
2400                 cinode = CIFS_I(d_inode(cfile->dentry));
2401                 spin_unlock(&tcon->open_file_lock);
2402                 free_dentry_path(page);
2403                 *ret_file = find_readable_file(cinode, 0);
2404                 return *ret_file ? 0 : -ENOENT;
2405         }
2406
2407         spin_unlock(&tcon->open_file_lock);
2408         free_dentry_path(page);
2409         return -ENOENT;
2410 }
2411
2412 void
2413 cifs_writedata_release(struct kref *refcount)
2414 {
2415         struct cifs_writedata *wdata = container_of(refcount,
2416                                         struct cifs_writedata, refcount);
2417 #ifdef CONFIG_CIFS_SMB_DIRECT
2418         if (wdata->mr) {
2419                 smbd_deregister_mr(wdata->mr);
2420                 wdata->mr = NULL;
2421         }
2422 #endif
2423
2424         if (wdata->cfile)
2425                 cifsFileInfo_put(wdata->cfile);
2426
2427         kfree(wdata);
2428 }
2429
2430 /*
2431  * Write failed with a retryable error. Resend the write request. It's also
2432  * possible that the page was redirtied so re-clean the page.
2433  */
2434 static void
2435 cifs_writev_requeue(struct cifs_writedata *wdata)
2436 {
2437         int rc = 0;
2438         struct inode *inode = d_inode(wdata->cfile->dentry);
2439         struct TCP_Server_Info *server;
2440         unsigned int rest_len = wdata->bytes;
2441         loff_t fpos = wdata->offset;
2442
2443         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2444         do {
2445                 struct cifs_writedata *wdata2;
2446                 unsigned int wsize, cur_len;
2447
2448                 wsize = server->ops->wp_retry_size(inode);
2449                 if (wsize < rest_len) {
2450                         if (wsize < PAGE_SIZE) {
2451                                 rc = -EOPNOTSUPP;
2452                                 break;
2453                         }
2454                         cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2455                 } else {
2456                         cur_len = rest_len;
2457                 }
2458
2459                 wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2460                 if (!wdata2) {
2461                         rc = -ENOMEM;
2462                         break;
2463                 }
2464
2465                 wdata2->sync_mode = wdata->sync_mode;
2466                 wdata2->offset  = fpos;
2467                 wdata2->bytes   = cur_len;
2468                 wdata2->iter    = wdata->iter;
2469
2470                 iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2471                 iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2472
2473                 if (iov_iter_is_xarray(&wdata2->iter))
2474                         /* Check for pages having been redirtied and clean
2475                          * them.  We can do this by walking the xarray.  If
2476                          * it's not an xarray, then it's a DIO and we shouldn't
2477                          * be mucking around with the page bits.
2478                          */
2479                         cifs_undirty_folios(inode, fpos, cur_len);
2480
2481                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2482                                             &wdata2->cfile);
2483                 if (!wdata2->cfile) {
2484                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2485                                  rc);
2486                         if (!is_retryable_error(rc))
2487                                 rc = -EBADF;
2488                 } else {
2489                         wdata2->pid = wdata2->cfile->pid;
2490                         rc = server->ops->async_writev(wdata2,
2491                                                        cifs_writedata_release);
2492                 }
2493
2494                 kref_put(&wdata2->refcount, cifs_writedata_release);
2495                 if (rc) {
2496                         if (is_retryable_error(rc))
2497                                 continue;
2498                         fpos += cur_len;
2499                         rest_len -= cur_len;
2500                         break;
2501                 }
2502
2503                 fpos += cur_len;
2504                 rest_len -= cur_len;
2505         } while (rest_len > 0);
2506
2507         /* Clean up remaining pages from the original wdata */
2508         if (iov_iter_is_xarray(&wdata->iter))
2509                 cifs_pages_write_failed(inode, fpos, rest_len);
2510
2511         if (rc != 0 && !is_retryable_error(rc))
2512                 mapping_set_error(inode->i_mapping, rc);
2513         kref_put(&wdata->refcount, cifs_writedata_release);
2514 }
2515
2516 void
2517 cifs_writev_complete(struct work_struct *work)
2518 {
2519         struct cifs_writedata *wdata = container_of(work,
2520                                                 struct cifs_writedata, work);
2521         struct inode *inode = d_inode(wdata->cfile->dentry);
2522
2523         if (wdata->result == 0) {
2524                 spin_lock(&inode->i_lock);
2525                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2526                 spin_unlock(&inode->i_lock);
2527                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2528                                          wdata->bytes);
2529         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2530                 return cifs_writev_requeue(wdata);
2531
2532         if (wdata->result == -EAGAIN)
2533                 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2534         else if (wdata->result < 0)
2535                 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2536         else
2537                 cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2538
2539         if (wdata->result != -EAGAIN)
2540                 mapping_set_error(inode->i_mapping, wdata->result);
2541         kref_put(&wdata->refcount, cifs_writedata_release);
2542 }
2543
2544 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2545 {
2546         struct cifs_writedata *wdata;
2547
2548         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2549         if (wdata != NULL) {
2550                 kref_init(&wdata->refcount);
2551                 INIT_LIST_HEAD(&wdata->list);
2552                 init_completion(&wdata->done);
2553                 INIT_WORK(&wdata->work, complete);
2554         }
2555         return wdata;
2556 }
2557
2558 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2559 {
2560         struct address_space *mapping = page->mapping;
2561         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2562         char *write_data;
2563         int rc = -EFAULT;
2564         int bytes_written = 0;
2565         struct inode *inode;
2566         struct cifsFileInfo *open_file;
2567
2568         if (!mapping || !mapping->host)
2569                 return -EFAULT;
2570
2571         inode = page->mapping->host;
2572
2573         offset += (loff_t)from;
2574         write_data = kmap(page);
2575         write_data += from;
2576
2577         if ((to > PAGE_SIZE) || (from > to)) {
2578                 kunmap(page);
2579                 return -EIO;
2580         }
2581
2582         /* racing with truncate? */
2583         if (offset > mapping->host->i_size) {
2584                 kunmap(page);
2585                 return 0; /* don't care */
2586         }
2587
2588         /* check to make sure that we are not extending the file */
2589         if (mapping->host->i_size - offset < (loff_t)to)
2590                 to = (unsigned)(mapping->host->i_size - offset);
2591
2592         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2593                                     &open_file);
2594         if (!rc) {
2595                 bytes_written = cifs_write(open_file, open_file->pid,
2596                                            write_data, to - from, &offset);
2597                 cifsFileInfo_put(open_file);
2598                 /* Does mm or vfs already set times? */
2599                 inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
2600                 if ((bytes_written > 0) && (offset))
2601                         rc = 0;
2602                 else if (bytes_written < 0)
2603                         rc = bytes_written;
2604                 else
2605                         rc = -EFAULT;
2606         } else {
2607                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2608                 if (!is_retryable_error(rc))
2609                         rc = -EIO;
2610         }
2611
2612         kunmap(page);
2613         return rc;
2614 }
2615
2616 /*
2617  * Extend the region to be written back to include subsequent contiguously
2618  * dirty pages if possible, but don't sleep while doing so.
2619  */
2620 static void cifs_extend_writeback(struct address_space *mapping,
2621                                   long *_count,
2622                                   loff_t start,
2623                                   int max_pages,
2624                                   size_t max_len,
2625                                   unsigned int *_len)
2626 {
2627         struct folio_batch batch;
2628         struct folio *folio;
2629         unsigned int psize, nr_pages;
2630         size_t len = *_len;
2631         pgoff_t index = (start + len) / PAGE_SIZE;
2632         bool stop = true;
2633         unsigned int i;
2634         XA_STATE(xas, &mapping->i_pages, index);
2635
2636         folio_batch_init(&batch);
2637
2638         do {
2639                 /* Firstly, we gather up a batch of contiguous dirty pages
2640                  * under the RCU read lock - but we can't clear the dirty flags
2641                  * there if any of those pages are mapped.
2642                  */
2643                 rcu_read_lock();
2644
2645                 xas_for_each(&xas, folio, ULONG_MAX) {
2646                         stop = true;
2647                         if (xas_retry(&xas, folio))
2648                                 continue;
2649                         if (xa_is_value(folio))
2650                                 break;
2651                         if (folio_index(folio) != index)
2652                                 break;
2653                         if (!folio_try_get_rcu(folio)) {
2654                                 xas_reset(&xas);
2655                                 continue;
2656                         }
2657                         nr_pages = folio_nr_pages(folio);
2658                         if (nr_pages > max_pages)
2659                                 break;
2660
2661                         /* Has the page moved or been split? */
2662                         if (unlikely(folio != xas_reload(&xas))) {
2663                                 folio_put(folio);
2664                                 break;
2665                         }
2666
2667                         if (!folio_trylock(folio)) {
2668                                 folio_put(folio);
2669                                 break;
2670                         }
2671                         if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
2672                                 folio_unlock(folio);
2673                                 folio_put(folio);
2674                                 break;
2675                         }
2676
2677                         max_pages -= nr_pages;
2678                         psize = folio_size(folio);
2679                         len += psize;
2680                         stop = false;
2681                         if (max_pages <= 0 || len >= max_len || *_count <= 0)
2682                                 stop = true;
2683
2684                         index += nr_pages;
2685                         if (!folio_batch_add(&batch, folio))
2686                                 break;
2687                         if (stop)
2688                                 break;
2689                 }
2690
2691                 if (!stop)
2692                         xas_pause(&xas);
2693                 rcu_read_unlock();
2694
2695                 /* Now, if we obtained any pages, we can shift them to being
2696                  * writable and mark them for caching.
2697                  */
2698                 if (!folio_batch_count(&batch))
2699                         break;
2700
2701                 for (i = 0; i < folio_batch_count(&batch); i++) {
2702                         folio = batch.folios[i];
2703                         /* The folio should be locked, dirty and not undergoing
2704                          * writeback from the loop above.
2705                          */
2706                         if (!folio_clear_dirty_for_io(folio))
2707                                 WARN_ON(1);
2708                         if (folio_start_writeback(folio))
2709                                 WARN_ON(1);
2710
2711                         *_count -= folio_nr_pages(folio);
2712                         folio_unlock(folio);
2713                 }
2714
2715                 folio_batch_release(&batch);
2716                 cond_resched();
2717         } while (!stop);
2718
2719         *_len = len;
2720 }
2721
2722 /*
2723  * Write back the locked page and any subsequent non-locked dirty pages.
2724  */
2725 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2726                                                  struct writeback_control *wbc,
2727                                                  struct folio *folio,
2728                                                  loff_t start, loff_t end)
2729 {
2730         struct inode *inode = mapping->host;
2731         struct TCP_Server_Info *server;
2732         struct cifs_writedata *wdata;
2733         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2734         struct cifs_credits credits_on_stack;
2735         struct cifs_credits *credits = &credits_on_stack;
2736         struct cifsFileInfo *cfile = NULL;
2737         unsigned int xid, wsize, len;
2738         loff_t i_size = i_size_read(inode);
2739         size_t max_len;
2740         long count = wbc->nr_to_write;
2741         int rc;
2742
2743         /* The folio should be locked, dirty and not undergoing writeback. */
2744         if (folio_start_writeback(folio))
2745                 WARN_ON(1);
2746
2747         count -= folio_nr_pages(folio);
2748         len = folio_size(folio);
2749
2750         xid = get_xid();
2751         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2752
2753         rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2754         if (rc) {
2755                 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2756                 goto err_xid;
2757         }
2758
2759         rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2760                                            &wsize, credits);
2761         if (rc != 0)
2762                 goto err_close;
2763
2764         wdata = cifs_writedata_alloc(cifs_writev_complete);
2765         if (!wdata) {
2766                 rc = -ENOMEM;
2767                 goto err_uncredit;
2768         }
2769
2770         wdata->sync_mode = wbc->sync_mode;
2771         wdata->offset = folio_pos(folio);
2772         wdata->pid = cfile->pid;
2773         wdata->credits = credits_on_stack;
2774         wdata->cfile = cfile;
2775         wdata->server = server;
2776         cfile = NULL;
2777
2778         /* Find all consecutive lockable dirty pages, stopping when we find a
2779          * page that is not immediately lockable, is not dirty or is missing,
2780          * or we reach the end of the range.
2781          */
2782         if (start < i_size) {
2783                 /* Trim the write to the EOF; the extra data is ignored.  Also
2784                  * put an upper limit on the size of a single storedata op.
2785                  */
2786                 max_len = wsize;
2787                 max_len = min_t(unsigned long long, max_len, end - start + 1);
2788                 max_len = min_t(unsigned long long, max_len, i_size - start);
2789
2790                 if (len < max_len) {
2791                         int max_pages = INT_MAX;
2792
2793 #ifdef CONFIG_CIFS_SMB_DIRECT
2794                         if (server->smbd_conn)
2795                                 max_pages = server->smbd_conn->max_frmr_depth;
2796 #endif
2797                         max_pages -= folio_nr_pages(folio);
2798
2799                         if (max_pages > 0)
2800                                 cifs_extend_writeback(mapping, &count, start,
2801                                                       max_pages, max_len, &len);
2802                 }
2803                 len = min_t(loff_t, len, max_len);
2804         }
2805
2806         wdata->bytes = len;
2807
2808         /* We now have a contiguous set of dirty pages, each with writeback
2809          * set; the first page is still locked at this point, but all the rest
2810          * have been unlocked.
2811          */
2812         folio_unlock(folio);
2813
2814         if (start < i_size) {
2815                 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2816                                 start, len);
2817
2818                 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2819                 if (rc)
2820                         goto err_wdata;
2821
2822                 if (wdata->cfile->invalidHandle)
2823                         rc = -EAGAIN;
2824                 else
2825                         rc = wdata->server->ops->async_writev(wdata,
2826                                                               cifs_writedata_release);
2827                 if (rc >= 0) {
2828                         kref_put(&wdata->refcount, cifs_writedata_release);
2829                         goto err_close;
2830                 }
2831         } else {
2832                 /* The dirty region was entirely beyond the EOF. */
2833                 cifs_pages_written_back(inode, start, len);
2834                 rc = 0;
2835         }
2836
2837 err_wdata:
2838         kref_put(&wdata->refcount, cifs_writedata_release);
2839 err_uncredit:
2840         add_credits_and_wake_if(server, credits, 0);
2841 err_close:
2842         if (cfile)
2843                 cifsFileInfo_put(cfile);
2844 err_xid:
2845         free_xid(xid);
2846         if (rc == 0) {
2847                 wbc->nr_to_write = count;
2848                 rc = len;
2849         } else if (is_retryable_error(rc)) {
2850                 cifs_pages_write_redirty(inode, start, len);
2851         } else {
2852                 cifs_pages_write_failed(inode, start, len);
2853                 mapping_set_error(mapping, rc);
2854         }
2855         /* Indication to update ctime and mtime as close is deferred */
2856         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2857         return rc;
2858 }
2859
2860 /*
2861  * write a region of pages back to the server
2862  */
2863 static int cifs_writepages_region(struct address_space *mapping,
2864                                   struct writeback_control *wbc,
2865                                   loff_t start, loff_t end, loff_t *_next)
2866 {
2867         struct folio_batch fbatch;
2868         int skips = 0;
2869
2870         folio_batch_init(&fbatch);
2871         do {
2872                 int nr;
2873                 pgoff_t index = start / PAGE_SIZE;
2874
2875                 nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
2876                                             PAGECACHE_TAG_DIRTY, &fbatch);
2877                 if (!nr)
2878                         break;
2879
2880                 for (int i = 0; i < nr; i++) {
2881                         ssize_t ret;
2882                         struct folio *folio = fbatch.folios[i];
2883
2884 redo_folio:
2885                         start = folio_pos(folio); /* May regress with THPs */
2886
2887                         /* At this point we hold neither the i_pages lock nor the
2888                          * page lock: the page may be truncated or invalidated
2889                          * (changing page->mapping to NULL), or even swizzled
2890                          * back from swapper_space to tmpfs file mapping
2891                          */
2892                         if (wbc->sync_mode != WB_SYNC_NONE) {
2893                                 ret = folio_lock_killable(folio);
2894                                 if (ret < 0)
2895                                         goto write_error;
2896                         } else {
2897                                 if (!folio_trylock(folio))
2898                                         goto skip_write;
2899                         }
2900
2901                         if (folio_mapping(folio) != mapping ||
2902                             !folio_test_dirty(folio)) {
2903                                 start += folio_size(folio);
2904                                 folio_unlock(folio);
2905                                 continue;
2906                         }
2907
2908                         if (folio_test_writeback(folio) ||
2909                             folio_test_fscache(folio)) {
2910                                 folio_unlock(folio);
2911                                 if (wbc->sync_mode == WB_SYNC_NONE)
2912                                         goto skip_write;
2913
2914                                 folio_wait_writeback(folio);
2915 #ifdef CONFIG_CIFS_FSCACHE
2916                                 folio_wait_fscache(folio);
2917 #endif
2918                                 goto redo_folio;
2919                         }
2920
2921                         if (!folio_clear_dirty_for_io(folio))
2922                                 /* We hold the page lock - it should've been dirty. */
2923                                 WARN_ON(1);
2924
2925                         ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
2926                         if (ret < 0)
2927                                 goto write_error;
2928
2929                         start += ret;
2930                         continue;
2931
2932 write_error:
2933                         folio_batch_release(&fbatch);
2934                         *_next = start;
2935                         return ret;
2936
2937 skip_write:
2938                         /*
2939                          * Too many skipped writes, or need to reschedule?
2940                          * Treat it as a write error without an error code.
2941                          */
2942                         if (skips >= 5 || need_resched()) {
2943                                 ret = 0;
2944                                 goto write_error;
2945                         }
2946
2947                         /* Otherwise, just skip that folio and go on to the next */
2948                         skips++;
2949                         start += folio_size(folio);
2950                         continue;
2951                 }
2952
2953                 folio_batch_release(&fbatch);           
2954                 cond_resched();
2955         } while (wbc->nr_to_write > 0);
2956
2957         *_next = start;
2958         return 0;
2959 }
2960
2961 /*
2962  * Write some of the pending data back to the server
2963  */
2964 static int cifs_writepages(struct address_space *mapping,
2965                            struct writeback_control *wbc)
2966 {
2967         loff_t start, next;
2968         int ret;
2969
2970         /* We have to be careful as we can end up racing with setattr()
2971          * truncating the pagecache since the caller doesn't take a lock here
2972          * to prevent it.
2973          */
2974
2975         if (wbc->range_cyclic) {
2976                 start = mapping->writeback_index * PAGE_SIZE;
2977                 ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
2978                 if (ret == 0) {
2979                         mapping->writeback_index = next / PAGE_SIZE;
2980                         if (start > 0 && wbc->nr_to_write > 0) {
2981                                 ret = cifs_writepages_region(mapping, wbc, 0,
2982                                                              start, &next);
2983                                 if (ret == 0)
2984                                         mapping->writeback_index =
2985                                                 next / PAGE_SIZE;
2986                         }
2987                 }
2988         } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
2989                 ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
2990                 if (wbc->nr_to_write > 0 && ret == 0)
2991                         mapping->writeback_index = next / PAGE_SIZE;
2992         } else {
2993                 ret = cifs_writepages_region(mapping, wbc,
2994                                              wbc->range_start, wbc->range_end, &next);
2995         }
2996
2997         return ret;
2998 }
2999
3000 static int
3001 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3002 {
3003         int rc;
3004         unsigned int xid;
3005
3006         xid = get_xid();
3007 /* BB add check for wbc flags */
3008         get_page(page);
3009         if (!PageUptodate(page))
3010                 cifs_dbg(FYI, "ppw - page not up to date\n");
3011
3012         /*
3013          * Set the "writeback" flag, and clear "dirty" in the radix tree.
3014          *
3015          * A writepage() implementation always needs to do either this,
3016          * or re-dirty the page with "redirty_page_for_writepage()" in
3017          * the case of a failure.
3018          *
3019          * Just unlocking the page will cause the radix tree tag-bits
3020          * to fail to update with the state of the page correctly.
3021          */
3022         set_page_writeback(page);
3023 retry_write:
3024         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3025         if (is_retryable_error(rc)) {
3026                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3027                         goto retry_write;
3028                 redirty_page_for_writepage(wbc, page);
3029         } else if (rc != 0) {
3030                 SetPageError(page);
3031                 mapping_set_error(page->mapping, rc);
3032         } else {
3033                 SetPageUptodate(page);
3034         }
3035         end_page_writeback(page);
3036         put_page(page);
3037         free_xid(xid);
3038         return rc;
3039 }
3040
3041 static int cifs_write_end(struct file *file, struct address_space *mapping,
3042                         loff_t pos, unsigned len, unsigned copied,
3043                         struct page *page, void *fsdata)
3044 {
3045         int rc;
3046         struct inode *inode = mapping->host;
3047         struct cifsFileInfo *cfile = file->private_data;
3048         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3049         struct folio *folio = page_folio(page);
3050         __u32 pid;
3051
3052         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3053                 pid = cfile->pid;
3054         else
3055                 pid = current->tgid;
3056
3057         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3058                  page, pos, copied);
3059
3060         if (folio_test_checked(folio)) {
3061                 if (copied == len)
3062                         folio_mark_uptodate(folio);
3063                 folio_clear_checked(folio);
3064         } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3065                 folio_mark_uptodate(folio);
3066
3067         if (!folio_test_uptodate(folio)) {
3068                 char *page_data;
3069                 unsigned offset = pos & (PAGE_SIZE - 1);
3070                 unsigned int xid;
3071
3072                 xid = get_xid();
3073                 /* this is probably better than directly calling
3074                    partialpage_write since in this function the file handle is
3075                    known which we might as well leverage */
3076                 /* BB check if anything else missing out of ppw
3077                    such as updating last write time */
3078                 page_data = kmap(page);
3079                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3080                 /* if (rc < 0) should we set writebehind rc? */
3081                 kunmap(page);
3082
3083                 free_xid(xid);
3084         } else {
3085                 rc = copied;
3086                 pos += copied;
3087                 set_page_dirty(page);
3088         }
3089
3090         if (rc > 0) {
3091                 spin_lock(&inode->i_lock);
3092                 if (pos > inode->i_size) {
3093                         i_size_write(inode, pos);
3094                         inode->i_blocks = (512 - 1 + pos) >> 9;
3095                 }
3096                 spin_unlock(&inode->i_lock);
3097         }
3098
3099         unlock_page(page);
3100         put_page(page);
3101         /* Indication to update ctime and mtime as close is deferred */
3102         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3103
3104         return rc;
3105 }
3106
3107 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3108                       int datasync)
3109 {
3110         unsigned int xid;
3111         int rc = 0;
3112         struct cifs_tcon *tcon;
3113         struct TCP_Server_Info *server;
3114         struct cifsFileInfo *smbfile = file->private_data;
3115         struct inode *inode = file_inode(file);
3116         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3117
3118         rc = file_write_and_wait_range(file, start, end);
3119         if (rc) {
3120                 trace_cifs_fsync_err(inode->i_ino, rc);
3121                 return rc;
3122         }
3123
3124         xid = get_xid();
3125
3126         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3127                  file, datasync);
3128
3129         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3130                 rc = cifs_zap_mapping(inode);
3131                 if (rc) {
3132                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3133                         rc = 0; /* don't care about it in fsync */
3134                 }
3135         }
3136
3137         tcon = tlink_tcon(smbfile->tlink);
3138         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3139                 server = tcon->ses->server;
3140                 if (server->ops->flush == NULL) {
3141                         rc = -ENOSYS;
3142                         goto strict_fsync_exit;
3143                 }
3144
3145                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3146                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3147                         if (smbfile) {
3148                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3149                                 cifsFileInfo_put(smbfile);
3150                         } else
3151                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3152                 } else
3153                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3154         }
3155
3156 strict_fsync_exit:
3157         free_xid(xid);
3158         return rc;
3159 }
3160
3161 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3162 {
3163         unsigned int xid;
3164         int rc = 0;
3165         struct cifs_tcon *tcon;
3166         struct TCP_Server_Info *server;
3167         struct cifsFileInfo *smbfile = file->private_data;
3168         struct inode *inode = file_inode(file);
3169         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3170
3171         rc = file_write_and_wait_range(file, start, end);
3172         if (rc) {
3173                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3174                 return rc;
3175         }
3176
3177         xid = get_xid();
3178
3179         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3180                  file, datasync);
3181
3182         tcon = tlink_tcon(smbfile->tlink);
3183         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3184                 server = tcon->ses->server;
3185                 if (server->ops->flush == NULL) {
3186                         rc = -ENOSYS;
3187                         goto fsync_exit;
3188                 }
3189
3190                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3191                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3192                         if (smbfile) {
3193                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3194                                 cifsFileInfo_put(smbfile);
3195                         } else
3196                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3197                 } else
3198                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3199         }
3200
3201 fsync_exit:
3202         free_xid(xid);
3203         return rc;
3204 }
3205
3206 /*
3207  * As file closes, flush all cached write data for this inode checking
3208  * for write behind errors.
3209  */
3210 int cifs_flush(struct file *file, fl_owner_t id)
3211 {
3212         struct inode *inode = file_inode(file);
3213         int rc = 0;
3214
3215         if (file->f_mode & FMODE_WRITE)
3216                 rc = filemap_write_and_wait(inode->i_mapping);
3217
3218         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3219         if (rc) {
3220                 /* get more nuanced writeback errors */
3221                 rc = filemap_check_wb_err(file->f_mapping, 0);
3222                 trace_cifs_flush_err(inode->i_ino, rc);
3223         }
3224         return rc;
3225 }
3226
3227 static void
3228 cifs_uncached_writedata_release(struct kref *refcount)
3229 {
3230         struct cifs_writedata *wdata = container_of(refcount,
3231                                         struct cifs_writedata, refcount);
3232
3233         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3234         cifs_writedata_release(refcount);
3235 }
3236
3237 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3238
3239 static void
3240 cifs_uncached_writev_complete(struct work_struct *work)
3241 {
3242         struct cifs_writedata *wdata = container_of(work,
3243                                         struct cifs_writedata, work);
3244         struct inode *inode = d_inode(wdata->cfile->dentry);
3245         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3246
3247         spin_lock(&inode->i_lock);
3248         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3249         if (cifsi->server_eof > inode->i_size)
3250                 i_size_write(inode, cifsi->server_eof);
3251         spin_unlock(&inode->i_lock);
3252
3253         complete(&wdata->done);
3254         collect_uncached_write_data(wdata->ctx);
3255         /* the below call can possibly free the last ref to aio ctx */
3256         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3257 }
3258
3259 static int
3260 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3261         struct cifs_aio_ctx *ctx)
3262 {
3263         unsigned int wsize;
3264         struct cifs_credits credits;
3265         int rc;
3266         struct TCP_Server_Info *server = wdata->server;
3267
3268         do {
3269                 if (wdata->cfile->invalidHandle) {
3270                         rc = cifs_reopen_file(wdata->cfile, false);
3271                         if (rc == -EAGAIN)
3272                                 continue;
3273                         else if (rc)
3274                                 break;
3275                 }
3276
3277
3278                 /*
3279                  * Wait for credits to resend this wdata.
3280                  * Note: we are attempting to resend the whole wdata not in
3281                  * segments
3282                  */
3283                 do {
3284                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3285                                                 &wsize, &credits);
3286                         if (rc)
3287                                 goto fail;
3288
3289                         if (wsize < wdata->bytes) {
3290                                 add_credits_and_wake_if(server, &credits, 0);
3291                                 msleep(1000);
3292                         }
3293                 } while (wsize < wdata->bytes);
3294                 wdata->credits = credits;
3295
3296                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3297
3298                 if (!rc) {
3299                         if (wdata->cfile->invalidHandle)
3300                                 rc = -EAGAIN;
3301                         else {
3302 #ifdef CONFIG_CIFS_SMB_DIRECT
3303                                 if (wdata->mr) {
3304                                         wdata->mr->need_invalidate = true;
3305                                         smbd_deregister_mr(wdata->mr);
3306                                         wdata->mr = NULL;
3307                                 }
3308 #endif
3309                                 rc = server->ops->async_writev(wdata,
3310                                         cifs_uncached_writedata_release);
3311                         }
3312                 }
3313
3314                 /* If the write was successfully sent, we are done */
3315                 if (!rc) {
3316                         list_add_tail(&wdata->list, wdata_list);
3317                         return 0;
3318                 }
3319
3320                 /* Roll back credits and retry if needed */
3321                 add_credits_and_wake_if(server, &wdata->credits, 0);
3322         } while (rc == -EAGAIN);
3323
3324 fail:
3325         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3326         return rc;
3327 }
3328
3329 /*
3330  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3331  * size and maximum number of segments.
3332  */
3333 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3334                                      size_t max_segs, unsigned int *_nsegs)
3335 {
3336         const struct bio_vec *bvecs = iter->bvec;
3337         unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3338         size_t len, span = 0, n = iter->count;
3339         size_t skip = iter->iov_offset;
3340
3341         if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3342                 return 0;
3343
3344         while (n && ix < nbv && skip) {
3345                 len = bvecs[ix].bv_len;
3346                 if (skip < len)
3347                         break;
3348                 skip -= len;
3349                 n -= len;
3350                 ix++;
3351         }
3352
3353         while (n && ix < nbv) {
3354                 len = min3(n, bvecs[ix].bv_len - skip, max_size);
3355                 span += len;
3356                 max_size -= len;
3357                 nsegs++;
3358                 ix++;
3359                 if (max_size == 0 || nsegs >= max_segs)
3360                         break;
3361                 skip = 0;
3362                 n -= len;
3363         }
3364
3365         *_nsegs = nsegs;
3366         return span;
3367 }
3368
3369 static int
3370 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3371                      struct cifsFileInfo *open_file,
3372                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3373                      struct cifs_aio_ctx *ctx)
3374 {
3375         int rc = 0;
3376         size_t cur_len, max_len;
3377         struct cifs_writedata *wdata;
3378         pid_t pid;
3379         struct TCP_Server_Info *server;
3380         unsigned int xid, max_segs = INT_MAX;
3381
3382         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3383                 pid = open_file->pid;
3384         else
3385                 pid = current->tgid;
3386
3387         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3388         xid = get_xid();
3389
3390 #ifdef CONFIG_CIFS_SMB_DIRECT
3391         if (server->smbd_conn)
3392                 max_segs = server->smbd_conn->max_frmr_depth;
3393 #endif
3394
3395         do {
3396                 struct cifs_credits credits_on_stack;
3397                 struct cifs_credits *credits = &credits_on_stack;
3398                 unsigned int wsize, nsegs = 0;
3399
3400                 if (signal_pending(current)) {
3401                         rc = -EINTR;
3402                         break;
3403                 }
3404
3405                 if (open_file->invalidHandle) {
3406                         rc = cifs_reopen_file(open_file, false);
3407                         if (rc == -EAGAIN)
3408                                 continue;
3409                         else if (rc)
3410                                 break;
3411                 }
3412
3413                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3414                                                    &wsize, credits);
3415                 if (rc)
3416                         break;
3417
3418                 max_len = min_t(const size_t, len, wsize);
3419                 if (!max_len) {
3420                         rc = -EAGAIN;
3421                         add_credits_and_wake_if(server, credits, 0);
3422                         break;
3423                 }
3424
3425                 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3426                 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3427                          cur_len, max_len, nsegs, from->nr_segs, max_segs);
3428                 if (cur_len == 0) {
3429                         rc = -EIO;
3430                         add_credits_and_wake_if(server, credits, 0);
3431                         break;
3432                 }
3433
3434                 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3435                 if (!wdata) {
3436                         rc = -ENOMEM;
3437                         add_credits_and_wake_if(server, credits, 0);
3438                         break;
3439                 }
3440
3441                 wdata->sync_mode = WB_SYNC_ALL;
3442                 wdata->offset   = (__u64)fpos;
3443                 wdata->cfile    = cifsFileInfo_get(open_file);
3444                 wdata->server   = server;
3445                 wdata->pid      = pid;
3446                 wdata->bytes    = cur_len;
3447                 wdata->credits  = credits_on_stack;
3448                 wdata->iter     = *from;
3449                 wdata->ctx      = ctx;
3450                 kref_get(&ctx->refcount);
3451
3452                 iov_iter_truncate(&wdata->iter, cur_len);
3453
3454                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3455
3456                 if (!rc) {
3457                         if (wdata->cfile->invalidHandle)
3458                                 rc = -EAGAIN;
3459                         else
3460                                 rc = server->ops->async_writev(wdata,
3461                                         cifs_uncached_writedata_release);
3462                 }
3463
3464                 if (rc) {
3465                         add_credits_and_wake_if(server, &wdata->credits, 0);
3466                         kref_put(&wdata->refcount,
3467                                  cifs_uncached_writedata_release);
3468                         if (rc == -EAGAIN)
3469                                 continue;
3470                         break;
3471                 }
3472
3473                 list_add_tail(&wdata->list, wdata_list);
3474                 iov_iter_advance(from, cur_len);
3475                 fpos += cur_len;
3476                 len -= cur_len;
3477         } while (len > 0);
3478
3479         free_xid(xid);
3480         return rc;
3481 }
3482
/*
 * Gather the results of the uncached writes queued on ctx->list.
 *
 * Runs with ctx->aio_mutex held for the whole walk.  It returns early,
 * without signalling anything, when the list is already empty or when the
 * next entry in list order has not completed yet.  An entry that finished
 * with -EAGAIN is resent and the walk restarts from the head of the list.
 * Once an error other than -EAGAIN has been recorded, the remaining entries
 * are simply unlinked and released without being waited for.
 *
 * When the walk reaches the end of the list, ctx->rc is set to the total
 * number of bytes written or to the error, and the issuer is notified either
 * through the iocb's ki_complete callback or through ctx->done.
 */
3483 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3484 {
3485         struct cifs_writedata *wdata, *tmp;
3486         struct cifs_tcon *tcon;
3487         struct cifs_sb_info *cifs_sb;
3488         struct dentry *dentry = ctx->cfile->dentry;
3489         ssize_t rc;
3490
3491         tcon = tlink_tcon(ctx->cfile->tlink);
3492         cifs_sb = CIFS_SB(dentry->d_sb);
3493
3494         mutex_lock(&ctx->aio_mutex);
3495
             /* nothing queued: no result to report from this call */
3496         if (list_empty(&ctx->list)) {
3497                 mutex_unlock(&ctx->aio_mutex);
3498                 return;
3499         }
3500
             /* start from whatever result is already stored in the context */
3501         rc = ctx->rc;
3502         /*
3503          * Wait for and collect replies for any successful sends in order of
3504          * increasing offset. Once an error is hit, then return without waiting
3505          * for any more replies.
3506          */
3507 restart_loop:
3508         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3509                 if (!rc) {
                             /*
                              * Not finished yet: leave it on the list and
                              * return; nothing is reported by this call.
                              */
3510                         if (!try_wait_for_completion(&wdata->done)) {
3511                                 mutex_unlock(&ctx->aio_mutex);
3512                                 return;
3513                         }
3514
3515                         if (wdata->result)
3516                                 rc = wdata->result;
3517                         else
3518                                 ctx->total_len += wdata->bytes;
3519
3520                         /* resend call if it's a retryable error */
3521                         if (rc == -EAGAIN) {
3522                                 struct list_head tmp_list;
3523                                 struct iov_iter tmp_from = ctx->iter;
3524
3525                                 INIT_LIST_HEAD(&tmp_list);
3526                                 list_del_init(&wdata->list);
3527
                                     /*
                                      * Direct I/O resends this very wdata;
                                      * otherwise new requests are built from
                                      * a copy of the context iterator advanced
                                      * to this wdata's offset, and the old
                                      * wdata is released.
                                      */
3528                                 if (ctx->direct_io)
3529                                         rc = cifs_resend_wdata(
3530                                                 wdata, &tmp_list, ctx);
3531                                 else {
3532                                         iov_iter_advance(&tmp_from,
3533                                                  wdata->offset - ctx->pos);
3534
3535                                         rc = cifs_write_from_iter(wdata->offset,
3536                                                 wdata->bytes, &tmp_from,
3537                                                 ctx->cfile, cifs_sb, &tmp_list,
3538                                                 ctx);
3539
3540                                         kref_put(&wdata->refcount,
3541                                                 cifs_uncached_writedata_release);
3542                                 }
3543
                                     /*
                                      * Put the resent requests at the head of
                                      * the list and walk it again from the
                                      * start, since the list was modified.
                                      */
3544                                 list_splice(&tmp_list, &ctx->list);
3545                                 goto restart_loop;
3546                         }
3547                 }
                     /* collected, or abandoned after an error: unlink and drop */
3548                 list_del_init(&wdata->list);
3549                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3550         }
3551
3552         cifs_stats_bytes_written(tcon, ctx->total_len);
3553         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3554
             /* final result: byte count on success, otherwise the error */
3555         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3556
3557         mutex_unlock(&ctx->aio_mutex);
3558
             /* notify via the iocb callback if one is set, else wake the waiter */
3559         if (ctx->iocb && ctx->iocb->ki_complete)
3560                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3561         else
3562                 complete(&ctx->done);
3563 }
3564
3565 static ssize_t __cifs_writev(
3566         struct kiocb *iocb, struct iov_iter *from, bool direct)
3567 {
3568         struct file *file = iocb->ki_filp;
3569         ssize_t total_written = 0;
3570         struct cifsFileInfo *cfile;
3571         struct cifs_tcon *tcon;
3572         struct cifs_sb_info *cifs_sb;
3573         struct cifs_aio_ctx *ctx;
3574         int rc;
3575
3576         rc = generic_write_checks(iocb, from);
3577         if (rc <= 0)
3578                 return rc;
3579
3580         cifs_sb = CIFS_FILE_SB(file);
3581         cfile = file->private_data;
3582         tcon = tlink_tcon(cfile->tlink);
3583
3584         if (!tcon->ses->server->ops->async_writev)
3585                 return -ENOSYS;
3586
3587         ctx = cifs_aio_ctx_alloc();
3588         if (!ctx)
3589                 return -ENOMEM;
3590
3591         ctx->cfile = cifsFileInfo_get(cfile);
3592
3593         if (!is_sync_kiocb(iocb))
3594                 ctx->iocb = iocb;
3595
3596         ctx->pos = iocb->ki_pos;
3597         ctx->direct_io = direct;
3598         ctx->nr_pinned_pages = 0;
3599
3600         if (user_backed_iter(from)) {
3601                 /*
3602                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3603                  * they contain references to the calling process's virtual
3604                  * memory layout which won't be available in an async worker
3605                  * thread.  This also takes a pin on every folio involved.
3606                  */
3607                 rc = netfs_extract_user_iter(from, iov_iter_count(from),
3608                                              &ctx->iter, 0);
3609                 if (rc < 0) {
3610                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3611                         return rc;
3612                 }
3613
3614                 ctx->nr_pinned_pages = rc;
3615                 ctx->bv = (void *)ctx->iter.bvec;
3616                 ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3617         } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3618                    !is_sync_kiocb(iocb)) {
3619                 /*
3620                  * If the op is asynchronous, we need to copy the list attached
3621                  * to a BVEC/KVEC-type iterator, but we assume that the storage
3622                  * will be pinned by the caller; in any case, we may or may not
3623                  * be able to pin the pages, so we don't try.
3624                  */
3625                 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3626                 if (!ctx->bv) {
3627                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3628                         return -ENOMEM;
3629                 }
3630         } else {
3631                 /*
3632                  * Otherwise, we just pass the iterator down as-is and rely on
3633                  * the caller to make sure the pages referred to by the
3634                  * iterator don't evaporate.
3635                  */
3636                 ctx->iter = *from;
3637         }
3638
3639         ctx->len = iov_iter_count(&ctx->iter);
3640
3641         /* grab a lock here due to read response handlers can access ctx */
3642         mutex_lock(&ctx->aio_mutex);
3643
3644         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3645                                   cfile, cifs_sb, &ctx->list, ctx);
3646
3647         /*
3648          * If at least one write was successfully sent, then discard any rc
3649          * value from the later writes. If the other write succeeds, then
3650          * we'll end up returning whatever was written. If it fails, then
3651          * we'll get a new rc value from that.
3652          */
3653         if (!list_empty(&ctx->list))
3654                 rc = 0;
3655
3656         mutex_unlock(&ctx->aio_mutex);
3657
3658         if (rc) {
3659                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3660                 return rc;
3661         }
3662
3663         if (!is_sync_kiocb(iocb)) {
3664                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3665                 return -EIOCBQUEUED;
3666         }
3667
3668         rc = wait_for_completion_killable(&ctx->done);
3669         if (rc) {
3670                 mutex_lock(&ctx->aio_mutex);
3671                 ctx->rc = rc = -EINTR;
3672                 total_written = ctx->total_len;
3673                 mutex_unlock(&ctx->aio_mutex);
3674         } else {
3675                 rc = ctx->rc;
3676                 total_written = ctx->total_len;
3677         }
3678
3679         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3680
3681         if (unlikely(!total_written))
3682                 return rc;
3683
3684         iocb->ki_pos += total_written;
3685         return total_written;
3686 }
3687
3688 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3689 {
3690         struct file *file = iocb->ki_filp;
3691
3692         cifs_revalidate_mapping(file->f_inode);
3693         return __cifs_writev(iocb, from, true);
3694 }
3695
3696 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3697 {
3698         return __cifs_writev(iocb, from, false);
3699 }
3700
3701 static ssize_t
3702 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3703 {
3704         struct file *file = iocb->ki_filp;
3705         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3706         struct inode *inode = file->f_mapping->host;
3707         struct cifsInodeInfo *cinode = CIFS_I(inode);
3708         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3709         ssize_t rc;
3710
3711         inode_lock(inode);
3712         /*
3713          * We need to hold the sem to be sure nobody modifies lock list
3714          * with a brlock that prevents writing.
3715          */
3716         down_read(&cinode->lock_sem);
3717
3718         rc = generic_write_checks(iocb, from);
3719         if (rc <= 0)
3720                 goto out;
3721
3722         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3723                                      server->vals->exclusive_lock_type, 0,
3724                                      NULL, CIFS_WRITE_OP))
3725                 rc = __generic_file_write_iter(iocb, from);
3726         else
3727                 rc = -EACCES;
3728 out:
3729         up_read(&cinode->lock_sem);
3730         inode_unlock(inode);
3731
3732         if (rc > 0)
3733                 rc = generic_write_sync(iocb, rc);
3734         return rc;
3735 }
3736
3737 ssize_t
3738 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3739 {
3740         struct inode *inode = file_inode(iocb->ki_filp);
3741         struct cifsInodeInfo *cinode = CIFS_I(inode);
3742         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3743         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3744                                                 iocb->ki_filp->private_data;
3745         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3746         ssize_t written;
3747
3748         written = cifs_get_writer(cinode);
3749         if (written)
3750                 return written;
3751
3752         if (CIFS_CACHE_WRITE(cinode)) {
3753                 if (cap_unix(tcon->ses) &&
3754                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3755                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3756                         written = generic_file_write_iter(iocb, from);
3757                         goto out;
3758                 }
3759                 written = cifs_writev(iocb, from);
3760                 goto out;
3761         }
3762         /*
3763          * For non-oplocked files in strict cache mode we need to write the data
3764          * to the server exactly from the pos to pos+len-1 rather than flush all
3765          * affected pages because it may cause a error with mandatory locks on
3766          * these pages but not on the region from pos to ppos+len-1.
3767          */
3768         written = cifs_user_writev(iocb, from);
3769         if (CIFS_CACHE_READ(cinode)) {
3770                 /*
3771                  * We have read level caching and we have just sent a write
3772                  * request to the server thus making data in the cache stale.
3773                  * Zap the cache and set oplock/lease level to NONE to avoid
3774                  * reading stale data from the cache. All subsequent read
3775                  * operations will read new data from the server.
3776                  */
3777                 cifs_zap_mapping(inode);
3778                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3779                          inode);
3780                 cinode->oplock = 0;
3781         }
3782 out:
3783         cifs_put_writer(cinode);
3784         return written;
3785 }
3786
3787 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3788 {
3789         struct cifs_readdata *rdata;
3790
3791         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3792         if (rdata) {
3793                 kref_init(&rdata->refcount);
3794                 INIT_LIST_HEAD(&rdata->list);
3795                 init_completion(&rdata->done);
3796                 INIT_WORK(&rdata->work, complete);
3797         }
3798
3799         return rdata;
3800 }
3801
3802 void
3803 cifs_readdata_release(struct kref *refcount)
3804 {
3805         struct cifs_readdata *rdata = container_of(refcount,
3806                                         struct cifs_readdata, refcount);
3807
3808         if (rdata->ctx)
3809                 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3810 #ifdef CONFIG_CIFS_SMB_DIRECT
3811         if (rdata->mr) {
3812                 smbd_deregister_mr(rdata->mr);
3813                 rdata->mr = NULL;
3814         }
3815 #endif
3816         if (rdata->cfile)
3817                 cifsFileInfo_put(rdata->cfile);
3818
3819         kfree(rdata);
3820 }
3821
3822 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3823
3824 static void
3825 cifs_uncached_readv_complete(struct work_struct *work)
3826 {
3827         struct cifs_readdata *rdata = container_of(work,
3828                                                 struct cifs_readdata, work);
3829
3830         complete(&rdata->done);
3831         collect_uncached_read_data(rdata->ctx);
3832         /* the below call can possibly free the last ref to aio ctx */
3833         kref_put(&rdata->refcount, cifs_readdata_release);
3834 }
3835
3836 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3837                         struct list_head *rdata_list,
3838                         struct cifs_aio_ctx *ctx)
3839 {
3840         unsigned int rsize;
3841         struct cifs_credits credits;
3842         int rc;
3843         struct TCP_Server_Info *server;
3844
3845         /* XXX: should we pick a new channel here? */
3846         server = rdata->server;
3847
3848         do {
3849                 if (rdata->cfile->invalidHandle) {
3850                         rc = cifs_reopen_file(rdata->cfile, true);
3851                         if (rc == -EAGAIN)
3852                                 continue;
3853                         else if (rc)
3854                                 break;
3855                 }
3856
3857                 /*
3858                  * Wait for credits to resend this rdata.
3859                  * Note: we are attempting to resend the whole rdata not in
3860                  * segments
3861                  */
3862                 do {
3863                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3864                                                 &rsize, &credits);
3865
3866                         if (rc)
3867                                 goto fail;
3868
3869                         if (rsize < rdata->bytes) {
3870                                 add_credits_and_wake_if(server, &credits, 0);
3871                                 msleep(1000);
3872                         }
3873                 } while (rsize < rdata->bytes);
3874                 rdata->credits = credits;
3875
3876                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3877                 if (!rc) {
3878                         if (rdata->cfile->invalidHandle)
3879                                 rc = -EAGAIN;
3880                         else {
3881 #ifdef CONFIG_CIFS_SMB_DIRECT
3882                                 if (rdata->mr) {
3883                                         rdata->mr->need_invalidate = true;
3884                                         smbd_deregister_mr(rdata->mr);
3885                                         rdata->mr = NULL;
3886                                 }
3887 #endif
3888                                 rc = server->ops->async_readv(rdata);
3889                         }
3890                 }
3891
3892                 /* If the read was successfully sent, we are done */
3893                 if (!rc) {
3894                         /* Add to aio pending list */
3895                         list_add_tail(&rdata->list, rdata_list);
3896                         return 0;
3897                 }
3898
3899                 /* Roll back credits and retry if needed */
3900                 add_credits_and_wake_if(server, &rdata->credits, 0);
3901         } while (rc == -EAGAIN);
3902
3903 fail:
3904         kref_put(&rdata->refcount, cifs_readdata_release);
3905         return rc;
3906 }
3907
/*
 * Issue asynchronous reads covering @len bytes starting at file position
 * @fpos on @open_file.
 *
 * The range is cut into chunks.  Each chunk is no larger than the size
 * handed back by ->wait_mtu_credits() and is further limited by
 * cifs_limit_bvec_subset() to at most max_segs segments of ctx->iter.  Every
 * chunk gets its own cifs_readdata which is passed to ->async_readv() and,
 * once sent, appended to @rdata_list; ctx->iter, @fpos and @len are then
 * moved past the chunk.
 *
 * An -EAGAIN from reopening the handle or from sending a chunk makes the loop
 * go round again for the same chunk.  Any other failure ends the loop and is
 * returned; requests that were already appended to @rdata_list stay there.
 *
 * Returns 0 when the last chunk was sent successfully, otherwise a negative
 * errno.
 */
static int
cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
                     struct cifs_aio_ctx *ctx)
{
        struct cifs_readdata *rdata;
        unsigned int rsize, nsegs, max_segs = INT_MAX;
        struct cifs_credits credits_on_stack;
        struct cifs_credits *credits = &credits_on_stack;
        size_t cur_len, max_len;
        int rc;
        pid_t pid;
        struct TCP_Server_Info *server;

        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

#ifdef CONFIG_CIFS_SMB_DIRECT
        /* An smbdirect connection caps the number of segments per request */
        if (server->smbd_conn)
                max_segs = server->smbd_conn->max_frmr_depth;
#endif

        /* Use the opener's pid when the mount asks for pid forwarding */
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        do {
                if (open_file->invalidHandle) {
                        rc = cifs_reopen_file(open_file, true);
                        if (rc == -EAGAIN)
                                continue;
                        else if (rc)
                                break;
                }

                /* rsize of 0 means it has not been negotiated yet */
                if (cifs_sb->ctx->rsize == 0)
                        cifs_sb->ctx->rsize =
                                server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
                                                             cifs_sb->ctx);

                /* On success, rsize holds the size we may read with these credits */
                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
                                                   &rsize, credits);
                if (rc)
                        break;

                max_len = min_t(size_t, len, rsize);

                cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
                                                 max_segs, &nsegs);
                cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
                         cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
                if (cur_len == 0) {
                        /* Nothing usable in the iterator: roll back the credits */
                        rc = -EIO;
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
                if (!rdata) {
                        add_credits_and_wake_if(server, credits, 0);
                        rc = -ENOMEM;
                        break;
                }

                rdata->server   = server;
                rdata->cfile    = cifsFileInfo_get(open_file);
                rdata->offset   = fpos;
                rdata->bytes    = cur_len;
                rdata->pid      = pid;
                rdata->credits  = credits_on_stack;
                rdata->ctx      = ctx;
                kref_get(&ctx->refcount);

                /* Give the request its own view of the first cur_len bytes */
                rdata->iter     = ctx->iter;
                iov_iter_truncate(&rdata->iter, cur_len);

                rc = adjust_credits(server, &rdata->credits, rdata->bytes);

                if (!rc) {
                        /* The handle may have been invalidated in the meantime */
                        if (rdata->cfile->invalidHandle)
                                rc = -EAGAIN;
                        else
                                rc = server->ops->async_readv(rdata);
                }

                if (rc) {
                        /* Roll back credits, drop the request, retry on -EAGAIN */
                        add_credits_and_wake_if(server, &rdata->credits, 0);
                        kref_put(&rdata->refcount, cifs_readdata_release);
                        if (rc == -EAGAIN)
                                continue;
                        break;
                }

                list_add_tail(&rdata->list, rdata_list);
                iov_iter_advance(&ctx->iter, cur_len);
                fpos += cur_len;
                len -= cur_len;
        } while (len > 0);

        return rc;
}
4009
/*
 * Walk the read requests queued on ctx->list, fold their results into @ctx
 * and, once every request has been dealt with, signal completion of the
 * whole uncached read.
 *
 * Everything up to the final signalling runs under ctx->aio_mutex.  If the
 * list is empty, or a request that still matters has not completed yet, the
 * function returns without signalling anything (presumably it is invoked
 * again from a later completion -- the callers are not visible here).
 *
 * While no error has been recorded, each completed request is handled as
 * follows:
 *  - result == -EAGAIN: the request is taken off the list and resent (the
 *    same rdata for direct I/O, otherwise new requests for the bytes not yet
 *    received), the new requests are spliced onto ctx->list and the walk
 *    starts over;
 *  - any other non-zero result becomes the overall rc;
 *  - a short read sets rc to -ENODATA, which is masked to 0 at the end.
 * Once rc is non-zero the remaining requests are only unlinked and released.
 *
 * ctx->rc ends up as the total byte count on success or the error code, and
 * is delivered through iocb->ki_complete() if there is one, otherwise by
 * completing ctx->done.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
        struct cifs_readdata *rdata, *tmp;
        struct cifs_sb_info *cifs_sb;
        int rc;

        cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

        mutex_lock(&ctx->aio_mutex);

        if (list_empty(&ctx->list)) {
                mutex_unlock(&ctx->aio_mutex);
                return;
        }

        rc = ctx->rc;
        /* the loop below should proceed in the order of increasing offsets */
again:
        list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
                if (!rc) {
                        /* Not finished yet: leave the list as it is and bail out */
                        if (!try_wait_for_completion(&rdata->done)) {
                                mutex_unlock(&ctx->aio_mutex);
                                return;
                        }

                        if (rdata->result == -EAGAIN) {
                                /* resend call if it's a retryable error */
                                struct list_head tmp_list;
                                unsigned int got_bytes = rdata->got_bytes;

                                list_del_init(&rdata->list);
                                INIT_LIST_HEAD(&tmp_list);

                                if (ctx->direct_io) {
                                        /*
                                         * Re-use rdata as this is a
                                         * direct I/O
                                         */
                                        rc = cifs_resend_rdata(
                                                rdata,
                                                &tmp_list, ctx);
                                } else {
                                        /* Ask again only for the bytes still missing */
                                        rc = cifs_send_async_read(
                                                rdata->offset + got_bytes,
                                                rdata->bytes - got_bytes,
                                                rdata->cfile, cifs_sb,
                                                &tmp_list, ctx);

                                        kref_put(&rdata->refcount,
                                                cifs_readdata_release);
                                }

                                list_splice(&tmp_list, &ctx->list);

                                /* The list changed under us: restart the walk */
                                goto again;
                        } else if (rdata->result)
                                rc = rdata->result;

                        /* if there was a short read -- discard anything left */
                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
                                rc = -ENODATA;

                        ctx->total_len += rdata->got_bytes;
                }
                list_del_init(&rdata->list);
                kref_put(&rdata->refcount, cifs_readdata_release);
        }

        /* mask nodata case */
        if (rc == -ENODATA)
                rc = 0;

        ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

        mutex_unlock(&ctx->aio_mutex);

        if (ctx->iocb && ctx->iocb->ki_complete)
                ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
        else
                complete(&ctx->done);
}
4092
/*
 * Common implementation of the uncached read paths.
 *
 * @iocb:   the I/O control block; iocb->ki_pos is the starting offset
 * @to:     destination iterator
 * @direct: stored in ctx->direct_io; when true, dirty pagecache over the
 *          range is written back first
 *
 * A cifs_aio_ctx is allocated to track the operation, the destination
 * iterator is converted or copied into ctx->iter as needed, and the read is
 * sent with cifs_send_async_read().
 *
 * For a non-synchronous @iocb the function returns -EIOCBQUEUED as soon as at
 * least one request is queued.  For a synchronous @iocb it waits (killably)
 * on ctx->done, advances iocb->ki_pos by the bytes read and returns that
 * count, or returns the error if nothing was read.
 *
 * Other returns: 0 for an empty iterator, -ENOSYS if the server ops lack
 * ->async_readv, -ENOMEM on allocation failure, -EAGAIN if the writeback
 * before a direct read fails.
 */
static ssize_t __cifs_readv(
        struct kiocb *iocb, struct iov_iter *to, bool direct)
{
        size_t len;
        struct file *file = iocb->ki_filp;
        struct cifs_sb_info *cifs_sb;
        struct cifsFileInfo *cfile;
        struct cifs_tcon *tcon;
        ssize_t rc, total_read = 0;
        loff_t offset = iocb->ki_pos;
        struct cifs_aio_ctx *ctx;

        len = iov_iter_count(to);
        if (!len)
                return 0;

        cifs_sb = CIFS_FILE_SB(file);
        cfile = file->private_data;
        tcon = tlink_tcon(cfile->tlink);

        if (!tcon->ses->server->ops->async_readv)
                return -ENOSYS;

        /* Only logged; the read is still attempted */
        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                cifs_dbg(FYI, "attempting read on write only file instance\n");

        ctx = cifs_aio_ctx_alloc();
        if (!ctx)
                return -ENOMEM;

        ctx->pos        = offset;
        ctx->direct_io  = direct;
        ctx->len        = len;
        ctx->cfile      = cifsFileInfo_get(cfile);
        ctx->nr_pinned_pages = 0;

        /* ctx->iocb is only set for asynchronous requests */
        if (!is_sync_kiocb(iocb))
                ctx->iocb = iocb;

        if (user_backed_iter(to)) {
                /*
                 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
                 * they contain references to the calling process's virtual
                 * memory layout which won't be available in an async worker
                 * thread.  This also takes a pin on every folio involved.
                 */
                rc = netfs_extract_user_iter(to, iov_iter_count(to),
                                             &ctx->iter, 0);
                if (rc < 0) {
                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
                        return rc;
                }

                ctx->nr_pinned_pages = rc;
                ctx->bv = (void *)ctx->iter.bvec;
                ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
                ctx->should_dirty = true;
        } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
                   !is_sync_kiocb(iocb)) {
                /*
                 * If the op is asynchronous, we need to copy the list attached
                 * to a BVEC/KVEC-type iterator, but we assume that the storage
                 * will be retained by the caller; in any case, we may or may
                 * not be able to pin the pages, so we don't try.
                 */
                ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
                if (!ctx->bv) {
                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
                        return -ENOMEM;
                }
        } else {
                /*
                 * Otherwise, we just pass the iterator down as-is and rely on
                 * the caller to make sure the pages referred to by the
                 * iterator don't evaporate.
                 */
                ctx->iter = *to;
        }

        if (direct) {
                /* Write back dirty pagecache over the range before reading */
                rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
                                                  offset, offset + len - 1);
                if (rc) {
                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
                        return -EAGAIN;
                }
        }

        /* take the lock here because read response handlers can access ctx */
        mutex_lock(&ctx->aio_mutex);

        rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

        /* if at least one read request send succeeded, then reset rc */
        if (!list_empty(&ctx->list))
                rc = 0;

        mutex_unlock(&ctx->aio_mutex);

        if (rc) {
                kref_put(&ctx->refcount, cifs_aio_ctx_release);
                return rc;
        }

        /* Asynchronous caller: the result is delivered through the iocb later */
        if (!is_sync_kiocb(iocb)) {
                kref_put(&ctx->refcount, cifs_aio_ctx_release);
                return -EIOCBQUEUED;
        }

        rc = wait_for_completion_killable(&ctx->done);
        if (rc) {
                /* Interrupted: record -EINTR in ctx under the lock */
                mutex_lock(&ctx->aio_mutex);
                ctx->rc = rc = -EINTR;
                total_read = ctx->total_len;
                mutex_unlock(&ctx->aio_mutex);
        } else {
                rc = ctx->rc;
                total_read = ctx->total_len;
        }

        kref_put(&ctx->refcount, cifs_aio_ctx_release);

        /* Bytes already read take precedence over an error code */
        if (total_read) {
                iocb->ki_pos += total_read;
                return total_read;
        }
        return rc;
}
4221
4222 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4223 {
4224         return __cifs_readv(iocb, to, true);
4225 }
4226
4227 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4228 {
4229         return __cifs_readv(iocb, to, false);
4230 }
4231
4232 ssize_t
4233 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4234 {
4235         struct inode *inode = file_inode(iocb->ki_filp);
4236         struct cifsInodeInfo *cinode = CIFS_I(inode);
4237         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4238         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4239                                                 iocb->ki_filp->private_data;
4240         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4241         int rc = -EACCES;
4242
4243         /*
4244          * In strict cache mode we need to read from the server all the time
4245          * if we don't have level II oplock because the server can delay mtime
4246          * change - so we can't make a decision about inode invalidating.
4247          * And we can also fail with pagereading if there are mandatory locks
4248          * on pages affected by this read but not on the region from pos to
4249          * pos+len-1.
4250          */
4251         if (!CIFS_CACHE_READ(cinode))
4252                 return cifs_user_readv(iocb, to);
4253
4254         if (cap_unix(tcon->ses) &&
4255             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4256             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4257                 return generic_file_read_iter(iocb, to);
4258
4259         /*
4260          * We need to hold the sem to be sure nobody modifies lock list
4261          * with a brlock that prevents reading.
4262          */
4263         down_read(&cinode->lock_sem);
4264         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4265                                      tcon->ses->server->vals->shared_lock_type,
4266                                      0, NULL, CIFS_READ_OP))
4267                 rc = generic_file_read_iter(iocb, to);
4268         up_read(&cinode->lock_sem);
4269         return rc;
4270 }
4271
4272 static ssize_t
4273 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4274 {
4275         int rc = -EACCES;
4276         unsigned int bytes_read = 0;
4277         unsigned int total_read;
4278         unsigned int current_read_size;
4279         unsigned int rsize;
4280         struct cifs_sb_info *cifs_sb;
4281         struct cifs_tcon *tcon;
4282         struct TCP_Server_Info *server;
4283         unsigned int xid;
4284         char *cur_offset;
4285         struct cifsFileInfo *open_file;
4286         struct cifs_io_parms io_parms = {0};
4287         int buf_type = CIFS_NO_BUFFER;
4288         __u32 pid;
4289
4290         xid = get_xid();
4291         cifs_sb = CIFS_FILE_SB(file);
4292
4293         /* FIXME: set up handlers for larger reads and/or convert to async */
4294         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4295
4296         if (file->private_data == NULL) {
4297                 rc = -EBADF;
4298                 free_xid(xid);
4299                 return rc;
4300         }
4301         open_file = file->private_data;
4302         tcon = tlink_tcon(open_file->tlink);
4303         server = cifs_pick_channel(tcon->ses);
4304
4305         if (!server->ops->sync_read) {
4306                 free_xid(xid);
4307                 return -ENOSYS;
4308         }
4309
4310         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4311                 pid = open_file->pid;
4312         else
4313                 pid = current->tgid;
4314
4315         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4316                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4317
4318         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4319              total_read += bytes_read, cur_offset += bytes_read) {
4320                 do {
4321                         current_read_size = min_t(uint, read_size - total_read,
4322                                                   rsize);
4323                         /*
4324                          * For windows me and 9x we do not want to request more
4325                          * than it negotiated since it will refuse the read
4326                          * then.
4327                          */
4328                         if (!(tcon->ses->capabilities &
4329                                 tcon->ses->server->vals->cap_large_files)) {
4330                                 current_read_size = min_t(uint,
4331                                         current_read_size, CIFSMaxBufSize);
4332                         }
4333                         if (open_file->invalidHandle) {
4334                                 rc = cifs_reopen_file(open_file, true);
4335                                 if (rc != 0)
4336                                         break;
4337                         }
4338                         io_parms.pid = pid;
4339                         io_parms.tcon = tcon;
4340                         io_parms.offset = *offset;
4341                         io_parms.length = current_read_size;
4342                         io_parms.server = server;
4343                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4344                                                     &bytes_read, &cur_offset,
4345                                                     &buf_type);
4346                 } while (rc == -EAGAIN);
4347
4348                 if (rc || (bytes_read == 0)) {
4349                         if (total_read) {
4350                                 break;
4351                         } else {
4352                                 free_xid(xid);
4353                                 return rc;
4354                         }
4355                 } else {
4356                         cifs_stats_bytes_read(tcon, total_read);
4357                         *offset += bytes_read;
4358                 }
4359         }
4360         free_xid(xid);
4361         return total_read;
4362 }
4363
4364 /*
4365  * If the page is mmap'ed into a process' page tables, then we need to make
4366  * sure that it doesn't change while being written back.
4367  */
4368 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4369 {
4370         struct folio *folio = page_folio(vmf->page);
4371
4372         /* Wait for the folio to be written to the cache before we allow it to
4373          * be modified.  We then assume the entire folio will need writing back.
4374          */
4375 #ifdef CONFIG_CIFS_FSCACHE
4376         if (folio_test_fscache(folio) &&
4377             folio_wait_fscache_killable(folio) < 0)
4378                 return VM_FAULT_RETRY;
4379 #endif
4380
4381         folio_wait_writeback(folio);
4382
4383         if (folio_lock_killable(folio) < 0)
4384                 return VM_FAULT_RETRY;
4385         return VM_FAULT_LOCKED;
4386 }
4387
/*
 * VM operations installed on CIFS mappings by cifs_file_mmap() and
 * cifs_file_strict_mmap(): faults and map_pages use the generic filemap
 * helpers, while write faults go through cifs_page_mkwrite().
 */
static const struct vm_operations_struct cifs_file_vm_ops = {
        .fault = filemap_fault,
        .map_pages = filemap_map_pages,
        .page_mkwrite = cifs_page_mkwrite,
};
4393
4394 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4395 {
4396         int xid, rc = 0;
4397         struct inode *inode = file_inode(file);
4398
4399         xid = get_xid();
4400
4401         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4402                 rc = cifs_zap_mapping(inode);
4403         if (!rc)
4404                 rc = generic_file_mmap(file, vma);
4405         if (!rc)
4406                 vma->vm_ops = &cifs_file_vm_ops;
4407
4408         free_xid(xid);
4409         return rc;
4410 }
4411
4412 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4413 {
4414         int rc, xid;
4415
4416         xid = get_xid();
4417
4418         rc = cifs_revalidate_file(file);
4419         if (rc)
4420                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4421                          rc);
4422         if (!rc)
4423                 rc = generic_file_mmap(file, vma);
4424         if (!rc)
4425                 vma->vm_ops = &cifs_file_vm_ops;
4426
4427         free_xid(xid);
4428         return rc;
4429 }
4430
4431 /*
4432  * Unlock a bunch of folios in the pagecache.
4433  */
4434 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4435 {
4436         struct folio *folio;
4437         XA_STATE(xas, &mapping->i_pages, first);
4438
4439         rcu_read_lock();
4440         xas_for_each(&xas, folio, last) {
4441                 folio_unlock(folio);
4442         }
4443         rcu_read_unlock();
4444 }
4445
4446 static void cifs_readahead_complete(struct work_struct *work)
4447 {
4448         struct cifs_readdata *rdata = container_of(work,
4449                                                    struct cifs_readdata, work);
4450         struct folio *folio;
4451         pgoff_t last;
4452         bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4453
4454         XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4455
4456         if (good)
4457                 cifs_readahead_to_fscache(rdata->mapping->host,
4458                                           rdata->offset, rdata->bytes);
4459
4460         if (iov_iter_count(&rdata->iter) > 0)
4461                 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4462
4463         last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4464
4465         rcu_read_lock();
4466         xas_for_each(&xas, folio, last) {
4467                 if (good) {
4468                         flush_dcache_folio(folio);
4469                         folio_mark_uptodate(folio);
4470                 }
4471                 folio_unlock(folio);
4472         }
4473         rcu_read_unlock();
4474
4475         kref_put(&rdata->refcount, cifs_readdata_release);
4476 }
4477
/*
 * Readahead handler: populate the folios described by @ractl.
 *
 * The request is consumed front to back.  When fscache reports cached data
 * at the current index, one folio at a time is read from the cache.
 * Otherwise a run of pages -- limited by the size handed back by
 * ->wait_mtu_credits(), the pages left in the request and the start of the
 * next cached region -- is taken from @ractl and read from the server with
 * one asynchronous request, completed by cifs_readahead_complete().
 *
 * Nothing is returned.  On any failure the loop simply stops; folios of a
 * request that could not be sent are unlocked here.
 */
static void cifs_readahead(struct readahead_control *ractl)
{
        struct cifsFileInfo *open_file = ractl->file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
        struct TCP_Server_Info *server;
        unsigned int xid, nr_pages, cache_nr_pages = 0;
        unsigned int ra_pages;
        pgoff_t next_cached = ULONG_MAX, ra_index;
        bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
                cifs_inode_cookie(ractl->mapping->host)->cache_priv;
        bool check_cache = caching;
        pid_t pid;
        int rc = 0;

        /* Note that readahead_count() lags behind our dequeuing of pages from
         * the ractl, so we have to keep track for ourselves.
         */
        ra_pages = readahead_count(ractl);
        ra_index = readahead_index(ractl);

        xid = get_xid();

        /* Use the opener's pid when the mount asks for pid forwarding */
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
                 __func__, ractl->file, ractl->mapping, ra_pages);

        /*
         * Chop the readahead request up into rsize-sized read requests.
         */
        while ((nr_pages = ra_pages)) {
                unsigned int i, rsize;
                struct cifs_readdata *rdata;
                struct cifs_credits credits_on_stack;
                struct cifs_credits *credits = &credits_on_stack;
                struct folio *folio;
                pgoff_t fsize;

                /*
                 * Find out if we have anything cached in the range of
                 * interest, and if so, where the next chunk of cached data is.
                 */
                if (caching) {
                        if (check_cache) {
                                rc = cifs_fscache_query_occupancy(
                                        ractl->mapping->host, ra_index, nr_pages,
                                        &next_cached, &cache_nr_pages);
                                /* A failed query turns caching off for this call */
                                if (rc < 0)
                                        caching = false;
                                check_cache = false;
                        }

                        if (ra_index == next_cached) {
                                /*
                                 * TODO: Send a whole batch of pages to be read
                                 * by the cache.
                                 */
                                folio = readahead_folio(ractl);
                                fsize = folio_nr_pages(folio);
                                ra_pages -= fsize;
                                ra_index += fsize;
                                if (cifs_readpage_from_fscache(ractl->mapping->host,
                                                               &folio->page) < 0) {
                                        /*
                                         * TODO: Deal with cache read failure
                                         * here, but for the moment, delegate
                                         * that to readpage.
                                         */
                                        caching = false;
                                }
                                folio_unlock(folio);
                                next_cached += fsize;
                                cache_nr_pages -= fsize;
                                /* Cached region used up: query the cache again */
                                if (cache_nr_pages == 0)
                                        check_cache = true;
                                continue;
                        }
                }

                if (open_file->invalidHandle) {
                        rc = cifs_reopen_file(open_file, true);
                        if (rc) {
                                if (rc == -EAGAIN)
                                        continue;
                                break;
                        }
                }

                /* rsize of 0 means it has not been negotiated yet */
                if (cifs_sb->ctx->rsize == 0)
                        cifs_sb->ctx->rsize =
                                server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
                                                             cifs_sb->ctx);

                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
                                                   &rsize, credits);
                if (rc)
                        break;
                nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
                /* Stop short of the next cached region, if one is known */
                if (next_cached != ULONG_MAX)
                        nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);

                /*
                 * Give up immediately if rsize is too small to read an entire
                 * page. The VFS will fall back to readpage. We should never
                 * reach this point however since we set ra_pages to 0 when the
                 * rsize is smaller than a cache page.
                 */
                if (unlikely(!nr_pages)) {
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                rdata = cifs_readdata_alloc(cifs_readahead_complete);
                if (!rdata) {
                        /* best to give up if we're out of mem */
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                rdata->offset   = ra_index * PAGE_SIZE;
                rdata->bytes    = nr_pages * PAGE_SIZE;
                rdata->cfile    = cifsFileInfo_get(open_file);
                rdata->server   = server;
                rdata->mapping  = ractl->mapping;
                rdata->pid      = pid;
                rdata->credits  = credits_on_stack;

                /*
                 * Dequeue the folios this request covers.
                 * NOTE(review): one folio is taken per page, which looks like
                 * it assumes single-page folios -- confirm.
                 */
                for (i = 0; i < nr_pages; i++) {
                        if (!readahead_folio(ractl))
                                WARN_ON(1);
                }
                ra_pages -= nr_pages;
                ra_index += nr_pages;

                /* The read lands directly in the pagecache folios */
                iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
                                rdata->offset, rdata->bytes);

                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
                if (!rc) {
                        /* The handle may have been invalidated in the meantime */
                        if (rdata->cfile->invalidHandle)
                                rc = -EAGAIN;
                        else
                                rc = server->ops->async_readv(rdata);
                }

                if (rc) {
                        add_credits_and_wake_if(server, &rdata->credits, 0);
                        cifs_unlock_folios(rdata->mapping,
                                           rdata->offset / PAGE_SIZE,
                                           (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
                        /* Fallback to the readpage in error/reconnect cases */
                        kref_put(&rdata->refcount, cifs_readdata_release);
                        break;
                }

                /*
                 * Drop our reference.  NOTE(review): the in-flight request
                 * presumably holds its own reference -- confirm in
                 * ->async_readv().
                 */
                kref_put(&rdata->refcount, cifs_readdata_release);
        }

        free_xid(xid);
}
4643
4644 /*
4645  * cifs_readpage_worker must be called with the page pinned
4646  */
4647 static int cifs_readpage_worker(struct file *file, struct page *page,
4648         loff_t *poffset)
4649 {
4650         char *read_data;
4651         int rc;
4652
4653         /* Is the page cached? */
4654         rc = cifs_readpage_from_fscache(file_inode(file), page);
4655         if (rc == 0)
4656                 goto read_complete;
4657
4658         read_data = kmap(page);
4659         /* for reads over a certain size could initiate async read ahead */
4660
4661         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4662
4663         if (rc < 0)
4664                 goto io_error;
4665         else
4666                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4667
4668         /* we do not want atime to be less than mtime, it broke some apps */
4669         file_inode(file)->i_atime = current_time(file_inode(file));
4670         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4671                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4672         else
4673                 file_inode(file)->i_atime = current_time(file_inode(file));
4674
4675         if (PAGE_SIZE > rc)
4676                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4677
4678         flush_dcache_page(page);
4679         SetPageUptodate(page);
4680         rc = 0;
4681
4682 io_error:
4683         kunmap(page);
4684
4685 read_complete:
4686         unlock_page(page);
4687         return rc;
4688 }
4689
4690 static int cifs_read_folio(struct file *file, struct folio *folio)
4691 {
4692         struct page *page = &folio->page;
4693         loff_t offset = page_file_offset(page);
4694         int rc = -EACCES;
4695         unsigned int xid;
4696
4697         xid = get_xid();
4698
4699         if (file->private_data == NULL) {
4700                 rc = -EBADF;
4701                 free_xid(xid);
4702                 return rc;
4703         }
4704
4705         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4706                  page, (int)offset, (int)offset);
4707
4708         rc = cifs_readpage_worker(file, page, &offset);
4709
4710         free_xid(xid);
4711         return rc;
4712 }
4713
4714 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4715 {
4716         struct cifsFileInfo *open_file;
4717
4718         spin_lock(&cifs_inode->open_file_lock);
4719         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4720                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4721                         spin_unlock(&cifs_inode->open_file_lock);
4722                         return 1;
4723                 }
4724         }
4725         spin_unlock(&cifs_inode->open_file_lock);
4726         return 0;
4727 }
4728
4729 /* We do not want to update the file size from server for inodes
4730    open for write - to avoid races with writepage extending
4731    the file - in the future we could consider allowing
4732    refreshing the inode only on increases in the file size
4733    but this is tricky to do without racing with writebehind
4734    page caching in the current Linux kernel design */
4735 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4736 {
4737         if (!cifsInode)
4738                 return true;
4739
4740         if (is_inode_writable(cifsInode)) {
4741                 /* This inode is open for write at least once */
4742                 struct cifs_sb_info *cifs_sb;
4743
4744                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4745                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4746                         /* since no page cache to corrupt on directio
4747                         we can change size safely */
4748                         return true;
4749                 }
4750
4751                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4752                         return true;
4753
4754                 return false;
4755         } else
4756                 return true;
4757 }
4758
4759 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4760                         loff_t pos, unsigned len,
4761                         struct page **pagep, void **fsdata)
4762 {
4763         int oncethru = 0;
4764         pgoff_t index = pos >> PAGE_SHIFT;
4765         loff_t offset = pos & (PAGE_SIZE - 1);
4766         loff_t page_start = pos & PAGE_MASK;
4767         loff_t i_size;
4768         struct page *page;
4769         int rc = 0;
4770
4771         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4772
4773 start:
4774         page = grab_cache_page_write_begin(mapping, index);
4775         if (!page) {
4776                 rc = -ENOMEM;
4777                 goto out;
4778         }
4779
4780         if (PageUptodate(page))
4781                 goto out;
4782
4783         /*
4784          * If we write a full page it will be up to date, no need to read from
4785          * the server. If the write is short, we'll end up doing a sync write
4786          * instead.
4787          */
4788         if (len == PAGE_SIZE)
4789                 goto out;
4790
4791         /*
4792          * optimize away the read when we have an oplock, and we're not
4793          * expecting to use any of the data we'd be reading in. That
4794          * is, when the page lies beyond the EOF, or straddles the EOF
4795          * and the write will cover all of the existing data.
4796          */
4797         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4798                 i_size = i_size_read(mapping->host);
4799                 if (page_start >= i_size ||
4800                     (offset == 0 && (pos + len) >= i_size)) {
4801                         zero_user_segments(page, 0, offset,
4802                                            offset + len,
4803                                            PAGE_SIZE);
4804                         /*
4805                          * PageChecked means that the parts of the page
4806                          * to which we're not writing are considered up
4807                          * to date. Once the data is copied to the
4808                          * page, it can be set uptodate.
4809                          */
4810                         SetPageChecked(page);
4811                         goto out;
4812                 }
4813         }
4814
4815         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4816                 /*
4817                  * might as well read a page, it is fast enough. If we get
4818                  * an error, we don't need to return it. cifs_write_end will
4819                  * do a sync write instead since PG_uptodate isn't set.
4820                  */
4821                 cifs_readpage_worker(file, page, &page_start);
4822                 put_page(page);
4823                 oncethru = 1;
4824                 goto start;
4825         } else {
4826                 /* we could try using another file handle if there is one -
4827                    but how would we lock it to prevent close of that handle
4828                    racing with this read? In any case
4829                    this will be written out by write_end so is fine */
4830         }
4831 out:
4832         *pagep = page;
4833         return rc;
4834 }
4835
4836 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4837 {
4838         if (folio_test_private(folio))
4839                 return 0;
4840         if (folio_test_fscache(folio)) {
4841                 if (current_is_kswapd() || !(gfp & __GFP_FS))
4842                         return false;
4843                 folio_wait_fscache(folio);
4844         }
4845         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4846         return true;
4847 }
4848
4849 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4850                                  size_t length)
4851 {
4852         folio_wait_fscache(folio);
4853 }
4854
4855 static int cifs_launder_folio(struct folio *folio)
4856 {
4857         int rc = 0;
4858         loff_t range_start = folio_pos(folio);
4859         loff_t range_end = range_start + folio_size(folio);
4860         struct writeback_control wbc = {
4861                 .sync_mode = WB_SYNC_ALL,
4862                 .nr_to_write = 0,
4863                 .range_start = range_start,
4864                 .range_end = range_end,
4865         };
4866
4867         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4868
4869         if (folio_clear_dirty_for_io(folio))
4870                 rc = cifs_writepage_locked(&folio->page, &wbc);
4871
4872         folio_wait_fscache(folio);
4873         return rc;
4874 }
4875
4876 void cifs_oplock_break(struct work_struct *work)
4877 {
4878         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4879                                                   oplock_break);
4880         struct inode *inode = d_inode(cfile->dentry);
4881         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4882         struct cifsInodeInfo *cinode = CIFS_I(inode);
4883         struct cifs_tcon *tcon;
4884         struct TCP_Server_Info *server;
4885         struct tcon_link *tlink;
4886         int rc = 0;
4887         bool purge_cache = false, oplock_break_cancelled;
4888         __u64 persistent_fid, volatile_fid;
4889         __u16 net_fid;
4890
4891         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4892                         TASK_UNINTERRUPTIBLE);
4893
4894         tlink = cifs_sb_tlink(cifs_sb);
4895         if (IS_ERR(tlink))
4896                 goto out;
4897         tcon = tlink_tcon(tlink);
4898         server = tcon->ses->server;
4899
4900         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4901                                       cfile->oplock_epoch, &purge_cache);
4902
4903         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4904                                                 cifs_has_mand_locks(cinode)) {
4905                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4906                          inode);
4907                 cinode->oplock = 0;
4908         }
4909
4910         if (inode && S_ISREG(inode->i_mode)) {
4911                 if (CIFS_CACHE_READ(cinode))
4912                         break_lease(inode, O_RDONLY);
4913                 else
4914                         break_lease(inode, O_WRONLY);
4915                 rc = filemap_fdatawrite(inode->i_mapping);
4916                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4917                         rc = filemap_fdatawait(inode->i_mapping);
4918                         mapping_set_error(inode->i_mapping, rc);
4919                         cifs_zap_mapping(inode);
4920                 }
4921                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4922                 if (CIFS_CACHE_WRITE(cinode))
4923                         goto oplock_break_ack;
4924         }
4925
4926         rc = cifs_push_locks(cfile);
4927         if (rc)
4928                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4929
4930 oplock_break_ack:
4931         /*
4932          * When oplock break is received and there are no active
4933          * file handles but cached, then schedule deferred close immediately.
4934          * So, new open will not use cached handle.
4935          */
4936
4937         if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4938                 cifs_close_deferred_file(cinode);
4939
4940         persistent_fid = cfile->fid.persistent_fid;
4941         volatile_fid = cfile->fid.volatile_fid;
4942         net_fid = cfile->fid.netfid;
4943         oplock_break_cancelled = cfile->oplock_break_cancelled;
4944
4945         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4946         /*
4947          * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
4948          * an acknowledgment to be sent when the file has already been closed.
4949          */
4950         spin_lock(&cinode->open_file_lock);
4951         /* check list empty since can race with kill_sb calling tree disconnect */
4952         if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
4953                 spin_unlock(&cinode->open_file_lock);
4954                 rc = server->ops->oplock_response(tcon, persistent_fid,
4955                                                   volatile_fid, net_fid, cinode);
4956                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4957         } else
4958                 spin_unlock(&cinode->open_file_lock);
4959
4960         cifs_put_tlink(tlink);
4961 out:
4962         cifs_done_oplock_break(cinode);
4963 }
4964
4965 /*
4966  * The presence of cifs_direct_io() in the address space ops vector
4967  * allowes open() O_DIRECT flags which would have failed otherwise.
4968  *
4969  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4970  * so this method should never be called.
4971  *
4972  * Direct IO is not yet supported in the cached mode.
4973  */
4974 static ssize_t
4975 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4976 {
4977         /*
4978          * FIXME
4979          * Eventually need to support direct IO for non forcedirectio mounts
4980          */
4981         return -EINVAL;
4982 }
4983
4984 static int cifs_swap_activate(struct swap_info_struct *sis,
4985                               struct file *swap_file, sector_t *span)
4986 {
4987         struct cifsFileInfo *cfile = swap_file->private_data;
4988         struct inode *inode = swap_file->f_mapping->host;
4989         unsigned long blocks;
4990         long long isize;
4991
4992         cifs_dbg(FYI, "swap activate\n");
4993
4994         if (!swap_file->f_mapping->a_ops->swap_rw)
4995                 /* Cannot support swap */
4996                 return -EINVAL;
4997
4998         spin_lock(&inode->i_lock);
4999         blocks = inode->i_blocks;
5000         isize = inode->i_size;
5001         spin_unlock(&inode->i_lock);
5002         if (blocks*512 < isize) {
5003                 pr_warn("swap activate: swapfile has holes\n");
5004                 return -EINVAL;
5005         }
5006         *span = sis->pages;
5007
5008         pr_warn_once("Swap support over SMB3 is experimental\n");
5009
5010         /*
5011          * TODO: consider adding ACL (or documenting how) to prevent other
5012          * users (on this or other systems) from reading it
5013          */
5014
5015
5016         /* TODO: add sk_set_memalloc(inet) or similar */
5017
5018         if (cfile)
5019                 cfile->swapfile = true;
5020         /*
5021          * TODO: Since file already open, we can't open with DENY_ALL here
5022          * but we could add call to grab a byte range lock to prevent others
5023          * from reading or writing the file
5024          */
5025
5026         sis->flags |= SWP_FS_OPS;
5027         return add_swap_extent(sis, 0, sis->max, 0);
5028 }
5029
5030 static void cifs_swap_deactivate(struct file *file)
5031 {
5032         struct cifsFileInfo *cfile = file->private_data;
5033
5034         cifs_dbg(FYI, "swap deactivate\n");
5035
5036         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5037
5038         if (cfile)
5039                 cfile->swapfile = false;
5040
5041         /* do we need to unpin (or unlock) the file */
5042 }
5043
/*
 * Mark a page as having been made dirty and thus needing writeback.  We also
 * need to pin the cache object to write back to.
 *
 * Without CONFIG_CIFS_FSCACHE there is no cache object, so the name is
 * simply aliased to filemap_dirty_folio.
 */
#ifdef CONFIG_CIFS_FSCACHE
static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
{
	struct fscache_cookie *cookie = cifs_inode_cookie(mapping->host);

	return fscache_dirty_folio(mapping, folio, cookie);
}
#else
#define cifs_dirty_folio filemap_dirty_folio
#endif
5057
5058 const struct address_space_operations cifs_addr_ops = {
5059         .read_folio = cifs_read_folio,
5060         .readahead = cifs_readahead,
5061         .writepages = cifs_writepages,
5062         .write_begin = cifs_write_begin,
5063         .write_end = cifs_write_end,
5064         .dirty_folio = cifs_dirty_folio,
5065         .release_folio = cifs_release_folio,
5066         .direct_IO = cifs_direct_io,
5067         .invalidate_folio = cifs_invalidate_folio,
5068         .launder_folio = cifs_launder_folio,
5069         .migrate_folio = filemap_migrate_folio,
5070         /*
5071          * TODO: investigate and if useful we could add an is_dirty_writeback
5072          * helper if needed
5073          */
5074         .swap_activate = cifs_swap_activate,
5075         .swap_deactivate = cifs_swap_deactivate,
5076 };
5077
5078 /*
5079  * cifs_readahead requires the server to support a buffer large enough to
5080  * contain the header plus one complete page of data.  Otherwise, we need
5081  * to leave cifs_readahead out of the address space operations.
5082  */
5083 const struct address_space_operations cifs_addr_ops_smallbuf = {
5084         .read_folio = cifs_read_folio,
5085         .writepages = cifs_writepages,
5086         .write_begin = cifs_write_begin,
5087         .write_end = cifs_write_end,
5088         .dirty_folio = cifs_dirty_folio,
5089         .release_folio = cifs_release_folio,
5090         .invalidate_folio = cifs_invalidate_folio,
5091         .launder_folio = cifs_launder_folio,
5092         .migrate_folio = filemap_migrate_folio,
5093 };