cifs: Fix cifs_writepages_region()
fs/cifs/file.c (platform/kernel/linux-rpi.git)
// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
                if (xas_retry(&xas, folio))
                        continue;
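                /*
                 * folio_lock() may sleep, so pause the XArray iteration and
                 * drop the RCU read lock before taking it; the walk resumes
                 * from the paused position on the next iteration.
                 */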
                xas_pause(&xas);
                rcu_read_unlock();
                folio_lock(folio);
                folio_clear_dirty_for_io(folio);
                folio_unlock(folio);
                rcu_read_lock();
        }

        rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        if (!len)
                return;

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
                if (xas_retry(&xas, folio))
                        continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio_index(folio), end);
                        continue;
                }

                folio_detach_private(folio);
                folio_end_writeback(folio);
        }

        rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        if (!len)
                return;

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
                if (xas_retry(&xas, folio))
                        continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio_index(folio), end);
                        continue;
                }

                folio_set_error(folio);
                folio_end_writeback(folio);
        }

        rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        if (!len)
                return;

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
                if (xas_retry(&xas, folio))
                        continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio_index(folio), end);
                        continue;
                }

                filemap_dirty_folio(folio->mapping, folio);
                folio_end_writeback(folio);
        }

        rcu_read_unlock();
}

/*
 * Mark all open files on the tree connection as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->ses->ses_lock);
        if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
                spin_unlock(&tcon->ses->ses_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->ses->ses_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause unnecessary access denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

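        /*
         * Fallback for an O_ACCMODE value outside the three standard access
         * modes: request a conservative set of rights that still permits
         * reading and writing data and attributes.
         */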
        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
                        struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
                        struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new one as FILE_SUPERSEDE does
 *      (which also applies the attributes / metadata passed in on
 *      the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably. O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = fid,
        };

        rc = server->ops->open(xid, &oparms, oplock, buf);
        if (rc)
                return rc;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

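/*
 * Take lock_sem for write by polling down_write_trylock() with a short
 * sleep between attempts. The rationale is not stated here; presumably
 * this keeps the thread out of the rwsem writer wait queue so readers
 * are never blocked behind a queued writer.
 */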
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                                       struct tcon_link *tlink, __u32 oplock,
                                       const char *symlink_target)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        if (symlink_target) {
                cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
                if (!cfile->symlink_target) {
                        kfree(fdlocks);
                        kfree(cfile);
                        return NULL;
                }
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
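        /*
         * A lease break may already have raced with this open; if so, the
         * pending_open record carries the oplock level the server downgraded
         * us to, so prefer that over the level returned by the open itself.
         */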
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file->symlink_target);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, queue the final release to a workqueue; must be
 *              false on the close and oplock break paths
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        struct cifs_open_info_data data = {};

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
                                  xid, &data);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (file->f_flags & O_DIRECT &&
            ((file->f_flags & O_ACCMODE) == O_RDONLY ||
             file->f_flags & O_APPEND))
                cifs_invalidate_cache(file_inode(file),
                                      FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        cifs_free_open_info(&data);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

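        /*
         * down_read_nested() is used because lock_sem may already be held
         * for read further up the reopen call chain; the
         * SINGLE_DEPTH_NESTING annotation keeps lockdep from flagging that
         * as recursion.
         */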
        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * We cannot grab the rename sem here: various ops, including some
         * that already hold it, can end up causing writepage to be called,
         * and if the server was down, that path lands us here. We can never
         * tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * in the reconnect path especially, it is important to
                 * retry hard.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = &cfile->fid,
                .reconnect = true,
        };

        /*
         * Cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty
         * locally, we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush data; and since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we cannot go to the server to get the new
         * inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
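                        /*
                         * We hold a read/handle/write lease on the inode, so
                         * defer the actual close: a quick reopen of the same
                         * path can then reuse this handle (see
                         * cifs_get_readable_path() in cifs_open()).
                         */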
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If no work is pending, mod_delayed_work
                                 * queues new work, so take an extra reference
                                 * to avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles  */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

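        /*
         * Do the reopens outside of open_file_lock: cifs_reopen_file()
         * issues network I/O and may sleep, so it must not run under the
         * spinlock. The extra reference taken above keeps each entry alive.
         */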
        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP (0) - not a read/write op, CIFS_READ_OP (1) - read, CIFS_WRITE_OP (2) - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
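                /* locks whose byte ranges do not overlap cannot conflict */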
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
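                /*
                 * Wait until we have been taken off the conflicting lock's
                 * blocked list; the condition below is an open-coded
                 * list_empty(&lock->blist) check.
                 */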
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

1412 /*
1413  * Set the byte-range lock (posix style). Returns:
1414  * 1) <0, if the error occurs while setting the lock;
1415  * 2) 0, if we set the lock and don't need to request to the server;
1416  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1417  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1418  */
1419 static int
1420 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1421 {
1422         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1423         int rc = FILE_LOCK_DEFERRED + 1;
1424
1425         if ((flock->fl_flags & FL_POSIX) == 0)
1426                 return rc;
1427
1428         cifs_down_write(&cinode->lock_sem);
1429         if (!cinode->can_cache_brlcks) {
1430                 up_write(&cinode->lock_sem);
1431                 return rc;
1432         }
1433
1434         rc = posix_lock_file(file, flock, NULL);
1435         up_write(&cinode->lock_sem);
1436         return rc;
1437 }
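
/*
 * Illustrative sketch, compiled out: how a caller consumes the return
 * contract documented above (cifs_setlk() below performs exactly this
 * check; example_send_lock_to_server() is hypothetical).
 */
#if 0
static int example_set_posix(struct file *file, struct file_lock *flock)
{
        int rc = cifs_posix_lock_set(file, flock);

        if (rc <= FILE_LOCK_DEFERRED)
                return rc;      /* handled (or failed) locally */

        /* rc == FILE_LOCK_DEFERRED + 1: the server must be told */
        return example_send_lock_to_server(file, flock);
}
#endif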
1438
1439 int
1440 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1441 {
1442         unsigned int xid;
1443         int rc = 0, stored_rc;
1444         struct cifsLockInfo *li, *tmp;
1445         struct cifs_tcon *tcon;
1446         unsigned int num, max_num, max_buf;
1447         LOCKING_ANDX_RANGE *buf, *cur;
1448         static const int types[] = {
1449                 LOCKING_ANDX_LARGE_FILES,
1450                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1451         };
1452         int i;
1453
1454         xid = get_xid();
1455         tcon = tlink_tcon(cfile->tlink);
1456
1457         /*
1458          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1459          * and check it before using.
1460          */
1461         max_buf = tcon->ses->server->maxBuf;
1462         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1463                 free_xid(xid);
1464                 return -EINVAL;
1465         }
1466
1467         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1468                      PAGE_SIZE);
1469         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1470                         PAGE_SIZE);
1471         max_num = (max_buf - sizeof(struct smb_hdr)) /
1472                                                 sizeof(LOCKING_ANDX_RANGE);
1473         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1474         if (!buf) {
1475                 free_xid(xid);
1476                 return -ENOMEM;
1477         }
1478
1479         for (i = 0; i < 2; i++) {
1480                 cur = buf;
1481                 num = 0;
1482                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1483                         if (li->type != types[i])
1484                                 continue;
1485                         cur->Pid = cpu_to_le16(li->pid);
1486                         cur->LengthLow = cpu_to_le32((u32)li->length);
1487                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1488                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1489                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1490                         if (++num == max_num) {
1491                                 stored_rc = cifs_lockv(xid, tcon,
1492                                                        cfile->fid.netfid,
1493                                                        (__u8)li->type, 0, num,
1494                                                        buf);
1495                                 if (stored_rc)
1496                                         rc = stored_rc;
1497                                 cur = buf;
1498                                 num = 0;
1499                         } else
1500                                 cur++;
1501                 }
1502
1503                 if (num) {
1504                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1505                                                (__u8)types[i], 0, num, buf);
1506                         if (stored_rc)
1507                                 rc = stored_rc;
1508                 }
1509         }
1510
1511         kfree(buf);
1512         free_xid(xid);
1513         return rc;
1514 }
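
/*
 * Illustrative sketch, compiled out: the sizing logic above in isolation -
 * how many LOCKING_ANDX_RANGE entries fit into a single request for a given
 * server maxBuf (capped at PAGE_SIZE), mirroring the checks in
 * cifs_push_mandatory_locks() and cifs_unlock_range().
 */
#if 0
static unsigned int example_max_lock_ranges(unsigned int max_buf)
{
        if (max_buf < sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))
                return 0;       /* callers treat this as -EINVAL */

        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        return (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
}
#endif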
1515
1516 static __u32
1517 hash_lockowner(fl_owner_t owner)
1518 {
1519         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1520 }
1521 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1522
1523 struct lock_to_push {
1524         struct list_head llist;
1525         __u64 offset;
1526         __u64 length;
1527         __u32 pid;
1528         __u16 netfid;
1529         __u8 type;
1530 };
1531
1532 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1533 static int
1534 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1535 {
1536         struct inode *inode = d_inode(cfile->dentry);
1537         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1538         struct file_lock *flock;
1539         struct file_lock_context *flctx = locks_inode_context(inode);
1540         unsigned int count = 0, i;
1541         int rc = 0, xid, type;
1542         struct list_head locks_to_send, *el;
1543         struct lock_to_push *lck, *tmp;
1544         __u64 length;
1545
1546         xid = get_xid();
1547
1548         if (!flctx)
1549                 goto out;
1550
1551         spin_lock(&flctx->flc_lock);
1552         list_for_each(el, &flctx->flc_posix) {
1553                 count++;
1554         }
1555         spin_unlock(&flctx->flc_lock);
1556
1557         INIT_LIST_HEAD(&locks_to_send);
1558
1559         /*
1560          * Allocating count locks is enough because no FL_POSIX locks can be
1561          * added to the list while we hold cinode->lock_sem, which protects
1562          * the locking operations on this inode.
1563          */
1564         for (i = 0; i < count; i++) {
1565                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1566                 if (!lck) {
1567                         rc = -ENOMEM;
1568                         goto err_out;
1569                 }
1570                 list_add_tail(&lck->llist, &locks_to_send);
1571         }
1572
1573         el = locks_to_send.next;
1574         spin_lock(&flctx->flc_lock);
1575         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1576                 if (el == &locks_to_send) {
1577                         /*
1578                          * The list ended. We don't have enough allocated
1579                          * structures - something is really wrong.
1580                          */
1581                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1582                         break;
1583                 }
1584                 length = cifs_flock_len(flock);
1585                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1586                         type = CIFS_RDLCK;
1587                 else
1588                         type = CIFS_WRLCK;
1589                 lck = list_entry(el, struct lock_to_push, llist);
1590                 lck->pid = hash_lockowner(flock->fl_owner);
1591                 lck->netfid = cfile->fid.netfid;
1592                 lck->length = length;
1593                 lck->type = type;
1594                 lck->offset = flock->fl_start;
1595         }
1596         spin_unlock(&flctx->flc_lock);
1597
1598         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1599                 int stored_rc;
1600
1601                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1602                                              lck->offset, lck->length, NULL,
1603                                              lck->type, 0);
1604                 if (stored_rc)
1605                         rc = stored_rc;
1606                 list_del(&lck->llist);
1607                 kfree(lck);
1608         }
1609
1610 out:
1611         free_xid(xid);
1612         return rc;
1613 err_out:
1614         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1615                 list_del(&lck->llist);
1616                 kfree(lck);
1617         }
1618         goto out;
1619 }
1620 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1621
1622 static int
1623 cifs_push_locks(struct cifsFileInfo *cfile)
1624 {
1625         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1626         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1627         int rc = 0;
1628 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1629         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1630 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1631
1632         /* we are going to update can_cache_brlcks here - need write access */
1633         cifs_down_write(&cinode->lock_sem);
1634         if (!cinode->can_cache_brlcks) {
1635                 up_write(&cinode->lock_sem);
1636                 return rc;
1637         }
1638
1639 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1640         if (cap_unix(tcon->ses) &&
1641             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1642             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1643                 rc = cifs_push_posix_locks(cfile);
1644         else
1645 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1646                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1647
1648         cinode->can_cache_brlcks = false;
1649         up_write(&cinode->lock_sem);
1650         return rc;
1651 }
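
/*
 * Illustrative sketch, compiled out: the dispatch above, restated as a
 * predicate.  The same three-part condition is repeated below in
 * cifs_flock() and cifs_lock() when deciding posix_lck.
 */
#if 0
static bool example_use_posix_brlocks(struct cifs_tcon *tcon,
                                      struct cifs_sb_info *cifs_sb)
{
        return cap_unix(tcon->ses) &&
               (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
               !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL);
}
#endif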
1652
1653 static void
1654 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1655                 bool *wait_flag, struct TCP_Server_Info *server)
1656 {
1657         if (flock->fl_flags & FL_POSIX)
1658                 cifs_dbg(FYI, "Posix\n");
1659         if (flock->fl_flags & FL_FLOCK)
1660                 cifs_dbg(FYI, "Flock\n");
1661         if (flock->fl_flags & FL_SLEEP) {
1662                 cifs_dbg(FYI, "Blocking lock\n");
1663                 *wait_flag = true;
1664         }
1665         if (flock->fl_flags & FL_ACCESS)
1666                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1667         if (flock->fl_flags & FL_LEASE)
1668                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1669         if (flock->fl_flags &
1670             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1671                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1672                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1673
1674         *type = server->vals->large_lock_type;
1675         if (flock->fl_type == F_WRLCK) {
1676                 cifs_dbg(FYI, "F_WRLCK\n");
1677                 *type |= server->vals->exclusive_lock_type;
1678                 *lock = 1;
1679         } else if (flock->fl_type == F_UNLCK) {
1680                 cifs_dbg(FYI, "F_UNLCK\n");
1681                 *type |= server->vals->unlock_lock_type;
1682                 *unlock = 1;
1683                 /* Check if unlock includes more than one lock range */
1684         } else if (flock->fl_type == F_RDLCK) {
1685                 cifs_dbg(FYI, "F_RDLCK\n");
1686                 *type |= server->vals->shared_lock_type;
1687                 *lock = 1;
1688         } else if (flock->fl_type == F_EXLCK) {
1689                 cifs_dbg(FYI, "F_EXLCK\n");
1690                 *type |= server->vals->exclusive_lock_type;
1691                 *lock = 1;
1692         } else if (flock->fl_type == F_SHLCK) {
1693                 cifs_dbg(FYI, "F_SHLCK\n");
1694                 *type |= server->vals->shared_lock_type;
1695                 *lock = 1;
1696         } else
1697                 cifs_dbg(FYI, "Unknown type of lock\n");
1698 }
1699
1700 static int
1701 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1702            bool wait_flag, bool posix_lck, unsigned int xid)
1703 {
1704         int rc = 0;
1705         __u64 length = cifs_flock_len(flock);
1706         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1707         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1708         struct TCP_Server_Info *server = tcon->ses->server;
1709 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1710         __u16 netfid = cfile->fid.netfid;
1711
1712         if (posix_lck) {
1713                 int posix_lock_type;
1714
1715                 rc = cifs_posix_lock_test(file, flock);
1716                 if (!rc)
1717                         return rc;
1718
1719                 if (type & server->vals->shared_lock_type)
1720                         posix_lock_type = CIFS_RDLCK;
1721                 else
1722                         posix_lock_type = CIFS_WRLCK;
1723                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1724                                       hash_lockowner(flock->fl_owner),
1725                                       flock->fl_start, length, flock,
1726                                       posix_lock_type, wait_flag);
1727                 return rc;
1728         }
1729 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1730
1731         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1732         if (!rc)
1733                 return rc;
1734
1735         /* BB we could chain these into one lock request BB */
1736         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1737                                     1, 0, false);
1738         if (rc == 0) {
1739                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1740                                             type, 0, 1, false);
1741                 flock->fl_type = F_UNLCK;
1742                 if (rc != 0)
1743                         cifs_dbg(VFS, "Error unlocking previously locked range during lock test, rc=%d\n",
1744                                  rc);
1745                 return 0;
1746         }
1747
1748         if (type & server->vals->shared_lock_type) {
1749                 flock->fl_type = F_WRLCK;
1750                 return 0;
1751         }
1752
1753         type &= ~server->vals->exclusive_lock_type;
1754
1755         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1756                                     type | server->vals->shared_lock_type,
1757                                     1, 0, false);
1758         if (rc == 0) {
1759                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1760                         type | server->vals->shared_lock_type, 0, 1, false);
1761                 flock->fl_type = F_RDLCK;
1762                 if (rc != 0)
1763                         cifs_dbg(VFS, "Error unlocking previously locked range during lock test, rc=%d\n",
1764                                  rc);
1765         } else
1766                 flock->fl_type = F_WRLCK;
1767
1768         return 0;
1769 }
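
/*
 * Illustrative sketch, compiled out: cifs_getlk() emulates F_GETLK on
 * servers without a native "test lock" operation by probing - take the
 * lock non-blocking, and undo it immediately if that worked.  The
 * example_try_lock()/example_unlock() helpers below are hypothetical
 * stand-ins for the server->ops->mand_lock() calls above.
 */
#if 0
static bool example_range_is_free(unsigned int xid, struct cifsFileInfo *cfile,
                                  __u64 start, __u64 len, __u32 type)
{
        if (example_try_lock(xid, cfile, start, len, type))
                return false;           /* conflict: someone holds it */
        example_unlock(xid, cfile, start, len, type);
        return true;                    /* no conflict observed */
}
#endif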
1770
1771 void
1772 cifs_move_llist(struct list_head *source, struct list_head *dest)
1773 {
1774         struct list_head *li, *tmp;
1775         list_for_each_safe(li, tmp, source)
1776                 list_move(li, dest);
1777 }
1778
1779 void
1780 cifs_free_llist(struct list_head *llist)
1781 {
1782         struct cifsLockInfo *li, *tmp;
1783         list_for_each_entry_safe(li, tmp, llist, llist) {
1784                 cifs_del_lock_waiters(li);
1785                 list_del(&li->llist);
1786                 kfree(li);
1787         }
1788 }
1789
1790 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1791 int
1792 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1793                   unsigned int xid)
1794 {
1795         int rc = 0, stored_rc;
1796         static const int types[] = {
1797                 LOCKING_ANDX_LARGE_FILES,
1798                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1799         };
1800         unsigned int i;
1801         unsigned int max_num, num, max_buf;
1802         LOCKING_ANDX_RANGE *buf, *cur;
1803         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1804         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1805         struct cifsLockInfo *li, *tmp;
1806         __u64 length = cifs_flock_len(flock);
1807         struct list_head tmp_llist;
1808
1809         INIT_LIST_HEAD(&tmp_llist);
1810
1811         /*
1812          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1813          * and check it before using.
1814          */
1815         max_buf = tcon->ses->server->maxBuf;
1816         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1817                 return -EINVAL;
1818
1819         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1820                      PAGE_SIZE);
1821         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1822                         PAGE_SIZE);
1823         max_num = (max_buf - sizeof(struct smb_hdr)) /
1824                                                 sizeof(LOCKING_ANDX_RANGE);
1825         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1826         if (!buf)
1827                 return -ENOMEM;
1828
1829         cifs_down_write(&cinode->lock_sem);
1830         for (i = 0; i < 2; i++) {
1831                 cur = buf;
1832                 num = 0;
1833                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1834                         if (flock->fl_start > li->offset ||
1835                             (flock->fl_start + length) <
1836                             (li->offset + li->length))
1837                                 continue;
1838                         if (current->tgid != li->pid)
1839                                 continue;
1840                         if (types[i] != li->type)
1841                                 continue;
1842                         if (cinode->can_cache_brlcks) {
1843                                 /*
1844                                  * We can cache brlock requests - simply remove
1845                                  * a lock from the file's list.
1846                                  */
1847                                 list_del(&li->llist);
1848                                 cifs_del_lock_waiters(li);
1849                                 kfree(li);
1850                                 continue;
1851                         }
1852                         cur->Pid = cpu_to_le16(li->pid);
1853                         cur->LengthLow = cpu_to_le32((u32)li->length);
1854                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1855                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1856                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1857                         /*
1858                          * We need to save a lock here to let us add it again to
1859                          * the file's list if the unlock range request fails on
1860                          * the server.
1861                          */
1862                         list_move(&li->llist, &tmp_llist);
1863                         if (++num == max_num) {
1864                                 stored_rc = cifs_lockv(xid, tcon,
1865                                                        cfile->fid.netfid,
1866                                                        li->type, num, 0, buf);
1867                                 if (stored_rc) {
1868                                         /*
1869                                          * We failed on the unlock range
1870                                          * request - add all locks from the tmp
1871                                          * list to the head of the file's list.
1872                                          */
1873                                         cifs_move_llist(&tmp_llist,
1874                                                         &cfile->llist->locks);
1875                                         rc = stored_rc;
1876                                 } else
1877                                         /*
1878                          * The unlock range request succeeded -
1879                                          * free the tmp list.
1880                                          */
1881                                         cifs_free_llist(&tmp_llist);
1882                                 cur = buf;
1883                                 num = 0;
1884                         } else
1885                                 cur++;
1886                 }
1887                 if (num) {
1888                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1889                                                types[i], num, 0, buf);
1890                         if (stored_rc) {
1891                                 cifs_move_llist(&tmp_llist,
1892                                                 &cfile->llist->locks);
1893                                 rc = stored_rc;
1894                         } else
1895                                 cifs_free_llist(&tmp_llist);
1896                 }
1897         }
1898
1899         up_write(&cinode->lock_sem);
1900         kfree(buf);
1901         return rc;
1902 }
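
/*
 * Illustrative sketch, compiled out: the (inverted) containment test at the
 * top of the loop above, restated - a stored lock is only eligible for this
 * unlock if it lies entirely inside the requested unlock range.
 */
#if 0
static bool example_unlock_covers(__u64 ustart, __u64 ulen,
                                  __u64 lstart, __u64 llen)
{
        return ustart <= lstart && lstart + llen <= ustart + ulen;
}
#endif
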
1903 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1904
1905 static int
1906 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1907            bool wait_flag, bool posix_lck, int lock, int unlock,
1908            unsigned int xid)
1909 {
1910         int rc = 0;
1911         __u64 length = cifs_flock_len(flock);
1912         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1913         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1914         struct TCP_Server_Info *server = tcon->ses->server;
1915         struct inode *inode = d_inode(cfile->dentry);
1916
1917 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1918         if (posix_lck) {
1919                 int posix_lock_type;
1920
1921                 rc = cifs_posix_lock_set(file, flock);
1922                 if (rc <= FILE_LOCK_DEFERRED)
1923                         return rc;
1924
1925                 if (type & server->vals->shared_lock_type)
1926                         posix_lock_type = CIFS_RDLCK;
1927                 else
1928                         posix_lock_type = CIFS_WRLCK;
1929
1930                 if (unlock == 1)
1931                         posix_lock_type = CIFS_UNLCK;
1932
1933                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1934                                       hash_lockowner(flock->fl_owner),
1935                                       flock->fl_start, length,
1936                                       NULL, posix_lock_type, wait_flag);
1937                 goto out;
1938         }
1939 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1940         if (lock) {
1941                 struct cifsLockInfo *lock;
1942
1943                 lock = cifs_lock_init(flock->fl_start, length, type,
1944                                       flock->fl_flags);
1945                 if (!lock)
1946                         return -ENOMEM;
1947
1948                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1949                 if (rc < 0) {
1950                         kfree(lock);
1951                         return rc;
1952                 }
1953                 if (!rc)
1954                         goto out;
1955
1956                 /*
1957                  * A Windows 7 server can delay breaking a lease from read to
1958                  * None if we set a byte-range lock on a file - break it
1959                  * explicitly before sending the lock to the server to be sure
1960                  * the next read won't conflict with non-overlapping locks due
1961                  * to page reading.
1962                  */
1963                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1964                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1965                         cifs_zap_mapping(inode);
1966                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1967                                  inode);
1968                         CIFS_I(inode)->oplock = 0;
1969                 }
1970
1971                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1972                                             type, 1, 0, wait_flag);
1973                 if (rc) {
1974                         kfree(lock);
1975                         return rc;
1976                 }
1977
1978                 cifs_lock_add(cfile, lock);
1979         } else if (unlock)
1980                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1981
1982 out:
1983         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1984                 /*
1985                  * If this is a request to remove all locks because we
1986                  * are closing the file, it doesn't matter if the
1987                  * unlocking failed as both cifs.ko and the SMB server
1988                  * remove the lock on file close
1989                  */
1990                 if (rc) {
1991                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1992                         if (!(flock->fl_flags & FL_CLOSE))
1993                                 return rc;
1994                 }
1995                 rc = locks_lock_file_wait(file, flock);
1996         }
1997         return rc;
1998 }
1999
2000 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2001 {
2002         int rc, xid;
2003         int lock = 0, unlock = 0;
2004         bool wait_flag = false;
2005         bool posix_lck = false;
2006         struct cifs_sb_info *cifs_sb;
2007         struct cifs_tcon *tcon;
2008         struct cifsFileInfo *cfile;
2009         __u32 type;
2010
2011         xid = get_xid();
2012
2013         if (!(fl->fl_flags & FL_FLOCK)) {
2014                 rc = -ENOLCK;
2015                 free_xid(xid);
2016                 return rc;
2017         }
2018
2019         cfile = (struct cifsFileInfo *)file->private_data;
2020         tcon = tlink_tcon(cfile->tlink);
2021
2022         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2023                         tcon->ses->server);
2024         cifs_sb = CIFS_FILE_SB(file);
2025
2026         if (cap_unix(tcon->ses) &&
2027             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2028             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2029                 posix_lck = true;
2030
2031         if (!lock && !unlock) {
2032                 /*
2033                  * if this is neither a lock nor an unlock request then there
2034                  * is nothing to do since we do not know what it is
2035                  */
2036                 rc = -EOPNOTSUPP;
2037                 free_xid(xid);
2038                 return rc;
2039         }
2040
2041         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2042                         xid);
2043         free_xid(xid);
2044         return rc;
2045
2046
2047 }
2048
2049 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2050 {
2051         int rc, xid;
2052         int lock = 0, unlock = 0;
2053         bool wait_flag = false;
2054         bool posix_lck = false;
2055         struct cifs_sb_info *cifs_sb;
2056         struct cifs_tcon *tcon;
2057         struct cifsFileInfo *cfile;
2058         __u32 type;
2059
2060         rc = -EACCES;
2061         xid = get_xid();
2062
2063         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
2064                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2065                  (long long)flock->fl_end);
2066
2067         cfile = (struct cifsFileInfo *)file->private_data;
2068         tcon = tlink_tcon(cfile->tlink);
2069
2070         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2071                         tcon->ses->server);
2072         cifs_sb = CIFS_FILE_SB(file);
2073         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2074
2075         if (cap_unix(tcon->ses) &&
2076             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2077             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2078                 posix_lck = true;
2079         /*
2080          * BB add code here to normalize offset and length to account for
2081          * negative length which we can not accept over the wire.
2082          */
2083         if (IS_GETLK(cmd)) {
2084                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2085                 free_xid(xid);
2086                 return rc;
2087         }
2088
2089         if (!lock && !unlock) {
2090                 /*
2091                  * if this is neither a lock nor an unlock request then there
2092                  * is nothing to do since we do not know what it is
2093                  */
2094                 free_xid(xid);
2095                 return -EOPNOTSUPP;
2096         }
2097
2098         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2099                         xid);
2100         free_xid(xid);
2101         return rc;
2102 }
2103
2104 /*
2105  * update the file size (if needed) after a write. Should be called with
2106  * the inode->i_lock held
2107  */
2108 void
2109 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2110                       unsigned int bytes_written)
2111 {
2112         loff_t end_of_write = offset + bytes_written;
2113
2114         if (end_of_write > cifsi->server_eof)
2115                 cifsi->server_eof = end_of_write;
2116 }
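
/*
 * Illustrative sketch, compiled out: cifs_update_eof() assumes the caller
 * holds inode->i_lock, as both cifs_write() below and cifs_writev_complete()
 * do.  A minimal caller looks like this:
 */
#if 0
static void example_note_write(struct inode *inode, loff_t offset,
                               unsigned int written)
{
        spin_lock(&inode->i_lock);
        cifs_update_eof(CIFS_I(inode), offset, written);
        spin_unlock(&inode->i_lock);
}
#endif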
2117
2118 static ssize_t
2119 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2120            size_t write_size, loff_t *offset)
2121 {
2122         int rc = 0;
2123         unsigned int bytes_written = 0;
2124         unsigned int total_written;
2125         struct cifs_tcon *tcon;
2126         struct TCP_Server_Info *server;
2127         unsigned int xid;
2128         struct dentry *dentry = open_file->dentry;
2129         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2130         struct cifs_io_parms io_parms = {0};
2131
2132         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2133                  write_size, *offset, dentry);
2134
2135         tcon = tlink_tcon(open_file->tlink);
2136         server = tcon->ses->server;
2137
2138         if (!server->ops->sync_write)
2139                 return -ENOSYS;
2140
2141         xid = get_xid();
2142
2143         for (total_written = 0; write_size > total_written;
2144              total_written += bytes_written) {
2145                 rc = -EAGAIN;
2146                 while (rc == -EAGAIN) {
2147                         struct kvec iov[2];
2148                         unsigned int len;
2149
2150                         if (open_file->invalidHandle) {
2151                                 /* we could deadlock if we called
2152                                    filemap_fdatawait from here so tell
2153                                    reopen_file not to flush data to
2154                                    server now */
2155                                 rc = cifs_reopen_file(open_file, false);
2156                                 if (rc != 0)
2157                                         break;
2158                         }
2159
2160                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2161                                   (unsigned int)write_size - total_written);
2162                         /* iov[0] is reserved for smb header */
2163                         iov[1].iov_base = (char *)write_data + total_written;
2164                         iov[1].iov_len = len;
2165                         io_parms.pid = pid;
2166                         io_parms.tcon = tcon;
2167                         io_parms.offset = *offset;
2168                         io_parms.length = len;
2169                         rc = server->ops->sync_write(xid, &open_file->fid,
2170                                         &io_parms, &bytes_written, iov, 1);
2171                 }
2172                 if (rc || (bytes_written == 0)) {
2173                         if (total_written)
2174                                 break;
2175                         else {
2176                                 free_xid(xid);
2177                                 return rc;
2178                         }
2179                 } else {
2180                         spin_lock(&d_inode(dentry)->i_lock);
2181                         cifs_update_eof(cifsi, *offset, bytes_written);
2182                         spin_unlock(&d_inode(dentry)->i_lock);
2183                         *offset += bytes_written;
2184                 }
2185         }
2186
2187         cifs_stats_bytes_written(tcon, total_written);
2188
2189         if (total_written > 0) {
2190                 spin_lock(&d_inode(dentry)->i_lock);
2191                 if (*offset > d_inode(dentry)->i_size) {
2192                         i_size_write(d_inode(dentry), *offset);
2193                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2194                 }
2195                 spin_unlock(&d_inode(dentry)->i_lock);
2196         }
2197         mark_inode_dirty_sync(d_inode(dentry));
2198         free_xid(xid);
2199         return total_written;
2200 }
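
/*
 * Illustrative sketch, compiled out: the loop structure of cifs_write()
 * reduced to its skeleton - chunk the buffer by the server's retry size and
 * stop on the first failed or zero-length write.  example_sync_write() is a
 * hypothetical stand-in for server->ops->sync_write().
 */
#if 0
static ssize_t example_write_all(const char *data, size_t size, loff_t *pos,
                                 unsigned int chunk)
{
        size_t done = 0;

        while (done < size) {
                unsigned int n = min_t(size_t, chunk, size - done);

                if (example_sync_write(data + done, n, *pos) <= 0)
                        break;
                *pos += n;
                done += n;
        }
        return done ? done : -EIO;
}
#endif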
2201
2202 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2203                                         bool fsuid_only)
2204 {
2205         struct cifsFileInfo *open_file = NULL;
2206         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2207
2208         /* only filter by fsuid on multiuser mounts */
2209         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2210                 fsuid_only = false;
2211
2212         spin_lock(&cifs_inode->open_file_lock);
2213         /* we could simply get the first_list_entry since write-only entries
2214            are always at the end of the list but since the first entry might
2215            have a close pending, we go through the whole list */
2216         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2217                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2218                         continue;
2219                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2220                         if (!open_file->invalidHandle) {
2221                                 /* found a good file */
2222                                 /* lock it so it will not be closed on us */
2223                                 cifsFileInfo_get(open_file);
2224                                 spin_unlock(&cifs_inode->open_file_lock);
2225                                 return open_file;
2226                         } /* else might as well continue, and look for
2227                              another, or simply have the caller reopen it
2228                              again rather than trying to fix this handle */
2229                 } else /* write only file */
2230                         break; /* write only files are last so must be done */
2231         }
2232         spin_unlock(&cifs_inode->open_file_lock);
2233         return NULL;
2234 }
2235
2236 /* Return -EBADF if no handle is found and general rc otherwise */
2237 int
2238 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2239                        struct cifsFileInfo **ret_file)
2240 {
2241         struct cifsFileInfo *open_file, *inv_file = NULL;
2242         struct cifs_sb_info *cifs_sb;
2243         bool any_available = false;
2244         int rc = -EBADF;
2245         unsigned int refind = 0;
2246         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2247         bool with_delete = flags & FIND_WR_WITH_DELETE;
2248         *ret_file = NULL;
2249
2250         /*
2251          * Having a null inode here (because mapping->host was set to zero by
2252          * the VFS or MM) should not happen but we had reports of an oops (due
2253          * to it being zero) during stress testcases so we need to check for it.
2254          */
2255
2256         if (cifs_inode == NULL) {
2257                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2258                 dump_stack();
2259                 return rc;
2260         }
2261
2262         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2263
2264         /* only filter by fsuid on multiuser mounts */
2265         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2266                 fsuid_only = false;
2267
2268         spin_lock(&cifs_inode->open_file_lock);
2269 refind_writable:
2270         if (refind > MAX_REOPEN_ATT) {
2271                 spin_unlock(&cifs_inode->open_file_lock);
2272                 return rc;
2273         }
2274         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2275                 if (!any_available && open_file->pid != current->tgid)
2276                         continue;
2277                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2278                         continue;
2279                 if (with_delete && !(open_file->fid.access & DELETE))
2280                         continue;
2281                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2282                         if (!open_file->invalidHandle) {
2283                                 /* found a good writable file */
2284                                 cifsFileInfo_get(open_file);
2285                                 spin_unlock(&cifs_inode->open_file_lock);
2286                                 *ret_file = open_file;
2287                                 return 0;
2288                         } else {
2289                                 if (!inv_file)
2290                                         inv_file = open_file;
2291                         }
2292                 }
2293         }
2294         /* couldn't find usable FH with same pid, try any available */
2295         if (!any_available) {
2296                 any_available = true;
2297                 goto refind_writable;
2298         }
2299
2300         if (inv_file) {
2301                 any_available = false;
2302                 cifsFileInfo_get(inv_file);
2303         }
2304
2305         spin_unlock(&cifs_inode->open_file_lock);
2306
2307         if (inv_file) {
2308                 rc = cifs_reopen_file(inv_file, false);
2309                 if (!rc) {
2310                         *ret_file = inv_file;
2311                         return 0;
2312                 }
2313
2314                 spin_lock(&cifs_inode->open_file_lock);
2315                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2316                 spin_unlock(&cifs_inode->open_file_lock);
2317                 cifsFileInfo_put(inv_file);
2318                 ++refind;
2319                 inv_file = NULL;
2320                 spin_lock(&cifs_inode->open_file_lock);
2321                 goto refind_writable;
2322         }
2323
2324         return rc;
2325 }
2326
2327 struct cifsFileInfo *
2328 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2329 {
2330         struct cifsFileInfo *cfile;
2331         int rc;
2332
2333         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2334         if (rc)
2335                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2336
2337         return cfile;
2338 }
2339
2340 int
2341 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2342                        int flags,
2343                        struct cifsFileInfo **ret_file)
2344 {
2345         struct cifsFileInfo *cfile;
2346         void *page = alloc_dentry_path();
2347
2348         *ret_file = NULL;
2349
2350         spin_lock(&tcon->open_file_lock);
2351         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2352                 struct cifsInodeInfo *cinode;
2353                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2354                 if (IS_ERR(full_path)) {
2355                         spin_unlock(&tcon->open_file_lock);
2356                         free_dentry_path(page);
2357                         return PTR_ERR(full_path);
2358                 }
2359                 if (strcmp(full_path, name))
2360                         continue;
2361
2362                 cinode = CIFS_I(d_inode(cfile->dentry));
2363                 spin_unlock(&tcon->open_file_lock);
2364                 free_dentry_path(page);
2365                 return cifs_get_writable_file(cinode, flags, ret_file);
2366         }
2367
2368         spin_unlock(&tcon->open_file_lock);
2369         free_dentry_path(page);
2370         return -ENOENT;
2371 }
2372
2373 int
2374 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2375                        struct cifsFileInfo **ret_file)
2376 {
2377         struct cifsFileInfo *cfile;
2378         void *page = alloc_dentry_path();
2379
2380         *ret_file = NULL;
2381
2382         spin_lock(&tcon->open_file_lock);
2383         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2384                 struct cifsInodeInfo *cinode;
2385                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2386                 if (IS_ERR(full_path)) {
2387                         spin_unlock(&tcon->open_file_lock);
2388                         free_dentry_path(page);
2389                         return PTR_ERR(full_path);
2390                 }
2391                 if (strcmp(full_path, name))
2392                         continue;
2393
2394                 cinode = CIFS_I(d_inode(cfile->dentry));
2395                 spin_unlock(&tcon->open_file_lock);
2396                 free_dentry_path(page);
2397                 *ret_file = find_readable_file(cinode, 0);
2398                 return *ret_file ? 0 : -ENOENT;
2399         }
2400
2401         spin_unlock(&tcon->open_file_lock);
2402         free_dentry_path(page);
2403         return -ENOENT;
2404 }
2405
2406 void
2407 cifs_writedata_release(struct kref *refcount)
2408 {
2409         struct cifs_writedata *wdata = container_of(refcount,
2410                                         struct cifs_writedata, refcount);
2411 #ifdef CONFIG_CIFS_SMB_DIRECT
2412         if (wdata->mr) {
2413                 smbd_deregister_mr(wdata->mr);
2414                 wdata->mr = NULL;
2415         }
2416 #endif
2417
2418         if (wdata->cfile)
2419                 cifsFileInfo_put(wdata->cfile);
2420
2421         kfree(wdata);
2422 }
2423
2424 /*
2425  * Write failed with a retryable error. Resend the write request. It's also
2426  * possible that the page was redirtied so re-clean the page.
2427  */
2428 static void
2429 cifs_writev_requeue(struct cifs_writedata *wdata)
2430 {
2431         int rc = 0;
2432         struct inode *inode = d_inode(wdata->cfile->dentry);
2433         struct TCP_Server_Info *server;
2434         unsigned int rest_len = wdata->bytes;
2435         loff_t fpos = wdata->offset;
2436
2437         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2438         do {
2439                 struct cifs_writedata *wdata2;
2440                 unsigned int wsize, cur_len;
2441
2442                 wsize = server->ops->wp_retry_size(inode);
2443                 if (wsize < rest_len) {
2444                         if (wsize < PAGE_SIZE) {
2445                                 rc = -EOPNOTSUPP;
2446                                 break;
2447                         }
2448                         cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2449                 } else {
2450                         cur_len = rest_len;
2451                 }
2452
2453                 wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2454                 if (!wdata2) {
2455                         rc = -ENOMEM;
2456                         break;
2457                 }
2458
2459                 wdata2->sync_mode = wdata->sync_mode;
2460                 wdata2->offset  = fpos;
2461                 wdata2->bytes   = cur_len;
2462                 wdata2->iter    = wdata->iter;
2463
2464                 iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2465                 iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2466
2467                 if (iov_iter_is_xarray(&wdata2->iter))
2468                         /* Check for pages having been redirtied and clean
2469                          * them.  We can do this by walking the xarray.  If
2470                          * it's not an xarray, then it's a DIO and we shouldn't
2471                          * be mucking around with the page bits.
2472                          */
2473                         cifs_undirty_folios(inode, fpos, cur_len);
2474
2475                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2476                                             &wdata2->cfile);
2477                 if (!wdata2->cfile) {
2478                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2479                                  rc);
2480                         if (!is_retryable_error(rc))
2481                                 rc = -EBADF;
2482                 } else {
2483                         wdata2->pid = wdata2->cfile->pid;
2484                         rc = server->ops->async_writev(wdata2,
2485                                                        cifs_writedata_release);
2486                 }
2487
2488                 kref_put(&wdata2->refcount, cifs_writedata_release);
2489                 if (rc) {
2490                         if (is_retryable_error(rc))
2491                                 continue;
2492                         fpos += cur_len;
2493                         rest_len -= cur_len;
2494                         break;
2495                 }
2496
2497                 fpos += cur_len;
2498                 rest_len -= cur_len;
2499         } while (rest_len > 0);
2500
2501         /* Clean up remaining pages from the original wdata */
2502         if (iov_iter_is_xarray(&wdata->iter))
2503                 cifs_pages_write_failed(inode, fpos, rest_len);
2504
2505         if (rc != 0 && !is_retryable_error(rc))
2506                 mapping_set_error(inode->i_mapping, rc);
2507         kref_put(&wdata->refcount, cifs_writedata_release);
2508 }
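
/*
 * Illustrative sketch, compiled out: the chunk-splitting rule used by the
 * requeue loop above, in isolation.  A resend covers the whole remainder if
 * it fits within wsize; otherwise it is the largest page-aligned piece, and
 * servers whose retry size is below one page cannot be requeued this way.
 */
#if 0
static unsigned int example_resend_chunk(unsigned int wsize,
                                         unsigned int rest_len)
{
        if (wsize >= rest_len)
                return rest_len;
        if (wsize < PAGE_SIZE)
                return 0;       /* the loop above fails with -EOPNOTSUPP */
        return min(round_down(wsize, PAGE_SIZE), rest_len);
}
#endif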
2509
2510 void
2511 cifs_writev_complete(struct work_struct *work)
2512 {
2513         struct cifs_writedata *wdata = container_of(work,
2514                                                 struct cifs_writedata, work);
2515         struct inode *inode = d_inode(wdata->cfile->dentry);
2516
2517         if (wdata->result == 0) {
2518                 spin_lock(&inode->i_lock);
2519                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2520                 spin_unlock(&inode->i_lock);
2521                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2522                                          wdata->bytes);
2523         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2524                 return cifs_writev_requeue(wdata);
2525
2526         if (wdata->result == -EAGAIN)
2527                 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2528         else if (wdata->result < 0)
2529                 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2530         else
2531                 cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2532
2533         if (wdata->result != -EAGAIN)
2534                 mapping_set_error(inode->i_mapping, wdata->result);
2535         kref_put(&wdata->refcount, cifs_writedata_release);
2536 }
2537
2538 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2539 {
2540         struct cifs_writedata *wdata;
2541
2542         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2543         if (wdata != NULL) {
2544                 kref_init(&wdata->refcount);
2545                 INIT_LIST_HEAD(&wdata->list);
2546                 init_completion(&wdata->done);
2547                 INIT_WORK(&wdata->work, complete);
2548         }
2549         return wdata;
2550 }
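
/*
 * Illustrative sketch, compiled out: the wdata lifecycle.  kref_init() in
 * cifs_writedata_alloc() creates the first reference; every extra reference
 * taken must be paired with a kref_put(..., cifs_writedata_release).  The
 * fields filled in below follow the real callers such as
 * cifs_write_back_from_locked_folio().
 */
#if 0
static void example_writedata_lifecycle(struct cifsFileInfo *cfile)
{
        struct cifs_writedata *wdata;

        wdata = cifs_writedata_alloc(cifs_writev_complete);
        if (!wdata)
                return;
        wdata->cfile = cifsFileInfo_get(cfile);
        /* ... set offset/bytes/iter and submit ... */
        kref_put(&wdata->refcount, cifs_writedata_release);
}
#endif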
2551
2552 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2553 {
2554         struct address_space *mapping = page->mapping;
2555         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2556         char *write_data;
2557         int rc = -EFAULT;
2558         int bytes_written = 0;
2559         struct inode *inode;
2560         struct cifsFileInfo *open_file;
2561
2562         if (!mapping || !mapping->host)
2563                 return -EFAULT;
2564
2565         inode = page->mapping->host;
2566
2567         offset += (loff_t)from;
2568         write_data = kmap(page);
2569         write_data += from;
2570
2571         if ((to > PAGE_SIZE) || (from > to)) {
2572                 kunmap(page);
2573                 return -EIO;
2574         }
2575
2576         /* racing with truncate? */
2577         if (offset > mapping->host->i_size) {
2578                 kunmap(page);
2579                 return 0; /* don't care */
2580         }
2581
2582         /* check to make sure that we are not extending the file */
2583         if (mapping->host->i_size - offset < (loff_t)to)
2584                 to = (unsigned)(mapping->host->i_size - offset);
2585
2586         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2587                                     &open_file);
2588         if (!rc) {
2589                 bytes_written = cifs_write(open_file, open_file->pid,
2590                                            write_data, to - from, &offset);
2591                 cifsFileInfo_put(open_file);
2592                 /* Does mm or vfs already set times? */
2593                 inode->i_atime = inode->i_mtime = current_time(inode);
2594                 if ((bytes_written > 0) && (offset))
2595                         rc = 0;
2596                 else if (bytes_written < 0)
2597                         rc = bytes_written;
2598                 else
2599                         rc = -EFAULT;
2600         } else {
2601                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2602                 if (!is_retryable_error(rc))
2603                         rc = -EIO;
2604         }
2605
2606         kunmap(page);
2607         return rc;
2608 }
2609
2610 /*
2611  * Extend the region to be written back to include subsequent contiguously
2612  * dirty pages if possible, but don't sleep while doing so.
2613  */
2614 static void cifs_extend_writeback(struct address_space *mapping,
2615                                   long *_count,
2616                                   loff_t start,
2617                                   int max_pages,
2618                                   size_t max_len,
2619                                   unsigned int *_len)
2620 {
2621         struct folio_batch batch;
2622         struct folio *folio;
2623         unsigned int psize, nr_pages;
2624         size_t len = *_len;
2625         pgoff_t index = (start + len) / PAGE_SIZE;
2626         bool stop = true;
2627         unsigned int i;
2628         XA_STATE(xas, &mapping->i_pages, index);
2629
2630         folio_batch_init(&batch);
2631
2632         do {
2633                 /* Firstly, we gather up a batch of contiguous dirty pages
2634                  * under the RCU read lock - but we can't clear the dirty flags
2635                  * there if any of those pages are mapped.
2636                  */
2637                 rcu_read_lock();
2638
2639                 xas_for_each(&xas, folio, ULONG_MAX) {
2640                         stop = true;
2641                         if (xas_retry(&xas, folio))
2642                                 continue;
2643                         if (xa_is_value(folio))
2644                                 break;
2645                         if (folio_index(folio) != index)
2646                                 break;
2647                         if (!folio_try_get_rcu(folio)) {
2648                                 xas_reset(&xas);
2649                                 continue;
2650                         }
2651                         nr_pages = folio_nr_pages(folio);
2652                         if (nr_pages > max_pages)
2653                                 break;
2654
2655                         /* Has the page moved or been split? */
2656                         if (unlikely(folio != xas_reload(&xas))) {
2657                                 folio_put(folio);
2658                                 break;
2659                         }
2660
2661                         if (!folio_trylock(folio)) {
2662                                 folio_put(folio);
2663                                 break;
2664                         }
2665                         if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
2666                                 folio_unlock(folio);
2667                                 folio_put(folio);
2668                                 break;
2669                         }
2670
2671                         max_pages -= nr_pages;
2672                         psize = folio_size(folio);
2673                         len += psize;
2674                         stop = false;
2675                         if (max_pages <= 0 || len >= max_len || *_count <= 0)
2676                                 stop = true;
2677
2678                         index += nr_pages;
2679                         if (!folio_batch_add(&batch, folio))
2680                                 break;
2681                         if (stop)
2682                                 break;
2683                 }
2684
2685                 if (!stop)
2686                         xas_pause(&xas);
2687                 rcu_read_unlock();
2688
2689                 /* Now, if we obtained any pages, we can shift them to being
2690                  * writable and mark them for caching.
2691                  */
2692                 if (!folio_batch_count(&batch))
2693                         break;
2694
2695                 for (i = 0; i < folio_batch_count(&batch); i++) {
2696                         folio = batch.folios[i];
2697                         /* The folio should be locked, dirty and not undergoing
2698                          * writeback from the loop above.
2699                          */
2700                         if (!folio_clear_dirty_for_io(folio))
2701                                 WARN_ON(1);
2702                         if (folio_start_writeback(folio))
2703                                 WARN_ON(1);
2704
2705                         *_count -= folio_nr_pages(folio);
2706                         folio_unlock(folio);
2707                 }
2708
2709                 folio_batch_release(&batch);
2710                 cond_resched();
2711         } while (!stop);
2712
2713         *_len = len;
2714 }
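
/*
 * Illustrative sketch, compiled out: the two-phase shape of
 * cifs_extend_writeback() above.  Folios are only gathered (referenced and
 * trylocked) under the RCU read lock; the dirty->writeback transition is
 * deferred to the second phase because folio_clear_dirty_for_io() may need
 * to walk the mappings of a mapped folio, which cannot be done under
 * rcu_read_lock().
 */
#if 0
static void example_commit_batch(struct folio_batch *batch, long *count)
{
        unsigned int i;

        for (i = 0; i < folio_batch_count(batch); i++) {
                struct folio *folio = batch->folios[i];

                folio_clear_dirty_for_io(folio);
                folio_start_writeback(folio);
                *count -= folio_nr_pages(folio);
                folio_unlock(folio);
        }
        folio_batch_release(batch);
}
#endif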
2715
2716 /*
2717  * Write back the locked page and any subsequent non-locked dirty pages.
2718  */
2719 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2720                                                  struct writeback_control *wbc,
2721                                                  struct folio *folio,
2722                                                  loff_t start, loff_t end)
2723 {
2724         struct inode *inode = mapping->host;
2725         struct TCP_Server_Info *server;
2726         struct cifs_writedata *wdata;
2727         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2728         struct cifs_credits credits_on_stack;
2729         struct cifs_credits *credits = &credits_on_stack;
2730         struct cifsFileInfo *cfile = NULL;
2731         unsigned int xid, wsize, len;
2732         loff_t i_size = i_size_read(inode);
2733         size_t max_len;
2734         long count = wbc->nr_to_write;
2735         int rc;
2736
2737         /* The folio should be locked, dirty and not undergoing writeback. */
2738         if (folio_start_writeback(folio))
2739                 WARN_ON(1);
2740
2741         count -= folio_nr_pages(folio);
2742         len = folio_size(folio);
2743
2744         xid = get_xid();
2745         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2746
2747         rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2748         if (rc) {
2749                 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2750                 goto err_xid;
2751         }
2752
2753         rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2754                                            &wsize, credits);
2755         if (rc != 0)
2756                 goto err_close;
2757
2758         wdata = cifs_writedata_alloc(cifs_writev_complete);
2759         if (!wdata) {
2760                 rc = -ENOMEM;
2761                 goto err_uncredit;
2762         }
2763
2764         wdata->sync_mode = wbc->sync_mode;
2765         wdata->offset = folio_pos(folio);
2766         wdata->pid = cfile->pid;
2767         wdata->credits = credits_on_stack;
2768         wdata->cfile = cfile;
2769         wdata->server = server;
2770         cfile = NULL;
2771
2772         /* Find all consecutive lockable dirty pages, stopping when we find a
2773          * page that is not immediately lockable, is not dirty or is missing,
2774          * or we reach the end of the range.
2775          */
2776         if (start < i_size) {
2777                 /* Trim the write to the EOF; the extra data is ignored.  Also
2778                  * put an upper limit on the size of a single write op.
2779                  */
2780                 max_len = wsize;
2781                 max_len = min_t(unsigned long long, max_len, end - start + 1);
2782                 max_len = min_t(unsigned long long, max_len, i_size - start);
2783
2784                 if (len < max_len) {
2785                         int max_pages = INT_MAX;
2786
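                             /* An smbdirect RDMA write is limited to the number of
                              * pages one fast-register MR can map.
                              */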
2787 #ifdef CONFIG_CIFS_SMB_DIRECT
2788                         if (server->smbd_conn)
2789                                 max_pages = server->smbd_conn->max_frmr_depth;
2790 #endif
2791                         max_pages -= folio_nr_pages(folio);
2792
2793                         if (max_pages > 0)
2794                                 cifs_extend_writeback(mapping, &count, start,
2795                                                       max_pages, max_len, &len);
2796                 }
2797                 len = min_t(loff_t, len, max_len);
2798         }
2799
2800         wdata->bytes = len;
2801
2802         /* We now have a contiguous set of dirty pages, each with writeback
2803          * set; the first page is still locked at this point, but all the rest
2804          * have been unlocked.
2805          */
2806         folio_unlock(folio);
2807
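             /* If the region overlaps the live part of the file, point an
              * ITER_XARRAY iterator at the pagecache and send the write
              * asynchronously; cifs_writev_complete() will finish it off.
              */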
2808         if (start < i_size) {
2809                 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2810                                 start, len);
2811
2812                 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2813                 if (rc)
2814                         goto err_wdata;
2815
2816                 if (wdata->cfile->invalidHandle)
2817                         rc = -EAGAIN;
2818                 else
2819                         rc = wdata->server->ops->async_writev(wdata,
2820                                                               cifs_writedata_release);
2821                 if (rc >= 0) {
2822                         kref_put(&wdata->refcount, cifs_writedata_release);
2823                         goto err_close;
2824                 }
2825         } else {
2826                 /* The dirty region was entirely beyond the EOF. */
2827                 cifs_pages_written_back(inode, start, len);
2828                 rc = 0;
2829         }
2830
2831 err_wdata:
2832         kref_put(&wdata->refcount, cifs_writedata_release);
2833 err_uncredit:
2834         add_credits_and_wake_if(server, credits, 0);
2835 err_close:
2836         if (cfile)
2837                 cifsFileInfo_put(cfile);
2838 err_xid:
2839         free_xid(xid);
2840         if (rc == 0) {
2841                 wbc->nr_to_write = count;
2842         } else if (is_retryable_error(rc)) {
2843                 cifs_pages_write_redirty(inode, start, len);
2844         } else {
2845                 cifs_pages_write_failed(inode, start, len);
2846                 mapping_set_error(mapping, rc);
2847         }
2848         /* Indicate that ctime and mtime need updating, as close is deferred */
2849         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2850         return rc;
2851 }
2852
2853 /*
2854  * Write a region of pages back to the server
2855  */
2856 static int cifs_writepages_region(struct address_space *mapping,
2857                                   struct writeback_control *wbc,
2858                                   loff_t start, loff_t end, loff_t *_next)
2859 {
2860         struct folio_batch fbatch;
2861         int skips = 0;
2862
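             /* Walk the dirty-tagged folios in the range in batches, writing each
              * back along with any contiguous dirty neighbours, until the range
              * or wbc->nr_to_write is exhausted.
              */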
2863         folio_batch_init(&fbatch);
2864         do {
2865                 int nr;
2866                 pgoff_t index = start / PAGE_SIZE;
2867
2868                 nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
2869                                             PAGECACHE_TAG_DIRTY, &fbatch);
2870                 if (!nr)
2871                         break;
2872
2873                 for (int i = 0; i < nr; i++) {
2874                         ssize_t ret;
2875                         struct folio *folio = fbatch.folios[i];
2876
2877 redo_folio:
2878                         start = folio_pos(folio); /* May regress with THPs */
2879
2880                         /* At this point we hold neither the i_pages lock nor the
2881                          * page lock: the page may be truncated or invalidated
2882                          * (changing page->mapping to NULL), or even swizzled
2883                          * back from swapper_space to tmpfs file mapping
2884                          */
2885                         if (wbc->sync_mode != WB_SYNC_NONE) {
2886                                 ret = folio_lock_killable(folio);
2887                                 if (ret < 0)
2888                                         goto write_error;
2889                         } else {
2890                                 if (!folio_trylock(folio))
2891                                         goto skip_write;
2892                         }
2893
2894                         if (folio_mapping(folio) != mapping ||
2895                             !folio_test_dirty(folio)) {
2896                                 start += folio_size(folio);
2897                                 folio_unlock(folio);
2898                                 continue;
2899                         }
2900
2901                         if (folio_test_writeback(folio) ||
2902                             folio_test_fscache(folio)) {
2903                                 folio_unlock(folio);
2904                                 if (wbc->sync_mode == WB_SYNC_NONE)
2905                                         goto skip_write;
2906
2907                                 folio_wait_writeback(folio);
2908 #ifdef CONFIG_CIFS_FSCACHE
2909                                 folio_wait_fscache(folio);
2910 #endif
2911                                 goto redo_folio;
2912                         }
2913
2914                         /* We hold the folio lock - it should've been dirty. */
2915                         WARN_ON(!folio_clear_dirty_for_io(folio));
2917
2918                         ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
2919                         if (ret < 0)
2920                                 goto write_error;
2921
2922                         start += ret;
2923                         continue;
2924
2925 write_error:
2926                         folio_batch_release(&fbatch);
2927                         *_next = start;
2928                         return ret;
2929
2930 skip_write:
2931                         /*
2932                          * Too many skipped writes, or need to reschedule?
2933                          * Treat it as a write error without an error code.
2934                          */
2935                         if (skips >= 5 || need_resched()) {
2936                                 ret = 0;
2937                                 goto write_error;
2938                         }
2939
2940                         /* Otherwise, just skip that folio and go on to the next */
2941                         skips++;
2942                         start += folio_size(folio);
2943                         continue;
2944                 }
2945
2946                 folio_batch_release(&fbatch);
2947                 cond_resched();
2948         } while (wbc->nr_to_write > 0);
2949
2950         *_next = start;
2951         return 0;
2952 }
2953
2954 /*
2955  * Write some of the pending data back to the server
2956  */
2957 static int cifs_writepages(struct address_space *mapping,
2958                            struct writeback_control *wbc)
2959 {
2960         loff_t start, next;
2961         int ret;
2962
2963         /* We have to be careful as we can end up racing with setattr()
2964          * truncating the pagecache since the caller doesn't take a lock here
2965          * to prevent it.
2966          */
2967
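             /* For cyclic writeback, resume from where the last pass stopped and
              * wrap around to the start of the file if budget remains.
              */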
2968         if (wbc->range_cyclic) {
2969                 start = mapping->writeback_index * PAGE_SIZE;
2970                 ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
2971                 if (ret == 0) {
2972                         mapping->writeback_index = next / PAGE_SIZE;
2973                         if (start > 0 && wbc->nr_to_write > 0) {
2974                                 ret = cifs_writepages_region(mapping, wbc, 0,
2975                                                              start, &next);
2976                                 if (ret == 0)
2977                                         mapping->writeback_index =
2978                                                 next / PAGE_SIZE;
2979                         }
2980                 }
2981         } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
2982                 ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
2983                 if (wbc->nr_to_write > 0 && ret == 0)
2984                         mapping->writeback_index = next / PAGE_SIZE;
2985         } else {
2986                 ret = cifs_writepages_region(mapping, wbc,
2987                                              wbc->range_start, wbc->range_end, &next);
2988         }
2989
2990         return ret;
2991 }
2992
2993 static int
2994 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2995 {
2996         int rc;
2997         unsigned int xid;
2998
2999         xid = get_xid();
3000 /* BB add check for wbc flags */
3001         get_page(page);
3002         if (!PageUptodate(page))
3003                 cifs_dbg(FYI, "ppw - page not up to date\n");
3004
3005         /*
3006          * Set the "writeback" flag, and clear "dirty" in the radix tree.
3007          *
3008          * A writepage() implementation always needs to do either this,
3009          * or re-dirty the page with "redirty_page_for_writepage()" in
3010          * the case of a failure.
3011          *
3012          * Just unlocking the page would leave the radix-tree tag bits out
3013          * of sync with the actual state of the page.
3014          */
3015         set_page_writeback(page);
3016 retry_write:
3017         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3018         if (is_retryable_error(rc)) {
3019                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3020                         goto retry_write;
3021                 redirty_page_for_writepage(wbc, page);
3022         } else if (rc != 0) {
3023                 SetPageError(page);
3024                 mapping_set_error(page->mapping, rc);
3025         } else {
3026                 SetPageUptodate(page);
3027         }
3028         end_page_writeback(page);
3029         put_page(page);
3030         free_xid(xid);
3031         return rc;
3032 }
3033
3034 static int cifs_write_end(struct file *file, struct address_space *mapping,
3035                         loff_t pos, unsigned len, unsigned copied,
3036                         struct page *page, void *fsdata)
3037 {
3038         int rc;
3039         struct inode *inode = mapping->host;
3040         struct cifsFileInfo *cfile = file->private_data;
3041         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3042         struct folio *folio = page_folio(page);
3043         __u32 pid;
3044
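             /* With rwpidforward, issue the I/O under the pid of the process
              * that opened the file rather than the caller's pid.
              */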
3045         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3046                 pid = cfile->pid;
3047         else
3048                 pid = current->tgid;
3049
3050         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3051                  page, pos, copied);
3052
3053         if (folio_test_checked(folio)) {
3054                 if (copied == len)
3055                         folio_mark_uptodate(folio);
3056                 folio_clear_checked(folio);
3057         } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3058                 folio_mark_uptodate(folio);
3059
3060         if (!folio_test_uptodate(folio)) {
3061                 char *page_data;
3062                 unsigned offset = pos & (PAGE_SIZE - 1);
3063                 unsigned int xid;
3064
3065                 xid = get_xid();
3066                 /* This is probably better than directly calling
3067                    cifs_partialpagewrite(), since here the file handle is
3068                    already known and we might as well use it. */
3069                 /* BB check if anything else is missing out of ppw,
3070                    such as updating the last write time. */
3071                 page_data = kmap(page);
3072                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3073                 /* if (rc < 0) should we set writebehind rc? */
3074                 kunmap(page);
3075
3076                 free_xid(xid);
3077         } else {
3078                 rc = copied;
3079                 pos += copied;
3080                 set_page_dirty(page);
3081         }
3082
3083         if (rc > 0) {
3084                 spin_lock(&inode->i_lock);
3085                 if (pos > inode->i_size) {
3086                         i_size_write(inode, pos);
3087                         inode->i_blocks = (512 - 1 + pos) >> 9;
3088                 }
3089                 spin_unlock(&inode->i_lock);
3090         }
3091
3092         unlock_page(page);
3093         put_page(page);
3094         /* Indicate that ctime and mtime need updating, as close is deferred */
3095         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3096
3097         return rc;
3098 }
3099
3100 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3101                       int datasync)
3102 {
3103         unsigned int xid;
3104         int rc = 0;
3105         struct cifs_tcon *tcon;
3106         struct TCP_Server_Info *server;
3107         struct cifsFileInfo *smbfile = file->private_data;
3108         struct inode *inode = file_inode(file);
3109         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3110
3111         rc = file_write_and_wait_range(file, start, end);
3112         if (rc) {
3113                 trace_cifs_fsync_err(inode->i_ino, rc);
3114                 return rc;
3115         }
3116
3117         xid = get_xid();
3118
3119         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3120                  file, datasync);
3121
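             /* Without a read lease/oplock the pagecache may be stale; zap it so
              * that later reads refetch from the server.
              */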
3122         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3123                 rc = cifs_zap_mapping(inode);
3124                 if (rc) {
3125                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3126                         rc = 0; /* don't care about it in fsync */
3127                 }
3128         }
3129
3130         tcon = tlink_tcon(smbfile->tlink);
3131         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3132                 server = tcon->ses->server;
3133                 if (server->ops->flush == NULL) {
3134                         rc = -ENOSYS;
3135                         goto strict_fsync_exit;
3136                 }
3137
3138                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3139                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3140                         if (smbfile) {
3141                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3142                                 cifsFileInfo_put(smbfile);
3143                         } else
3144                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3145                 } else
3146                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3147         }
3148
3149 strict_fsync_exit:
3150         free_xid(xid);
3151         return rc;
3152 }
3153
3154 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3155 {
3156         unsigned int xid;
3157         int rc = 0;
3158         struct cifs_tcon *tcon;
3159         struct TCP_Server_Info *server;
3160         struct cifsFileInfo *smbfile = file->private_data;
3161         struct inode *inode = file_inode(file);
3162         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3163
3164         rc = file_write_and_wait_range(file, start, end);
3165         if (rc) {
3166                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3167                 return rc;
3168         }
3169
3170         xid = get_xid();
3171
3172         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3173                  file, datasync);
3174
3175         tcon = tlink_tcon(smbfile->tlink);
3176         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3177                 server = tcon->ses->server;
3178                 if (server->ops->flush == NULL) {
3179                         rc = -ENOSYS;
3180                         goto fsync_exit;
3181                 }
3182
3183                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3184                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3185                         if (smbfile) {
3186                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3187                                 cifsFileInfo_put(smbfile);
3188                         } else
3189                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3190                 } else
3191                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3192         }
3193
3194 fsync_exit:
3195         free_xid(xid);
3196         return rc;
3197 }
3198
3199 /*
3200  * As the file closes, flush all cached write data for this inode, checking
3201  * for write-behind errors.
3202  */
3203 int cifs_flush(struct file *file, fl_owner_t id)
3204 {
3205         struct inode *inode = file_inode(file);
3206         int rc = 0;
3207
3208         if (file->f_mode & FMODE_WRITE)
3209                 rc = filemap_write_and_wait(inode->i_mapping);
3210
3211         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3212         if (rc) {
3213                 /* get more nuanced writeback errors */
3214                 rc = filemap_check_wb_err(file->f_mapping, 0);
3215                 trace_cifs_flush_err(inode->i_ino, rc);
3216         }
3217         return rc;
3218 }
3219
3220 static void
3221 cifs_uncached_writedata_release(struct kref *refcount)
3222 {
3223         struct cifs_writedata *wdata = container_of(refcount,
3224                                         struct cifs_writedata, refcount);
3225
3226         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3227         cifs_writedata_release(refcount);
3228 }
3229
3230 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3231
3232 static void
3233 cifs_uncached_writev_complete(struct work_struct *work)
3234 {
3235         struct cifs_writedata *wdata = container_of(work,
3236                                         struct cifs_writedata, work);
3237         struct inode *inode = d_inode(wdata->cfile->dentry);
3238         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3239
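             /* The write may have extended the file; pull the local i_size
              * forward to match the new server EOF.
              */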
3240         spin_lock(&inode->i_lock);
3241         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3242         if (cifsi->server_eof > inode->i_size)
3243                 i_size_write(inode, cifsi->server_eof);
3244         spin_unlock(&inode->i_lock);
3245
3246         complete(&wdata->done);
3247         collect_uncached_write_data(wdata->ctx);
3248         /* The call below may drop the last ref to the aio ctx */
3249         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3250 }
3251
3252 static int
3253 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3254         struct cifs_aio_ctx *ctx)
3255 {
3256         unsigned int wsize;
3257         struct cifs_credits credits;
3258         int rc;
3259         struct TCP_Server_Info *server = wdata->server;
3260
3261         do {
3262                 if (wdata->cfile->invalidHandle) {
3263                         rc = cifs_reopen_file(wdata->cfile, false);
3264                         if (rc == -EAGAIN)
3265                                 continue;
3266                         else if (rc)
3267                                 break;
3268                 }
3269
3271                 /*
3272                  * Wait for credits to resend this wdata.
3273                  * Note: we attempt to resend the whole wdata rather than
3274                  * splitting it into segments.
3275                  */
3276                 do {
3277                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3278                                                 &wsize, &credits);
3279                         if (rc)
3280                                 goto fail;
3281
3282                         if (wsize < wdata->bytes) {
3283                                 add_credits_and_wake_if(server, &credits, 0);
3284                                 msleep(1000);
3285                         }
3286                 } while (wsize < wdata->bytes);
3287                 wdata->credits = credits;
3288
3289                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3290
3291                 if (!rc) {
3292                         if (wdata->cfile->invalidHandle)
3293                                 rc = -EAGAIN;
3294                         else {
3295 #ifdef CONFIG_CIFS_SMB_DIRECT
3296                                 if (wdata->mr) {
3297                                         wdata->mr->need_invalidate = true;
3298                                         smbd_deregister_mr(wdata->mr);
3299                                         wdata->mr = NULL;
3300                                 }
3301 #endif
3302                                 rc = server->ops->async_writev(wdata,
3303                                         cifs_uncached_writedata_release);
3304                         }
3305                 }
3306
3307                 /* If the write was successfully sent, we are done */
3308                 if (!rc) {
3309                         list_add_tail(&wdata->list, wdata_list);
3310                         return 0;
3311                 }
3312
3313                 /* Roll back credits and retry if needed */
3314                 add_credits_and_wake_if(server, &wdata->credits, 0);
3315         } while (rc == -EAGAIN);
3316
3317 fail:
3318         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3319         return rc;
3320 }
3321
3322 /*
3323  * Select the span of a bvec iterator we're going to use.  Limit it by both maximum
3324  * size and maximum number of segments.
3325  */
3326 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3327                                      size_t max_segs, unsigned int *_nsegs)
3328 {
3329         const struct bio_vec *bvecs = iter->bvec;
3330         unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3331         size_t len, span = 0, n = iter->count;
3332         size_t skip = iter->iov_offset;
3333
3334         if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3335                 return 0;
3336
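             /* Step over any bvecs wholly consumed by the iterator's offset. */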
3337         while (n && ix < nbv && skip) {
3338                 len = bvecs[ix].bv_len;
3339                 if (skip < len)
3340                         break;
3341                 skip -= len;
3342                 n -= len;
3343                 ix++;
3344         }
3345
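             /* Accumulate bvecs into the span until the size or segment limit
              * is reached.
              */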
3346         while (n && ix < nbv) {
3347                 len = min3(n, bvecs[ix].bv_len - skip, max_size);
3348                 span += len;
3349                 nsegs++;
3350                 ix++;
3351                 if (span >= max_size || nsegs >= max_segs)
3352                         break;
3353                 skip = 0;
3354                 n -= len;
3355         }
3356
3357         *_nsegs = nsegs;
3358         return span;
3359 }
3360
3361 static int
3362 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3363                      struct cifsFileInfo *open_file,
3364                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3365                      struct cifs_aio_ctx *ctx)
3366 {
3367         int rc = 0;
3368         size_t cur_len, max_len;
3369         struct cifs_writedata *wdata;
3370         pid_t pid;
3371         struct TCP_Server_Info *server;
3372         unsigned int xid, max_segs = INT_MAX;
3373
3374         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3375                 pid = open_file->pid;
3376         else
3377                 pid = current->tgid;
3378
3379         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3380         xid = get_xid();
3381
3382 #ifdef CONFIG_CIFS_SMB_DIRECT
3383         if (server->smbd_conn)
3384                 max_segs = server->smbd_conn->max_frmr_depth;
3385 #endif
3386
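             /* Carve the source data into wsize- and segment-limited chunks and
              * dispatch each chunk as a separate asynchronous write.
              */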
3387         do {
3388                 struct cifs_credits credits_on_stack;
3389                 struct cifs_credits *credits = &credits_on_stack;
3390                 unsigned int wsize, nsegs = 0;
3391
3392                 if (signal_pending(current)) {
3393                         rc = -EINTR;
3394                         break;
3395                 }
3396
3397                 if (open_file->invalidHandle) {
3398                         rc = cifs_reopen_file(open_file, false);
3399                         if (rc == -EAGAIN)
3400                                 continue;
3401                         else if (rc)
3402                                 break;
3403                 }
3404
3405                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3406                                                    &wsize, credits);
3407                 if (rc)
3408                         break;
3409
3410                 max_len = min_t(size_t, len, wsize);
3411                 if (!max_len) {
3412                         rc = -EAGAIN;
3413                         add_credits_and_wake_if(server, credits, 0);
3414                         break;
3415                 }
3416
3417                 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3418                 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3419                          cur_len, max_len, nsegs, from->nr_segs, max_segs);
3420                 if (cur_len == 0) {
3421                         rc = -EIO;
3422                         add_credits_and_wake_if(server, credits, 0);
3423                         break;
3424                 }
3425
3426                 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3427                 if (!wdata) {
3428                         rc = -ENOMEM;
3429                         add_credits_and_wake_if(server, credits, 0);
3430                         break;
3431                 }
3432
3433                 wdata->sync_mode = WB_SYNC_ALL;
3434                 wdata->offset   = (__u64)fpos;
3435                 wdata->cfile    = cifsFileInfo_get(open_file);
3436                 wdata->server   = server;
3437                 wdata->pid      = pid;
3438                 wdata->bytes    = cur_len;
3439                 wdata->credits  = credits_on_stack;
3440                 wdata->iter     = *from;
3441                 wdata->ctx      = ctx;
3442                 kref_get(&ctx->refcount);
3443
3444                 iov_iter_truncate(&wdata->iter, cur_len);
3445
3446                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3447
3448                 if (!rc) {
3449                         if (wdata->cfile->invalidHandle)
3450                                 rc = -EAGAIN;
3451                         else
3452                                 rc = server->ops->async_writev(wdata,
3453                                         cifs_uncached_writedata_release);
3454                 }
3455
3456                 if (rc) {
3457                         add_credits_and_wake_if(server, &wdata->credits, 0);
3458                         kref_put(&wdata->refcount,
3459                                  cifs_uncached_writedata_release);
3460                         if (rc == -EAGAIN)
3461                                 continue;
3462                         break;
3463                 }
3464
3465                 list_add_tail(&wdata->list, wdata_list);
3466                 iov_iter_advance(from, cur_len);
3467                 fpos += cur_len;
3468                 len -= cur_len;
3469         } while (len > 0);
3470
3471         free_xid(xid);
3472         return rc;
3473 }
3474
3475 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3476 {
3477         struct cifs_writedata *wdata, *tmp;
3478         struct cifs_tcon *tcon;
3479         struct cifs_sb_info *cifs_sb;
3480         struct dentry *dentry = ctx->cfile->dentry;
3481         ssize_t rc;
3482
3483         tcon = tlink_tcon(ctx->cfile->tlink);
3484         cifs_sb = CIFS_SB(dentry->d_sb);
3485
3486         mutex_lock(&ctx->aio_mutex);
3487
3488         if (list_empty(&ctx->list)) {
3489                 mutex_unlock(&ctx->aio_mutex);
3490                 return;
3491         }
3492
3493         rc = ctx->rc;
3494         /*
3495          * Wait for and collect replies for any successful sends in order of
3496          * increasing offset. Once an error is hit, return without waiting
3497          * for any more replies.
3498          */
3499 restart_loop:
3500         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3501                 if (!rc) {
3502                         if (!try_wait_for_completion(&wdata->done)) {
3503                                 mutex_unlock(&ctx->aio_mutex);
3504                                 return;
3505                         }
3506
3507                         if (wdata->result)
3508                                 rc = wdata->result;
3509                         else
3510                                 ctx->total_len += wdata->bytes;
3511
3512                         /* resend call if it's a retryable error */
3513                         if (rc == -EAGAIN) {
3514                                 struct list_head tmp_list;
3515                                 struct iov_iter tmp_from = ctx->iter;
3516
3517                                 INIT_LIST_HEAD(&tmp_list);
3518                                 list_del_init(&wdata->list);
3519
3520                                 if (ctx->direct_io)
3521                                         rc = cifs_resend_wdata(
3522                                                 wdata, &tmp_list, ctx);
3523                                 else {
3524                                         iov_iter_advance(&tmp_from,
3525                                                  wdata->offset - ctx->pos);
3526
3527                                         rc = cifs_write_from_iter(wdata->offset,
3528                                                 wdata->bytes, &tmp_from,
3529                                                 ctx->cfile, cifs_sb, &tmp_list,
3530                                                 ctx);
3531
3532                                         kref_put(&wdata->refcount,
3533                                                 cifs_uncached_writedata_release);
3534                                 }
3535
3536                                 list_splice(&tmp_list, &ctx->list);
3537                                 goto restart_loop;
3538                         }
3539                 }
3540                 list_del_init(&wdata->list);
3541                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3542         }
3543
3544         cifs_stats_bytes_written(tcon, ctx->total_len);
3545         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3546
3547         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3548
3549         mutex_unlock(&ctx->aio_mutex);
3550
3551         if (ctx->iocb && ctx->iocb->ki_complete)
3552                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3553         else
3554                 complete(&ctx->done);
3555 }
3556
3557 static ssize_t __cifs_writev(
3558         struct kiocb *iocb, struct iov_iter *from, bool direct)
3559 {
3560         struct file *file = iocb->ki_filp;
3561         ssize_t total_written = 0;
3562         struct cifsFileInfo *cfile;
3563         struct cifs_tcon *tcon;
3564         struct cifs_sb_info *cifs_sb;
3565         struct cifs_aio_ctx *ctx;
3566         int rc;
3567
3568         rc = generic_write_checks(iocb, from);
3569         if (rc <= 0)
3570                 return rc;
3571
3572         cifs_sb = CIFS_FILE_SB(file);
3573         cfile = file->private_data;
3574         tcon = tlink_tcon(cfile->tlink);
3575
3576         if (!tcon->ses->server->ops->async_writev)
3577                 return -ENOSYS;
3578
3579         ctx = cifs_aio_ctx_alloc();
3580         if (!ctx)
3581                 return -ENOMEM;
3582
3583         ctx->cfile = cifsFileInfo_get(cfile);
3584
3585         if (!is_sync_kiocb(iocb))
3586                 ctx->iocb = iocb;
3587
3588         ctx->pos = iocb->ki_pos;
3589         ctx->direct_io = direct;
3590         ctx->nr_pinned_pages = 0;
3591
3592         if (user_backed_iter(from)) {
3593                 /*
3594                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3595                  * they contain references to the calling process's virtual
3596                  * memory layout which won't be available in an async worker
3597                  * thread.  This also takes a pin on every folio involved.
3598                  */
3599                 rc = netfs_extract_user_iter(from, iov_iter_count(from),
3600                                              &ctx->iter, 0);
3601                 if (rc < 0) {
3602                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3603                         return rc;
3604                 }
3605
3606                 ctx->nr_pinned_pages = rc;
3607                 ctx->bv = (void *)ctx->iter.bvec;
3608                 ctx->bv_need_unpin = iov_iter_extract_will_pin(&ctx->iter);
3609         } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3610                    !is_sync_kiocb(iocb)) {
3611                 /*
3612                  * If the op is asynchronous, we need to copy the list attached
3613                  * to a BVEC/KVEC-type iterator, but we assume that the storage
3614                  * will be pinned by the caller; in any case, we may or may not
3615                  * be able to pin the pages, so we don't try.
3616                  */
3617                 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3618                 if (!ctx->bv) {
3619                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3620                         return -ENOMEM;
3621                 }
3622         } else {
3623                 /*
3624                  * Otherwise, we just pass the iterator down as-is and rely on
3625                  * the caller to make sure the pages referred to by the
3626                  * iterator don't evaporate.
3627                  */
3628                 ctx->iter = *from;
3629         }
3630
3631         ctx->len = iov_iter_count(&ctx->iter);
3632
3633         /* Grab the mutex here because write response handlers can access ctx */
3634         mutex_lock(&ctx->aio_mutex);
3635
3636         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3637                                   cfile, cifs_sb, &ctx->list, ctx);
3638
3639         /*
3640          * If at least one write was successfully sent, discard any rc value
3641          * from the later writes. If the remaining writes succeed, we'll end up
3642          * returning whatever was written; if one fails, we'll pick up a new
3643          * rc value from it.
3644          */
3645         if (!list_empty(&ctx->list))
3646                 rc = 0;
3647
3648         mutex_unlock(&ctx->aio_mutex);
3649
3650         if (rc) {
3651                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3652                 return rc;
3653         }
3654
3655         if (!is_sync_kiocb(iocb)) {
3656                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3657                 return -EIOCBQUEUED;
3658         }
3659
3660         rc = wait_for_completion_killable(&ctx->done);
3661         if (rc) {
3662                 mutex_lock(&ctx->aio_mutex);
3663                 ctx->rc = rc = -EINTR;
3664                 total_written = ctx->total_len;
3665                 mutex_unlock(&ctx->aio_mutex);
3666         } else {
3667                 rc = ctx->rc;
3668                 total_written = ctx->total_len;
3669         }
3670
3671         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3672
3673         if (unlikely(!total_written))
3674                 return rc;
3675
3676         iocb->ki_pos += total_written;
3677         return total_written;
3678 }
3679
3680 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3681 {
3682         struct file *file = iocb->ki_filp;
3683
3684         cifs_revalidate_mapping(file->f_inode);
3685         return __cifs_writev(iocb, from, true);
3686 }
3687
3688 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3689 {
3690         return __cifs_writev(iocb, from, false);
3691 }
3692
3693 static ssize_t
3694 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3695 {
3696         struct file *file = iocb->ki_filp;
3697         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3698         struct inode *inode = file->f_mapping->host;
3699         struct cifsInodeInfo *cinode = CIFS_I(inode);
3700         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3701         ssize_t rc;
3702
3703         inode_lock(inode);
3704         /*
3705          * We need to hold the sem to be sure nobody modifies lock list
3706          * with a brlock that prevents writing.
3707          */
3708         down_read(&cinode->lock_sem);
3709
3710         rc = generic_write_checks(iocb, from);
3711         if (rc <= 0)
3712                 goto out;
3713
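             /* Only write if no conflicting mandatory byte-range lock covers the
              * target region.
              */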
3714         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3715                                      server->vals->exclusive_lock_type, 0,
3716                                      NULL, CIFS_WRITE_OP))
3717                 rc = __generic_file_write_iter(iocb, from);
3718         else
3719                 rc = -EACCES;
3720 out:
3721         up_read(&cinode->lock_sem);
3722         inode_unlock(inode);
3723
3724         if (rc > 0)
3725                 rc = generic_write_sync(iocb, rc);
3726         return rc;
3727 }
3728
3729 ssize_t
3730 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3731 {
3732         struct inode *inode = file_inode(iocb->ki_filp);
3733         struct cifsInodeInfo *cinode = CIFS_I(inode);
3734         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3735         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3736                                                 iocb->ki_filp->private_data;
3737         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3738         ssize_t written;
3739
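             /* Register as a writer so that the write cannot race with any
              * oplock-break handling that is in progress.
              */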
3740         written = cifs_get_writer(cinode);
3741         if (written)
3742                 return written;
3743
3744         if (CIFS_CACHE_WRITE(cinode)) {
3745                 if (cap_unix(tcon->ses) &&
3746                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3747                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3748                         written = generic_file_write_iter(iocb, from);
3749                         goto out;
3750                 }
3751                 written = cifs_writev(iocb, from);
3752                 goto out;
3753         }
3754         /*
3755          * For non-oplocked files in strict cache mode we need to write the data
3756          * to the server exactly from pos to pos+len-1 rather than flushing all
3757          * affected pages, because flushing may cause an error with mandatory
3758          * locks on these pages but not on the region from pos to pos+len-1.
3759          */
3760         written = cifs_user_writev(iocb, from);
3761         if (CIFS_CACHE_READ(cinode)) {
3762                 /*
3763                  * We have read level caching and we have just sent a write
3764                  * request to the server thus making data in the cache stale.
3765                  * Zap the cache and set oplock/lease level to NONE to avoid
3766                  * reading stale data from the cache. All subsequent read
3767                  * operations will read new data from the server.
3768                  */
3769                 cifs_zap_mapping(inode);
3770                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3771                          inode);
3772                 cinode->oplock = 0;
3773         }
3774 out:
3775         cifs_put_writer(cinode);
3776         return written;
3777 }
3778
3779 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3780 {
3781         struct cifs_readdata *rdata;
3782
3783         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3784         if (rdata) {
3785                 kref_init(&rdata->refcount);
3786                 INIT_LIST_HEAD(&rdata->list);
3787                 init_completion(&rdata->done);
3788                 INIT_WORK(&rdata->work, complete);
3789         }
3790
3791         return rdata;
3792 }
3793
3794 void
3795 cifs_readdata_release(struct kref *refcount)
3796 {
3797         struct cifs_readdata *rdata = container_of(refcount,
3798                                         struct cifs_readdata, refcount);
3799
3800         if (rdata->ctx)
3801                 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3802 #ifdef CONFIG_CIFS_SMB_DIRECT
3803         if (rdata->mr) {
3804                 smbd_deregister_mr(rdata->mr);
3805                 rdata->mr = NULL;
3806         }
3807 #endif
3808         if (rdata->cfile)
3809                 cifsFileInfo_put(rdata->cfile);
3810
3811         kfree(rdata);
3812 }
3813
3814 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3815
3816 static void
3817 cifs_uncached_readv_complete(struct work_struct *work)
3818 {
3819         struct cifs_readdata *rdata = container_of(work,
3820                                                 struct cifs_readdata, work);
3821
3822         complete(&rdata->done);
3823         collect_uncached_read_data(rdata->ctx);
3824         /* The call below may drop the last ref to the aio ctx */
3825         kref_put(&rdata->refcount, cifs_readdata_release);
3826 }
3827
3828 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3829                         struct list_head *rdata_list,
3830                         struct cifs_aio_ctx *ctx)
3831 {
3832         unsigned int rsize;
3833         struct cifs_credits credits;
3834         int rc;
3835         struct TCP_Server_Info *server;
3836
3837         /* XXX: should we pick a new channel here? */
3838         server = rdata->server;
3839
3840         do {
3841                 if (rdata->cfile->invalidHandle) {
3842                         rc = cifs_reopen_file(rdata->cfile, true);
3843                         if (rc == -EAGAIN)
3844                                 continue;
3845                         else if (rc)
3846                                 break;
3847                 }
3848
3849                 /*
3850                  * Wait for credits to resend this rdata.
3851                  * Note: we attempt to resend the whole rdata rather than
3852                  * splitting it into segments.
3853                  */
3854                 do {
3855                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3856                                                 &rsize, &credits);
3857
3858                         if (rc)
3859                                 goto fail;
3860
3861                         if (rsize < rdata->bytes) {
3862                                 add_credits_and_wake_if(server, &credits, 0);
3863                                 msleep(1000);
3864                         }
3865                 } while (rsize < rdata->bytes);
3866                 rdata->credits = credits;
3867
3868                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3869                 if (!rc) {
3870                         if (rdata->cfile->invalidHandle)
3871                                 rc = -EAGAIN;
3872                         else {
3873 #ifdef CONFIG_CIFS_SMB_DIRECT
3874                                 if (rdata->mr) {
3875                                         rdata->mr->need_invalidate = true;
3876                                         smbd_deregister_mr(rdata->mr);
3877                                         rdata->mr = NULL;
3878                                 }
3879 #endif
3880                                 rc = server->ops->async_readv(rdata);
3881                         }
3882                 }
3883
3884                 /* If the read was successfully sent, we are done */
3885                 if (!rc) {
3886                         /* Add to aio pending list */
3887                         list_add_tail(&rdata->list, rdata_list);
3888                         return 0;
3889                 }
3890
3891                 /* Roll back credits and retry if needed */
3892                 add_credits_and_wake_if(server, &rdata->credits, 0);
3893         } while (rc == -EAGAIN);
3894
3895 fail:
3896         kref_put(&rdata->refcount, cifs_readdata_release);
3897         return rc;
3898 }
3899
3900 static int
3901 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3902                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3903                      struct cifs_aio_ctx *ctx)
3904 {
3905         struct cifs_readdata *rdata;
3906         unsigned int rsize, nsegs, max_segs = INT_MAX;
3907         struct cifs_credits credits_on_stack;
3908         struct cifs_credits *credits = &credits_on_stack;
3909         size_t cur_len, max_len;
3910         int rc;
3911         pid_t pid;
3912         struct TCP_Server_Info *server;
3913
3914         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3915
3916 #ifdef CONFIG_CIFS_SMB_DIRECT
3917         if (server->smbd_conn)
3918                 max_segs = server->smbd_conn->max_frmr_depth;
3919 #endif
3920
3921         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3922                 pid = open_file->pid;
3923         else
3924                 pid = current->tgid;
3925
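             /* Carve the request into rsize- and segment-limited chunks and
              * issue an asynchronous read for each one.
              */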
3926         do {
3927                 if (open_file->invalidHandle) {
3928                         rc = cifs_reopen_file(open_file, true);
3929                         if (rc == -EAGAIN)
3930                                 continue;
3931                         else if (rc)
3932                                 break;
3933                 }
3934
3935                 if (cifs_sb->ctx->rsize == 0)
3936                         cifs_sb->ctx->rsize =
3937                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3938                                                              cifs_sb->ctx);
3939
3940                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3941                                                    &rsize, credits);
3942                 if (rc)
3943                         break;
3944
3945                 max_len = min_t(size_t, len, rsize);
3946
3947                 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
3948                                                  max_segs, &nsegs);
3949                 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3950                          cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
3951                 if (cur_len == 0) {
3952                         rc = -EIO;
3953                         add_credits_and_wake_if(server, credits, 0);
3954                         break;
3955                 }
3956
3957                 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
3958                 if (!rdata) {
3959                         add_credits_and_wake_if(server, credits, 0);
3960                         rc = -ENOMEM;
3961                         break;
3962                 }
3963
3964                 rdata->server   = server;
3965                 rdata->cfile    = cifsFileInfo_get(open_file);
3966                 rdata->offset   = fpos;
3967                 rdata->bytes    = cur_len;
3968                 rdata->pid      = pid;
3969                 rdata->credits  = credits_on_stack;
3970                 rdata->ctx      = ctx;
3971                 kref_get(&ctx->refcount);
3972
3973                 rdata->iter     = ctx->iter;
3974                 iov_iter_truncate(&rdata->iter, cur_len);
3975
3976                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3977
3978                 if (!rc) {
3979                         if (rdata->cfile->invalidHandle)
3980                                 rc = -EAGAIN;
3981                         else
3982                                 rc = server->ops->async_readv(rdata);
3983                 }
3984
3985                 if (rc) {
3986                         add_credits_and_wake_if(server, &rdata->credits, 0);
3987                         kref_put(&rdata->refcount, cifs_readdata_release);
3988                         if (rc == -EAGAIN)
3989                                 continue;
3990                         break;
3991                 }
3992
3993                 list_add_tail(&rdata->list, rdata_list);
3994                 iov_iter_advance(&ctx->iter, cur_len);
3995                 fpos += cur_len;
3996                 len -= cur_len;
3997         } while (len > 0);
3998
3999         return rc;
4000 }
4001
4002 static void
4003 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4004 {
4005         struct cifs_readdata *rdata, *tmp;
4006         struct iov_iter *to = &ctx->iter;
4007         struct cifs_sb_info *cifs_sb;
4008         int rc;
4009
4010         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4011
4012         mutex_lock(&ctx->aio_mutex);
4013
4014         if (list_empty(&ctx->list)) {
4015                 mutex_unlock(&ctx->aio_mutex);
4016                 return;
4017         }
4018
4019         rc = ctx->rc;
4020         /* the loop below should proceed in the order of increasing offsets */
4021 again:
4022         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4023                 if (!rc) {
4024                         if (!try_wait_for_completion(&rdata->done)) {
4025                                 mutex_unlock(&ctx->aio_mutex);
4026                                 return;
4027                         }
4028
4029                         if (rdata->result == -EAGAIN) {
4030                                 /* resend call if it's a retryable error */
4031                                 struct list_head tmp_list;
4032                                 unsigned int got_bytes = rdata->got_bytes;
4033
4034                                 list_del_init(&rdata->list);
4035                                 INIT_LIST_HEAD(&tmp_list);
4036
4037                                 if (ctx->direct_io) {
4038                                         /*
4039                                          * Re-use rdata as this is a
4040                                          * direct I/O
4041                                          */
4042                                         rc = cifs_resend_rdata(
4043                                                 rdata,
4044                                                 &tmp_list, ctx);
4045                                 } else {
4046                                         rc = cifs_send_async_read(
4047                                                 rdata->offset + got_bytes,
4048                                                 rdata->bytes - got_bytes,
4049                                                 rdata->cfile, cifs_sb,
4050                                                 &tmp_list, ctx);
4051
4052                                         kref_put(&rdata->refcount,
4053                                                 cifs_readdata_release);
4054                                 }
4055
4056                                 list_splice(&tmp_list, &ctx->list);
4057
4058                                 goto again;
4059                         } else if (rdata->result)
4060                                 rc = rdata->result;
4061
4062                         /* if there was a short read -- discard anything left */
4063                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4064                                 rc = -ENODATA;
4065
4066                         ctx->total_len += rdata->got_bytes;
4067                 }
4068                 list_del_init(&rdata->list);
4069                 kref_put(&rdata->refcount, cifs_readdata_release);
4070         }
4071
4072         if (!ctx->direct_io)
4073                 ctx->total_len = ctx->len - iov_iter_count(to);
4074
4075         /* mask nodata case */
4076         if (rc == -ENODATA)
4077                 rc = 0;
4078
4079         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4080
4081         mutex_unlock(&ctx->aio_mutex);
4082
4083         if (ctx->iocb && ctx->iocb->ki_complete)
4084                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4085         else
4086                 complete(&ctx->done);
4087 }
4088
4089 static ssize_t __cifs_readv(
4090         struct kiocb *iocb, struct iov_iter *to, bool direct)
4091 {
4092         size_t len;
4093         struct file *file = iocb->ki_filp;
4094         struct cifs_sb_info *cifs_sb;
4095         struct cifsFileInfo *cfile;
4096         struct cifs_tcon *tcon;
4097         ssize_t rc, total_read = 0;
4098         loff_t offset = iocb->ki_pos;
4099         struct cifs_aio_ctx *ctx;
4100
4101         len = iov_iter_count(to);
4102         if (!len)
4103                 return 0;
4104
4105         cifs_sb = CIFS_FILE_SB(file);
4106         cfile = file->private_data;
4107         tcon = tlink_tcon(cfile->tlink);
4108
4109         if (!tcon->ses->server->ops->async_readv)
4110                 return -ENOSYS;
4111
4112         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4113                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4114
4115         ctx = cifs_aio_ctx_alloc();
4116         if (!ctx)
4117                 return -ENOMEM;
4118
4119         ctx->pos        = offset;
4120         ctx->direct_io  = direct;
4121         ctx->len        = len;
4122         ctx->cfile      = cifsFileInfo_get(cfile);
4123         ctx->nr_pinned_pages = 0;
4124
4125         if (!is_sync_kiocb(iocb))
4126                 ctx->iocb = iocb;
4127
4128         if (user_backed_iter(to)) {
4129                 /*
4130                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4131                  * they contain references to the calling process's virtual
4132                  * memory layout which won't be available in an async worker
4133                  * thread.  This also takes a pin on every folio involved.
4134                  */
4135                 rc = netfs_extract_user_iter(to, iov_iter_count(to),
4136                                              &ctx->iter, 0);
4137                 if (rc < 0) {
4138                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4139                         return rc;
4140                 }
4141
4142                 ctx->nr_pinned_pages = rc;
4143                 ctx->bv = (void *)ctx->iter.bvec;
4144                 ctx->bv_need_unpin = iov_iter_extract_will_pin(&ctx->iter);
4145                 ctx->should_dirty = true;
4146         } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4147                    !is_sync_kiocb(iocb)) {
4148                 /*
4149                  * If the op is asynchronous, we need to copy the list attached
4150                  * to a BVEC/KVEC-type iterator, but we assume that the storage
4151                  * will be retained by the caller; in any case, we may or may
4152                  * not be able to pin the pages, so we don't try.
4153                  */
4154                 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4155                 if (!ctx->bv) {
4156                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4157                         return -ENOMEM;
4158                 }
4159         } else {
4160                 /*
4161                  * Otherwise, we just pass the iterator down as-is and rely on
4162                  * the caller to make sure the pages referred to by the
4163                  * iterator don't evaporate.
4164                  */
4165                 ctx->iter = *to;
4166         }
4167
4168         if (direct) {
4169                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4170                                                   offset, offset + len - 1);
4171                 if (rc) {
4172                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4173                         return -EAGAIN;
4174                 }
4175         }
4176
4177         /* grab a lock here because read response handlers can access ctx */
4178         mutex_lock(&ctx->aio_mutex);
4179
4180         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4181
4182         /* if at least one read request was sent successfully, reset rc */
4183         if (!list_empty(&ctx->list))
4184                 rc = 0;
4185
4186         mutex_unlock(&ctx->aio_mutex);
4187
4188         if (rc) {
4189                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4190                 return rc;
4191         }
4192
4193         if (!is_sync_kiocb(iocb)) {
4194                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4195                 return -EIOCBQUEUED;
4196         }
4197
4198         rc = wait_for_completion_killable(&ctx->done);
4199         if (rc) {
4200                 mutex_lock(&ctx->aio_mutex);
4201                 ctx->rc = rc = -EINTR;
4202                 total_read = ctx->total_len;
4203                 mutex_unlock(&ctx->aio_mutex);
4204         } else {
4205                 rc = ctx->rc;
4206                 total_read = ctx->total_len;
4207         }
4208
4209         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4210
4211         if (total_read) {
4212                 iocb->ki_pos += total_read;
4213                 return total_read;
4214         }
4215         return rc;
4216 }
4217
4218 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4219 {
4220         return __cifs_readv(iocb, to, true);
4221 }
4222
4223 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4224 {
4225         return __cifs_readv(iocb, to, false);
4226 }
4227
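/*
 * Rough decision table for the strict-cache read path below:
 *
 *	CIFS_CACHE_READ   unix fcntl caps, no brl   path taken
 *	no                -                         cifs_user_readv()
 *	yes               yes                       generic_file_read_iter()
 *	yes               no                        brlock conflict check,
 *	                                            then pagecache read
 */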
4228 ssize_t
4229 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4230 {
4231         struct inode *inode = file_inode(iocb->ki_filp);
4232         struct cifsInodeInfo *cinode = CIFS_I(inode);
4233         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4234         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4235                                                 iocb->ki_filp->private_data;
4236         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4237         int rc = -EACCES;
4238
4239         /*
4240          * In strict cache mode we need to read from the server all the time
4241          * if we don't have a level II oplock because the server can delay
4242          * mtime changes - so we can't make a decision about invalidating the
4243          * inode. Reading via the page cache can also fail if there are
4244          * mandatory locks on pages affected by this read but not on the
4245          * region from pos to pos+len-1.
4246          */
4247         if (!CIFS_CACHE_READ(cinode))
4248                 return cifs_user_readv(iocb, to);
4249
4250         if (cap_unix(tcon->ses) &&
4251             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4252             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4253                 return generic_file_read_iter(iocb, to);
4254
4255         /*
4256          * We need to hold the sem to be sure nobody modifies lock list
4257          * with a brlock that prevents reading.
4258          */
4259         down_read(&cinode->lock_sem);
4260         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4261                                      tcon->ses->server->vals->shared_lock_type,
4262                                      0, NULL, CIFS_READ_OP))
4263                 rc = generic_file_read_iter(iocb, to);
4264         up_read(&cinode->lock_sem);
4265         return rc;
4266 }
4267
4268 static ssize_t
4269 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4270 {
4271         int rc = -EACCES;
4272         unsigned int bytes_read = 0;
4273         unsigned int total_read;
4274         unsigned int current_read_size;
4275         unsigned int rsize;
4276         struct cifs_sb_info *cifs_sb;
4277         struct cifs_tcon *tcon;
4278         struct TCP_Server_Info *server;
4279         unsigned int xid;
4280         char *cur_offset;
4281         struct cifsFileInfo *open_file;
4282         struct cifs_io_parms io_parms = {0};
4283         int buf_type = CIFS_NO_BUFFER;
4284         __u32 pid;
4285
4286         xid = get_xid();
4287         cifs_sb = CIFS_FILE_SB(file);
4288
4289         /* FIXME: set up handlers for larger reads and/or convert to async */
4290         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4291
4292         if (file->private_data == NULL) {
4293                 rc = -EBADF;
4294                 free_xid(xid);
4295                 return rc;
4296         }
4297         open_file = file->private_data;
4298         tcon = tlink_tcon(open_file->tlink);
4299         server = cifs_pick_channel(tcon->ses);
4300
4301         if (!server->ops->sync_read) {
4302                 free_xid(xid);
4303                 return -ENOSYS;
4304         }
4305
4306         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4307                 pid = open_file->pid;
4308         else
4309                 pid = current->tgid;
4310
4311         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4312                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4313
4314         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4315              total_read += bytes_read, cur_offset += bytes_read) {
4316                 do {
4317                         current_read_size = min_t(uint, read_size - total_read,
4318                                                   rsize);
4319                         /*
4320                          * For windows me and 9x we do not want to request more
4321                          * than it negotiated since it will refuse the read
4322                          * then.
4323                          */
4324                         if (!(tcon->ses->capabilities &
4325                                 tcon->ses->server->vals->cap_large_files)) {
4326                                 current_read_size = min_t(uint,
4327                                         current_read_size, CIFSMaxBufSize);
4328                         }
4329                         if (open_file->invalidHandle) {
4330                                 rc = cifs_reopen_file(open_file, true);
4331                                 if (rc != 0)
4332                                         break;
4333                         }
4334                         io_parms.pid = pid;
4335                         io_parms.tcon = tcon;
4336                         io_parms.offset = *offset;
4337                         io_parms.length = current_read_size;
4338                         io_parms.server = server;
4339                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4340                                                     &bytes_read, &cur_offset,
4341                                                     &buf_type);
4342                 } while (rc == -EAGAIN);
4343
4344                 if (rc || (bytes_read == 0)) {
4345                         if (total_read) {
4346                                 break;
4347                         } else {
4348                                 free_xid(xid);
4349                                 return rc;
4350                         }
4351                 } else {
4352                         cifs_stats_bytes_read(tcon, total_read);
4353                         *offset += bytes_read;
4354                 }
4355         }
4356         free_xid(xid);
4357         return total_read;
4358 }
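/*
 * Worked example for the loop above, assuming rsize = 16KiB: a 40KiB
 * request goes out as wire reads of 16KiB, 16KiB and 8KiB, with -EAGAIN
 * (reconnect) retried per chunk.  If an error strikes after some data has
 * already been read, the short byte count is returned instead of the error.
 */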
4359
4360 /*
4361  * If the page is mmap'ed into a process' page tables, then we need to make
4362  * sure that it doesn't change while being written back.
4363  */
4364 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4365 {
4366         struct folio *folio = page_folio(vmf->page);
4367
4368         /* Wait for the folio to be written to the cache before we allow it to
4369          * be modified.  We then assume the entire folio will need writing back.
4370          */
4371 #ifdef CONFIG_CIFS_FSCACHE
4372         if (folio_test_fscache(folio) &&
4373             folio_wait_fscache_killable(folio) < 0)
4374                 return VM_FAULT_RETRY;
4375 #endif
4376
4377         folio_wait_writeback(folio);
4378
4379         if (folio_lock_killable(folio) < 0)
4380                 return VM_FAULT_RETRY;
4381         return VM_FAULT_LOCKED;
4382 }
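/*
 * Note on the returns above: VM_FAULT_LOCKED hands the still-locked folio
 * back to the fault handler for dirtying, while VM_FAULT_RETRY asks the MM
 * to drop its locks and repeat the fault later.
 */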
4383
4384 static const struct vm_operations_struct cifs_file_vm_ops = {
4385         .fault = filemap_fault,
4386         .map_pages = filemap_map_pages,
4387         .page_mkwrite = cifs_page_mkwrite,
4388 };
4389
4390 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4391 {
4392         int xid, rc = 0;
4393         struct inode *inode = file_inode(file);
4394
4395         xid = get_xid();
4396
4397         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4398                 rc = cifs_zap_mapping(inode);
4399         if (!rc)
4400                 rc = generic_file_mmap(file, vma);
4401         if (!rc)
4402                 vma->vm_ops = &cifs_file_vm_ops;
4403
4404         free_xid(xid);
4405         return rc;
4406 }
4407
4408 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4409 {
4410         int rc, xid;
4411
4412         xid = get_xid();
4413
4414         rc = cifs_revalidate_file(file);
4415         if (rc)
4416                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4417                          rc);
4418         if (!rc)
4419                 rc = generic_file_mmap(file, vma);
4420         if (!rc)
4421                 vma->vm_ops = &cifs_file_vm_ops;
4422
4423         free_xid(xid);
4424         return rc;
4425 }
4426
4427 /*
4428  * Unlock a bunch of folios in the pagecache.
4429  */
4430 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4431 {
4432         struct folio *folio;
4433         XA_STATE(xas, &mapping->i_pages, first);
4434
4435         rcu_read_lock();
4436         xas_for_each(&xas, folio, last) {
4437                 folio_unlock(folio);
4438         }
4439         rcu_read_unlock();
4440 }
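/*
 * The walk above never sleeps - folio_unlock() is non-blocking - so it can
 * stay inside a single RCU read-side section without pausing the XArray
 * iteration.
 */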
4441
4442 static void cifs_readahead_complete(struct work_struct *work)
4443 {
4444         struct cifs_readdata *rdata = container_of(work,
4445                                                    struct cifs_readdata, work);
4446         struct folio *folio;
4447         pgoff_t last;
4448         bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4449
4450         XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4451
4452         if (good)
4453                 cifs_readahead_to_fscache(rdata->mapping->host,
4454                                           rdata->offset, rdata->bytes);
4455
4456         if (iov_iter_count(&rdata->iter) > 0)
4457                 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4458
4459         last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4460
4461         rcu_read_lock();
4462         xas_for_each(&xas, folio, last) {
4463                 if (good) {
4464                         flush_dcache_folio(folio);
4465                         folio_mark_uptodate(folio);
4466                 }
4467                 folio_unlock(folio);
4468         }
4469         rcu_read_unlock();
4470
4471         kref_put(&rdata->refcount, cifs_readdata_release);
4472 }
4473
4474 static void cifs_readahead(struct readahead_control *ractl)
4475 {
4476         struct cifsFileInfo *open_file = ractl->file->private_data;
4477         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4478         struct TCP_Server_Info *server;
4479         unsigned int xid, nr_pages, cache_nr_pages = 0;
4480         unsigned int ra_pages;
4481         pgoff_t next_cached = ULONG_MAX, ra_index;
4482         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4483                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4484         bool check_cache = caching;
4485         pid_t pid;
4486         int rc = 0;
4487
4488         /* Note that readahead_count() lags behind our dequeuing of pages from
4489          * the ractl, so we have to keep track for ourselves.
4490          */
4491         ra_pages = readahead_count(ractl);
4492         ra_index = readahead_index(ractl);
4493
4494         xid = get_xid();
4495
4496         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4497                 pid = open_file->pid;
4498         else
4499                 pid = current->tgid;
4500
4501         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4502
4503         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4504                  __func__, ractl->file, ractl->mapping, ra_pages);
4505
4506         /*
4507          * Chop the readahead request up into rsize-sized read requests.
4508          */
4509         while ((nr_pages = ra_pages)) {
4510                 unsigned int i, rsize;
4511                 struct cifs_readdata *rdata;
4512                 struct cifs_credits credits_on_stack;
4513                 struct cifs_credits *credits = &credits_on_stack;
4514                 struct folio *folio;
4515                 pgoff_t fsize;
4516
4517                 /*
4518                  * Find out if we have anything cached in the range of
4519                  * interest, and if so, where the next chunk of cached data is.
4520                  */
4521                 if (caching) {
4522                         if (check_cache) {
4523                                 rc = cifs_fscache_query_occupancy(
4524                                         ractl->mapping->host, ra_index, nr_pages,
4525                                         &next_cached, &cache_nr_pages);
4526                                 if (rc < 0)
4527                                         caching = false;
4528                                 check_cache = false;
4529                         }
4530
4531                         if (ra_index == next_cached) {
4532                                 /*
4533                                  * TODO: Send a whole batch of pages to be read
4534                                  * by the cache.
4535                                  */
4536                                 folio = readahead_folio(ractl);
4537                                 fsize = folio_nr_pages(folio);
4538                                 ra_pages -= fsize;
4539                                 ra_index += fsize;
4540                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4541                                                                &folio->page) < 0) {
4542                                         /*
4543                                          * TODO: Deal with cache read failure
4544                                          * here, but for the moment, delegate
4545                                          * that to readpage.
4546                                          */
4547                                         caching = false;
4548                                 }
4549                                 folio_unlock(folio);
4550                                 next_cached += fsize;
4551                                 cache_nr_pages -= fsize;
4552                                 if (cache_nr_pages == 0)
4553                                         check_cache = true;
4554                                 continue;
4555                         }
4556                 }
4557
4558                 if (open_file->invalidHandle) {
4559                         rc = cifs_reopen_file(open_file, true);
4560                         if (rc) {
4561                                 if (rc == -EAGAIN)
4562                                         continue;
4563                                 break;
4564                         }
4565                 }
4566
4567                 if (cifs_sb->ctx->rsize == 0)
4568                         cifs_sb->ctx->rsize =
4569                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4570                                                              cifs_sb->ctx);
4571
4572                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4573                                                    &rsize, credits);
4574                 if (rc)
4575                         break;
4576                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4577                 if (next_cached != ULONG_MAX)
4578                         nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4579
4580                 /*
4581                  * Give up immediately if rsize is too small to read an entire
4582                  * page. The VFS will fall back to readpage. We should never
4583                  * reach this point however since we set ra_pages to 0 when the
4584                  * rsize is smaller than a cache page.
4585                  */
4586                 if (unlikely(!nr_pages)) {
4587                         add_credits_and_wake_if(server, credits, 0);
4588                         break;
4589                 }
4590
4591                 rdata = cifs_readdata_alloc(cifs_readahead_complete);
4592                 if (!rdata) {
4593                         /* best to give up if we're out of mem */
4594                         add_credits_and_wake_if(server, credits, 0);
4595                         break;
4596                 }
4597
4598                 rdata->offset   = ra_index * PAGE_SIZE;
4599                 rdata->bytes    = nr_pages * PAGE_SIZE;
4600                 rdata->cfile    = cifsFileInfo_get(open_file);
4601                 rdata->server   = server;
4602                 rdata->mapping  = ractl->mapping;
4603                 rdata->pid      = pid;
4604                 rdata->credits  = credits_on_stack;
4605
4606                 for (i = 0; i < nr_pages; i++) {
4607                         if (!readahead_folio(ractl))
4608                                 WARN_ON(1);
4609                 }
4610                 ra_pages -= nr_pages;
4611                 ra_index += nr_pages;
4612
4613                 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4614                                 rdata->offset, rdata->bytes);
4615
4616                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4617                 if (!rc) {
4618                         if (rdata->cfile->invalidHandle)
4619                                 rc = -EAGAIN;
4620                         else
4621                                 rc = server->ops->async_readv(rdata);
4622                 }
4623
4624                 if (rc) {
4625                         add_credits_and_wake_if(server, &rdata->credits, 0);
4626                         cifs_unlock_folios(rdata->mapping,
4627                                            rdata->offset / PAGE_SIZE,
4628                                            (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4629                         /* Fall back to readpage in error/reconnect cases */
4630                         kref_put(&rdata->refcount, cifs_readdata_release);
4631                         break;
4632                 }
4633
4634                 kref_put(&rdata->refcount, cifs_readdata_release);
4635         }
4636
4637         free_xid(xid);
4638 }
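/*
 * Worked example of the chunking above, assuming PAGE_SIZE = 4KiB,
 * rsize = 64KiB and a 256KiB readahead window with nothing cached: each
 * pass claims nr_pages = min(64K / 4K, ra_pages) = 16 pages, so the window
 * is issued as four 64KiB async reads, with ra_index advancing by 16 pages
 * per pass.
 */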
4639
4640 /*
4641  * cifs_readpage_worker must be called with the page pinned
4642  */
4643 static int cifs_readpage_worker(struct file *file, struct page *page,
4644         loff_t *poffset)
4645 {
4646         char *read_data;
4647         int rc;
4648
4649         /* Is the page cached? */
4650         rc = cifs_readpage_from_fscache(file_inode(file), page);
4651         if (rc == 0)
4652                 goto read_complete;
4653
4654         read_data = kmap(page);
4655         /* for reads over a certain size we could initiate async read-ahead */
4656
4657         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4658
4659         if (rc < 0)
4660                 goto io_error;
4661         else
4662                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4663
4664         /* we do not want atime to be less than mtime, it broke some apps */
4665         file_inode(file)->i_atime = current_time(file_inode(file));
4666         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)) < 0)
4667                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4668         else
4669                 file_inode(file)->i_atime = current_time(file_inode(file));
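        /*
         * timespec64_compare() is negative when its first argument is the
         * earlier stamp, so the branch above clamps atime up to mtime and
         * otherwise keeps the freshly sampled current time.
         */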
4670
4671         if (PAGE_SIZE > rc)
4672                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4673
4674         flush_dcache_page(page);
4675         SetPageUptodate(page);
4676         rc = 0;
4677
4678 io_error:
4679         kunmap(page);
4680         unlock_page(page);
4681
4682 read_complete:
4683         return rc;
4684 }
4685
4686 static int cifs_read_folio(struct file *file, struct folio *folio)
4687 {
4688         struct page *page = &folio->page;
4689         loff_t offset = page_file_offset(page);
4690         int rc = -EACCES;
4691         unsigned int xid;
4692
4693         xid = get_xid();
4694
4695         if (file->private_data == NULL) {
4696                 rc = -EBADF;
4697                 free_xid(xid);
4698                 return rc;
4699         }
4700
4701         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4702                  page, (int)offset, (int)offset);
4703
4704         rc = cifs_readpage_worker(file, page, &offset);
4705
4706         free_xid(xid);
4707         return rc;
4708 }
4709
4710 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4711 {
4712         struct cifsFileInfo *open_file;
4713
4714         spin_lock(&cifs_inode->open_file_lock);
4715         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4716                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4717                         spin_unlock(&cifs_inode->open_file_lock);
4718                         return 1;
4719                 }
4720         }
4721         spin_unlock(&cifs_inode->open_file_lock);
4722         return 0;
4723 }
4724
4725 /* We do not want to update the file size from the server for inodes
4726    open for write - to avoid races with writepage extending the file.
4727    In the future we could consider allowing refreshing the inode only
4728    on increases in the file size, but this is tricky to do without
4729    racing with writebehind page caching in the current Linux kernel
4730    design. */
4731 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4732 {
4733         if (!cifsInode)
4734                 return true;
4735
4736         if (is_inode_writable(cifsInode)) {
4737                 /* This inode is open for write at least once */
4738                 struct cifs_sb_info *cifs_sb;
4739
4740                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4741                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4742                         /* since there is no page cache to corrupt on
4743                            direct I/O, we can change the size safely */
4744                         return true;
4745                 }
4746
4747                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4748                         return true;
4749
4750                 return false;
4751         } else
4752                 return true;
4753 }
4754
4755 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4756                         loff_t pos, unsigned len,
4757                         struct page **pagep, void **fsdata)
4758 {
4759         int oncethru = 0;
4760         pgoff_t index = pos >> PAGE_SHIFT;
4761         loff_t offset = pos & (PAGE_SIZE - 1);
4762         loff_t page_start = pos & PAGE_MASK;
4763         loff_t i_size;
4764         struct page *page;
4765         int rc = 0;
4766
4767         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4768
4769 start:
4770         page = grab_cache_page_write_begin(mapping, index);
4771         if (!page) {
4772                 rc = -ENOMEM;
4773                 goto out;
4774         }
4775
4776         if (PageUptodate(page))
4777                 goto out;
4778
4779         /*
4780          * If we write a full page it will be up to date, no need to read from
4781          * the server. If the write is short, we'll end up doing a sync write
4782          * instead.
4783          */
4784         if (len == PAGE_SIZE)
4785                 goto out;
4786
4787         /*
4788          * optimize away the read when we have an oplock, and we're not
4789          * expecting to use any of the data we'd be reading in. That
4790          * is, when the page lies beyond the EOF, or straddles the EOF
4791          * and the write will cover all of the existing data.
4792          */
4793         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4794                 i_size = i_size_read(mapping->host);
4795                 if (page_start >= i_size ||
4796                     (offset == 0 && (pos + len) >= i_size)) {
4797                         zero_user_segments(page, 0, offset,
4798                                            offset + len,
4799                                            PAGE_SIZE);
4800                         /*
4801                          * PageChecked means that the parts of the page
4802                          * to which we're not writing are considered up
4803                          * to date. Once the data is copied to the
4804                          * page, it can be set uptodate.
4805                          */
4806                         SetPageChecked(page);
4807                         goto out;
4808                 }
4809         }
4810
4811         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4812                 /*
4813                  * might as well read a page, it is fast enough. If we get
4814                  * an error, we don't need to return it. cifs_write_end will
4815                  * do a sync write instead since PG_uptodate isn't set.
4816                  */
4817                 cifs_readpage_worker(file, page, &page_start);
4818                 put_page(page);
4819                 oncethru = 1;
4820                 goto start;
4821         } else {
4822                 /* we could try using another file handle if there is one -
4823                    but how would we lock it to prevent close of that handle
4824                    racing with this read? In any case
4825                    this will be written out by write_end so is fine */
4826         }
4827 out:
4828         *pagep = page;
4829         return rc;
4830 }
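/*
 * Worked example for the short-write path above, assuming PAGE_SIZE = 4096:
 * a write of len = 100 at pos = 8192 gives index = 2, offset = 0 and
 * page_start = 8192; with a read oplock and i_size = 8000 the page starts
 * beyond EOF, so it is zero-filled and PageChecked is set rather than
 * reading stale contents from the server.
 */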
4831
4832 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4833 {
4834         if (folio_test_private(folio))
4835                 return false;
4836         if (folio_test_fscache(folio)) {
4837                 if (current_is_kswapd() || !(gfp & __GFP_FS))
4838                         return false;
4839                 folio_wait_fscache(folio);
4840         }
4841         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4842         return true;
4843 }
4844
4845 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4846                                  size_t length)
4847 {
4848         folio_wait_fscache(folio);
4849 }
4850
4851 static int cifs_launder_folio(struct folio *folio)
4852 {
4853         int rc = 0;
4854         loff_t range_start = folio_pos(folio);
4855         loff_t range_end = range_start + folio_size(folio);
4856         struct writeback_control wbc = {
4857                 .sync_mode = WB_SYNC_ALL,
4858                 .nr_to_write = 0,
4859                 .range_start = range_start,
4860                 .range_end = range_end,
4861         };
4862
4863         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4864
4865         if (folio_clear_dirty_for_io(folio))
4866                 rc = cifs_writepage_locked(&folio->page, &wbc);
4867
4868         folio_wait_fscache(folio);
4869         return rc;
4870 }
4871
4872 void cifs_oplock_break(struct work_struct *work)
4873 {
4874         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4875                                                   oplock_break);
4876         struct inode *inode = d_inode(cfile->dentry);
4877         struct cifsInodeInfo *cinode = CIFS_I(inode);
4878         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4879         struct TCP_Server_Info *server = tcon->ses->server;
4880         int rc = 0;
4881         bool purge_cache = false;
4882
4883         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4884                         TASK_UNINTERRUPTIBLE);
4885
4886         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4887                                       cfile->oplock_epoch, &purge_cache);
4888
4889         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4890                                                 cifs_has_mand_locks(cinode)) {
4891                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4892                          inode);
4893                 cinode->oplock = 0;
4894         }
4895
4896         if (inode && S_ISREG(inode->i_mode)) {
4897                 if (CIFS_CACHE_READ(cinode))
4898                         break_lease(inode, O_RDONLY);
4899                 else
4900                         break_lease(inode, O_WRONLY);
4901                 rc = filemap_fdatawrite(inode->i_mapping);
4902                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4903                         rc = filemap_fdatawait(inode->i_mapping);
4904                         mapping_set_error(inode->i_mapping, rc);
4905                         cifs_zap_mapping(inode);
4906                 }
4907                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4908                 if (CIFS_CACHE_WRITE(cinode))
4909                         goto oplock_break_ack;
4910         }
4911
4912         rc = cifs_push_locks(cfile);
4913         if (rc)
4914                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4915
4916 oplock_break_ack:
4917         /*
4918          * Releasing a stale oplock after a recent reconnect of the SMB session
4919          * using a now incorrect file handle is not a data integrity issue, but
4920          * don't bother sending an oplock release if the session to the server
4921          * is still disconnected, since the server has already released the oplock.
4922          */
4923         if (!cfile->oplock_break_cancelled) {
4924                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4925                                                              cinode);
4926                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4927         }
4928
4929         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4930         cifs_done_oplock_break(cinode);
4931 }
4932
4933 /*
4934  * The presence of cifs_direct_io() in the address space ops vector
4935  * allows open() O_DIRECT flags which would have failed otherwise.
4936  *
4937  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4938  * so this method should never be called.
4939  *
4940  * Direct IO is not yet supported in the cached mode.
4941  */
4942 static ssize_t
4943 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4944 {
4945         /*
4946          * FIXME
4947          * Eventually need to support direct IO for non forcedirectio mounts
4948          */
4949         return -EINVAL;
4950 }
4951
4952 static int cifs_swap_activate(struct swap_info_struct *sis,
4953                               struct file *swap_file, sector_t *span)
4954 {
4955         struct cifsFileInfo *cfile = swap_file->private_data;
4956         struct inode *inode = swap_file->f_mapping->host;
4957         unsigned long blocks;
4958         long long isize;
4959
4960         cifs_dbg(FYI, "swap activate\n");
4961
4962         if (!swap_file->f_mapping->a_ops->swap_rw)
4963                 /* Cannot support swap */
4964                 return -EINVAL;
4965
4966         spin_lock(&inode->i_lock);
4967         blocks = inode->i_blocks;
4968         isize = inode->i_size;
4969         spin_unlock(&inode->i_lock);
4970         if (blocks * 512 < isize) {
4971                 pr_warn("swap activate: swapfile has holes\n");
4972                 return -EINVAL;
4973         }
4974         *span = sis->pages;
4975
4976         pr_warn_once("Swap support over SMB3 is experimental\n");
4977
4978         /*
4979          * TODO: consider adding ACL (or documenting how) to prevent other
4980          * users (on this or other systems) from reading it
4981          */
4982
4983
4984         /* TODO: add sk_set_memalloc(inet) or similar */
4985
4986         if (cfile)
4987                 cfile->swapfile = true;
4988         /*
4989          * TODO: Since file already open, we can't open with DENY_ALL here
4990          * but we could add call to grab a byte range lock to prevent others
4991          * from reading or writing the file
4992          */
4993
4994         sis->flags |= SWP_FS_OPS;
4995         return add_swap_extent(sis, 0, sis->max, 0);
4996 }
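/*
 * The blocks * 512 < isize test above is the usual "swapfile has holes"
 * heuristic: i_blocks counts 512-byte units, so a fully allocated file
 * must carry at least isize / 512 of them.
 */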
4997
4998 static void cifs_swap_deactivate(struct file *file)
4999 {
5000         struct cifsFileInfo *cfile = file->private_data;
5001
5002         cifs_dbg(FYI, "swap deactivate\n");
5003
5004         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5005
5006         if (cfile)
5007                 cfile->swapfile = false;
5008
5009         /* do we need to unpin (or unlock) the file? */
5010 }
5011
5012 /*
5013  * Mark a page as having been made dirty and thus needing writeback.  We also
5014  * need to pin the cache object to write back to.
5015  */
5016 #ifdef CONFIG_CIFS_FSCACHE
5017 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5018 {
5019         return fscache_dirty_folio(mapping, folio,
5020                                         cifs_inode_cookie(mapping->host));
5021 }
5022 #else
5023 #define cifs_dirty_folio filemap_dirty_folio
5024 #endif
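/*
 * With fscache built in, fscache_dirty_folio() both dirties the folio and
 * pins the cache object so the data can later be written back to the
 * cache; without fscache the hook is plain filemap_dirty_folio().
 */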
5025
5026 const struct address_space_operations cifs_addr_ops = {
5027         .read_folio = cifs_read_folio,
5028         .readahead = cifs_readahead,
5029         .writepages = cifs_writepages,
5030         .write_begin = cifs_write_begin,
5031         .write_end = cifs_write_end,
5032         .dirty_folio = cifs_dirty_folio,
5033         .release_folio = cifs_release_folio,
5034         .direct_IO = cifs_direct_io,
5035         .invalidate_folio = cifs_invalidate_folio,
5036         .launder_folio = cifs_launder_folio,
5037         .migrate_folio = filemap_migrate_folio,
5038         /*
5039          * TODO: investigate and if useful we could add an is_dirty_writeback
5040          * helper if needed
5041          */
5042         .swap_activate = cifs_swap_activate,
5043         .swap_deactivate = cifs_swap_deactivate,
5044 };
5045
5046 /*
5047  * cifs_readahead requires the server to support a buffer large enough to
5048  * contain the header plus one complete page of data.  Otherwise, we need
5049  * to leave cifs_readahead out of the address space operations.
5050  */
5051 const struct address_space_operations cifs_addr_ops_smallbuf = {
5052         .read_folio = cifs_read_folio,
5053         .writepages = cifs_writepages,
5054         .write_begin = cifs_write_begin,
5055         .write_end = cifs_write_end,
5056         .dirty_folio = cifs_dirty_folio,
5057         .release_folio = cifs_release_folio,
5058         .invalidate_folio = cifs_invalidate_folio,
5059         .launder_folio = cifs_launder_folio,
5060         .migrate_folio = filemap_migrate_folio,
5061 };
5062
5063 /*
5064  * Splice data from a file into a pipe.
5065  */
5066 ssize_t cifs_splice_read(struct file *in, loff_t *ppos,
5067                          struct pipe_inode_info *pipe, size_t len,
5068                          unsigned int flags)
5069 {
5070         if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
5071                 return 0;
5072         if (unlikely(!len))
5073                 return 0;
5074         if (in->f_flags & O_DIRECT)
5075                 return direct_splice_read(in, ppos, pipe, len, flags);
5076         return filemap_splice_read(in, ppos, pipe, len, flags);
5077 }
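/*
 * O_DIRECT opens bypass the page cache via direct_splice_read(); all other
 * opens are spliced straight out of the page cache by filemap_splice_read().
 */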