/*
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can
		   cause unnecessary access-denied errors on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
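
/*
 * Illustrative sketch (not part of the original file): what the mapping
 * above works out to for the common access modes. The helper below is
 * hypothetical and exists only to document the expected results.
 */
static __maybe_unused void cifs_convert_flags_example(void)
{
	/* read-only opens ask the server for GENERIC_READ only */
	WARN_ON(cifs_convert_flags(O_RDONLY) != GENERIC_READ);
	/* O_RDWR asks for both bits rather than the too-broad GENERIC_ALL */
	WARN_ON(cifs_convert_flags(O_RDWR) != (GENERIC_READ | GENERIC_WRITE));
}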

static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
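
/*
 * Illustrative sketch (hypothetical helper, added for clarity): the checks
 * above are ordered, so O_CREAT | O_EXCL wins over O_TRUNC. An open with
 * O_CREAT | O_EXCL | O_TRUNC therefore maps to FILE_CREATE rather than
 * FILE_OVERWRITE_IF, and a plain open falls through to FILE_OPEN.
 */
static __maybe_unused void cifs_get_disposition_example(void)
{
	WARN_ON(cifs_get_disposition(O_CREAT | O_EXCL | O_TRUNC) !=
		FILE_CREATE);
	WARN_ON(cifs_get_disposition(0) != FILE_OPEN);
}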

int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
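
/*
 * Illustrative caller sketch (hypothetical, mirrors the real call site in
 * cifs_open() further down): the caller supplies the netfid and oplock
 * out-parameters and may pass a NULL inode pointer when it does not need
 * the inode instantiated.
 */
static __maybe_unused int cifs_posix_open_example(char *full_path,
						  struct inode **pinode,
						  struct super_block *sb,
						  unsigned int f_flags,
						  unsigned int xid)
{
	__u32 oplock;
	__u16 netfid;

	/* the mode argument is ignored for opens of existing files */
	return cifs_posix_open(full_path, pinode, sb, 0644 /* mode */,
			       f_flags, &oplock, &netfid, xid);
}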

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists) and any of
 *	the above: O_CREAT | O_TRUNC is similar, but it truncates the
 *	existing file rather than creating a new file as FILE_SUPERSEDE
 *	does (which uses the attributes / metadata passed in on open call).
 *
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc)
		server->ops->close(xid, tcon, fid);

out:
	kfree(buf);
	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

static void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}
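
/*
 * Sketch of the intended usage (assumption, based on the call sites in this
 * file): spinning on down_write_trylock() never queues a writer on the
 * rwsem, so reader paths that must complete before the write can proceed
 * are not blocked behind a queued down_write().
 */
static __maybe_unused void cifs_down_write_example(struct cifsInodeInfo *cinode)
{
	cifs_down_write(&cinode->lock_sem);
	/* ... modify cinode->llist or the per-fid lock lists here ... */
	up_write(&cinode->lock_sem);
}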

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if a readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
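
/*
 * Illustrative sketch of the reference-counting pattern (hypothetical
 * caller): take a reference so the handle cannot be freed or closed out
 * from under us, use it, then drop the reference with cifsFileInfo_put().
 * cifs_reopen_persistent_handles() below follows exactly this pattern.
 */
static __maybe_unused void cifs_fileinfo_ref_example(struct cifsFileInfo *cfile)
{
	cifsFileInfo_get(cfile);
	/* ... safe to dereference cfile->fid, cfile->dentry, etc. here ... */
	cifsFileInfo_put(cfile);
}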

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:	cifs/smb3 specific info (e.g. refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_for_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:	cifs/smb3 specific info (e.g. refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	if true, offload the final put to a workqueue; false when
 *		called from the close and oplock break paths
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}
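
/*
 * Note on the offload path above (added commentary): deferring the final
 * put to fileinfo_put_wq lets callers that cannot block on the lock,
 * dput and superblock work in cifsFileInfo_put_final() drop their last
 * reference safely, while the close and oplock-break paths pass
 * offload == false and do the teardown synchronously.
 */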

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		_cifsFileInfo_put(file->private_data, true, false);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}
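
/*
 * Added commentary: this wake-up is one half of the blocked-lock handshake.
 * A conflicting waiter links itself on conf_lock->blist and sleeps on its
 * own block_q (see cifs_lock_add_if() below); deleting the lock runs the
 * helper above, which unlinks each waiter and wakes it so it can retry.
 */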

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : CIFS_LOCK_OP (no op), CIFS_READ_OP, or CIFS_WRITE_OP */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
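
/*
 * Illustrative sketch (hypothetical caller) of the three outcomes documented
 * above: 0 means the lock was cached locally, 1 means the caller must now
 * send the lock request to the server, and -EACCES reports a conflict when
 * waiting was not allowed. cifs_setlk() below handles them the same way.
 */
static __maybe_unused int cifs_lock_add_if_example(struct cifsFileInfo *cfile,
						   struct cifsLockInfo *lock)
{
	int rc = cifs_lock_add_if(cfile, lock, false /* don't wait */);

	if (rc < 0)
		return rc;	/* conflicting lock, and not waiting */
	if (rc == 1)
		return 1;	/* caller must ask the server */
	return 0;		/* cached locally, nothing to send */
}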

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}

static int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static inline __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
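
/*
 * Added commentary: the SMB1 POSIX lock calls below carry no lock-owner
 * field, only a 32-bit pid, so the owner pointer is hashed (mixed with the
 * cifs_lock_secret nonce so kernel pointer values are not leaked on the
 * wire) and sent in place of the pid. See the CIFSSMBPosixLock() call
 * sites in cifs_push_posix_locks(), cifs_getlk() and cifs_setlk().
 */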

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

static void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

static void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}

int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}
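
/*
 * Illustrative sketch (hypothetical helper) of the locking contract stated
 * above: callers take inode->i_lock around the update, exactly as
 * cifs_write() below does.
 */
static __maybe_unused void cifs_update_eof_example(struct inode *inode,
						   struct cifsInodeInfo *cifsi,
						   loff_t offset,
						   unsigned int bytes_written)
{
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, offset, bytes_written);
	spin_unlock(&inode->i_lock);
}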

static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			free_xid(xid);
			return rc;
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}

struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}
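
/*
 * Added commentary: the early break on a write-only file above works
 * because cifs_new_fileinfo() inserts readable instances at the head of
 * openFileList and write-only ones at the tail, so once a write-only
 * entry is reached no readable handle can follow.
 */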

/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;

	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			}
			if (!inv_file)
				inv_file = open_file;
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}

struct cifsFileInfo *
find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
{
	struct cifsFileInfo *cfile;
	int rc;

	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
	if (rc)
		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);

	return cfile;
}

int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
		       int flags,
		       struct cifsFileInfo **ret_file)
{
	struct list_head *tmp;
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode;
	char *full_path;

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		cfile = list_entry(tmp, struct cifsFileInfo,
			     tlist);
		full_path = build_path_from_dentry(cfile->dentry);
		if (full_path == NULL) {
			spin_unlock(&tcon->open_file_lock);
			return -ENOMEM;
		}
		if (strcmp(full_path, name)) {
			kfree(full_path);
			continue;
		}

		kfree(full_path);
		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		return cifs_get_writable_file(cinode, flags, ret_file);
	}

	spin_unlock(&tcon->open_file_lock);
	return -ENOENT;
}

int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
		       struct cifsFileInfo **ret_file)
{
	struct list_head *tmp;
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode;
	char *full_path;

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		cfile = list_entry(tmp, struct cifsFileInfo,
			     tlist);
		full_path = build_path_from_dentry(cfile->dentry);
		if (full_path == NULL) {
			spin_unlock(&tcon->open_file_lock);
			return -ENOMEM;
		}
		if (strcmp(full_path, name)) {
			kfree(full_path);
			continue;
		}

		kfree(full_path);
		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		*ret_file = find_readable_file(cinode, 0);
		return *ret_file ? 0 : -ENOENT;
	}

	spin_unlock(&tcon->open_file_lock);
	return -ENOENT;
}

static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}

static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
			  pgoff_t end, pgoff_t *index,
			  unsigned int *found_pages)
{
	struct cifs_writedata *wdata;

	wdata = cifs_writedata_alloc((unsigned int)tofind,
				     cifs_writev_complete);
	if (!wdata)
		return NULL;

	*found_pages = find_get_pages_range_tag(mapping, index, end,
				PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
	return wdata;
}

static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}

static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
{
	int rc;

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
	wdata->pagesz = PAGE_SIZE;
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
			(loff_t)PAGE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
	wdata->pid = wdata->cfile->pid;

	rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
	if (rc)
		return rc;

	if (wdata->cfile->invalidHandle)
		rc = -EAGAIN;
	else
		rc = wdata->server->ops->async_writev(wdata,
						      cifs_writedata_release);

	return rc;
}
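
/*
 * Worked example of the tailsz/bytes math above (added for clarity,
 * hypothetical numbers): for a 10000-byte file and nr_pages == 3 with
 * 4096-byte pages, the last page starts at offset 8192, so
 * tailsz = min(10000 - 8192, 4096) = 1808 and
 * bytes = 2 * 4096 + 1808 = 10000 - exactly up to EOF, never past it.
 */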
2321 static int cifs_writepages(struct address_space *mapping,
2322 struct writeback_control *wbc)
2324 struct inode *inode = mapping->host;
2325 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2326 struct TCP_Server_Info *server;
2327 bool done = false, scanned = false, range_whole = false;
2329 struct cifs_writedata *wdata;
2330 struct cifsFileInfo *cfile = NULL;
2336 * If wsize is smaller than the page cache size, default to writing
2337 * one page at a time via cifs_writepage
2339 if (cifs_sb->ctx->wsize < PAGE_SIZE)
2340 return generic_writepages(mapping, wbc);
2343 if (wbc->range_cyclic) {
2344 index = mapping->writeback_index; /* Start from prev offset */
2347 index = wbc->range_start >> PAGE_SHIFT;
2348 end = wbc->range_end >> PAGE_SHIFT;
2349 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2353 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2356 while (!done && index <= end) {
2357 unsigned int i, nr_pages, found_pages, wsize;
2358 pgoff_t next = 0, tofind, saved_index = index;
2359 struct cifs_credits credits_on_stack;
2360 struct cifs_credits *credits = &credits_on_stack;
2361 int get_file_rc = 0;
2364 cifsFileInfo_put(cfile);
2366 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2368 /* in case of an error store it to return later */
2372 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2379 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2381 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2386 add_credits_and_wake_if(server, credits, 0);
2390 if (found_pages == 0) {
2391 kref_put(&wdata->refcount, cifs_writedata_release);
2392 add_credits_and_wake_if(server, credits, 0);
2396 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2397 end, &index, &next, &done);
2399 /* nothing to write? */
2400 if (nr_pages == 0) {
2401 kref_put(&wdata->refcount, cifs_writedata_release);
2402 add_credits_and_wake_if(server, credits, 0);
2406 wdata->credits = credits_on_stack;
2407 wdata->cfile = cfile;
2408 wdata->server = server;
2411 if (!wdata->cfile) {
2412 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2414 if (is_retryable_error(get_file_rc))
2419 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2421 for (i = 0; i < nr_pages; ++i)
2422 unlock_page(wdata->pages[i]);
2424 /* send failure -- clean up the mess */
2426 add_credits_and_wake_if(server, &wdata->credits, 0);
2427 for (i = 0; i < nr_pages; ++i) {
2428 if (is_retryable_error(rc))
2429 redirty_page_for_writepage(wbc,
2432 SetPageError(wdata->pages[i]);
2433 end_page_writeback(wdata->pages[i]);
2434 put_page(wdata->pages[i]);
2436 if (!is_retryable_error(rc))
2437 mapping_set_error(mapping, rc);
2439 kref_put(&wdata->refcount, cifs_writedata_release);
2441 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2442 index = saved_index;
2446 /* Return immediately if we received a signal during writing */
2447 if (is_interrupt_error(rc)) {
2452 if (rc != 0 && saved_rc == 0)
2455 wbc->nr_to_write -= nr_pages;
2456 if (wbc->nr_to_write <= 0)
2462 if (!scanned && !done) {
2464 * We hit the last page and there is more work to be done: wrap
2465 * back to the start of the file
2475 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2476 mapping->writeback_index = index;
2479 cifsFileInfo_put(cfile);
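/*
 * Illustration (editor's sketch, not part of the driver): the batch size
 * above, min((wsize / PAGE_SIZE) - 1, end - index) + 1, is equivalent to
 * min(wsize / PAGE_SIZE, end - index + 1), presumably written this way so
 * that end == (pgoff_t)-1 in the cyclic case does not overflow. A
 * hypothetical model of that clamp:
 */
#if 0	/* standalone sketch, never built with this file */
static unsigned long demo_tofind(unsigned long wsize, unsigned long page_size,
				 unsigned long index, unsigned long end)
{
	unsigned long by_wsize = wsize / page_size;  /* pages per request */
	unsigned long by_range = end - index;        /* pages left, minus 1 */

	return (by_wsize - 1 < by_range ? by_wsize - 1 : by_range) + 1;
}
#endif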
2485 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2491 /* BB add check for wbc flags */
2493 if (!PageUptodate(page))
2494 cifs_dbg(FYI, "ppw - page not up to date\n");
2497 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2499 * A writepage() implementation always needs to do either this,
2500 * or re-dirty the page with "redirty_page_for_writepage()" in
2501 * the case of a failure.
2503 * Just unlocking the page would leave the radix tree tag-bits
2504 * out of sync with the actual state of the page.
2506 set_page_writeback(page);
2508 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2509 if (is_retryable_error(rc)) {
2510 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2512 redirty_page_for_writepage(wbc, page);
2513 } else if (rc != 0) {
2515 mapping_set_error(page->mapping, rc);
2517 SetPageUptodate(page);
2519 end_page_writeback(page);
2525 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2527 int rc = cifs_writepage_locked(page, wbc);
2532 static int cifs_write_end(struct file *file, struct address_space *mapping,
2533 loff_t pos, unsigned len, unsigned copied,
2534 struct page *page, void *fsdata)
2537 struct inode *inode = mapping->host;
2538 struct cifsFileInfo *cfile = file->private_data;
2539 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2542 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2545 pid = current->tgid;
2547 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2550 if (PageChecked(page)) {
2552 SetPageUptodate(page);
2553 ClearPageChecked(page);
2554 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2555 SetPageUptodate(page);
2557 if (!PageUptodate(page)) {
2559 unsigned offset = pos & (PAGE_SIZE - 1);
2563 /* This is probably better than calling cifs_partialpagewrite
2564 directly, since here the file handle is already known and we
2565 might as well leverage it */
2566 /* BB check whether anything else from ppw is missing here,
2567 such as updating the last write time */
2568 page_data = kmap(page);
2569 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2570 /* if (rc < 0) should we set writebehind rc? */
2577 set_page_dirty(page);
2581 spin_lock(&inode->i_lock);
2582 if (pos > inode->i_size)
2583 i_size_write(inode, pos);
2584 spin_unlock(&inode->i_lock);
2593 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2598 struct cifs_tcon *tcon;
2599 struct TCP_Server_Info *server;
2600 struct cifsFileInfo *smbfile = file->private_data;
2601 struct inode *inode = file_inode(file);
2602 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2604 rc = file_write_and_wait_range(file, start, end);
2606 trace_cifs_fsync_err(inode->i_ino, rc);
2612 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2615 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2616 rc = cifs_zap_mapping(inode);
2618 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2619 rc = 0; /* don't care about it in fsync */
2623 tcon = tlink_tcon(smbfile->tlink);
2624 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2625 server = tcon->ses->server;
2626 if (server->ops->flush)
2627 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2636 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2640 struct cifs_tcon *tcon;
2641 struct TCP_Server_Info *server;
2642 struct cifsFileInfo *smbfile = file->private_data;
2643 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2645 rc = file_write_and_wait_range(file, start, end);
2647 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2653 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2656 tcon = tlink_tcon(smbfile->tlink);
2657 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2658 server = tcon->ses->server;
2659 if (server->ops->flush)
2660 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2670 * As the file closes, flush all cached write data for this inode and
2671 * check for writebehind errors.
2673 int cifs_flush(struct file *file, fl_owner_t id)
2675 struct inode *inode = file_inode(file);
2678 if (file->f_mode & FMODE_WRITE)
2679 rc = filemap_write_and_wait(inode->i_mapping);
2681 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2683 trace_cifs_flush_err(inode->i_ino, rc);
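/*
 * Illustration (editor's sketch, not part of the driver): from userspace,
 * the paths above are reached through fsync(2) and close(2). A minimal
 * usage sketch; the mount point and file name are hypothetical, and which
 * fsync handler runs depends on the cache= mount option.
 */
#if 0	/* standalone userspace sketch, never built with this file */
#include <fcntl.h>
#include <unistd.h>

static int demo_write_and_sync(void)
{
	int fd = open("/mnt/cifs/demo.txt", O_WRONLY | O_CREAT, 0644);

	if (fd < 0)
		return -1;
	if (write(fd, "hello", 5) != 5 || fsync(fd) != 0) {
		close(fd);	/* fsync() lands in cifs_strict_fsync()
				 * or cifs_fsync() */
		return -1;
	}
	return close(fd);	/* close() flushes via cifs_flush() */
}
#endif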
2688 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2693 for (i = 0; i < num_pages; i++) {
2694 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2697 * save number of pages we have already allocated and
2698 * return with ENOMEM error
2707 for (i = 0; i < num_pages; i++)
2714 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2719 clen = min_t(const size_t, len, wsize);
2720 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
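/*
 * Illustration (editor's sketch, not part of the driver): DIV_ROUND_UP()
 * above is plain ceiling division -- how many PAGE_SIZE pages are needed
 * to hold clen bytes. A worked model with a hypothetical name:
 */
#if 0	/* standalone sketch, never built with this file */
#include <stddef.h>

static size_t demo_div_round_up(size_t n, size_t d)
{
	return (n + d - 1) / d;
}
/*
 * demo_div_round_up(4096, 4096) == 1: exactly one full page
 * demo_div_round_up(4097, 4096) == 2: one full page plus a 1-byte tail
 */
#endif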
2729 cifs_uncached_writedata_release(struct kref *refcount)
2732 struct cifs_writedata *wdata = container_of(refcount,
2733 struct cifs_writedata, refcount);
2735 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2736 for (i = 0; i < wdata->nr_pages; i++)
2737 put_page(wdata->pages[i]);
2738 cifs_writedata_release(refcount);
2741 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2744 cifs_uncached_writev_complete(struct work_struct *work)
2746 struct cifs_writedata *wdata = container_of(work,
2747 struct cifs_writedata, work);
2748 struct inode *inode = d_inode(wdata->cfile->dentry);
2749 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2751 spin_lock(&inode->i_lock);
2752 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2753 if (cifsi->server_eof > inode->i_size)
2754 i_size_write(inode, cifsi->server_eof);
2755 spin_unlock(&inode->i_lock);
2757 complete(&wdata->done);
2758 collect_uncached_write_data(wdata->ctx);
2759 /* the below call can possibly free the last ref to aio ctx */
2760 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2764 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2765 size_t *len, unsigned long *num_pages)
2767 size_t save_len, copied, bytes, cur_len = *len;
2768 unsigned long i, nr_pages = *num_pages;
2771 for (i = 0; i < nr_pages; i++) {
2772 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2773 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2776 * If we didn't copy as much as we expected, then that
2777 * may mean we trod into an unmapped area. Stop copying
2778 * at that point. On the next pass through the big
2779 * loop, we'll likely end up getting a zero-length
2780 * write and bailing out of it.
2785 cur_len = save_len - cur_len;
2789 * If we have no data to send, then that probably means that
2790 * the copy above failed altogether. That's most likely because
2791 * the address in the iovec was bogus. Return -EFAULT and let
2792 * the caller free anything we allocated and bail out.
2798 * i + 1 now represents the number of pages we actually used in
2799 * the copy phase above.
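/*
 * Illustration (editor's sketch, not part of the driver): the loop above
 * stops at the first short copy and reports only what was staged, leaving
 * the zero-length case to the caller. A simplified userspace model;
 * demo_fill_pages() and its fixed 4096-byte pages are hypothetical.
 */
#if 0	/* standalone sketch, never built with this file */
#include <stddef.h>

/* copy() mimics copy_page_from_iter(): it may return less than asked */
static size_t demo_fill_pages(char pages[][4096], size_t nr_pages,
			      const char *src, size_t len,
			      size_t (*copy)(void *dst, const void *s, size_t n))
{
	size_t staged = 0;

	for (size_t i = 0; i < nr_pages && staged < len; i++) {
		size_t want = len - staged < 4096 ? len - staged : 4096;
		size_t got = copy(pages[i], src + staged, want);

		staged += got;
		if (got < want)		/* trod into an unmapped area */
			break;
	}
	return staged;		/* caller treats 0 as -EFAULT */
}
#endif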
2806 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2807 struct cifs_aio_ctx *ctx)
2810 struct cifs_credits credits;
2812 struct TCP_Server_Info *server = wdata->server;
2815 if (wdata->cfile->invalidHandle) {
2816 rc = cifs_reopen_file(wdata->cfile, false);
2825 * Wait for credits to resend this wdata.
2826 * Note: we attempt to resend the whole wdata, not smaller pieces.
2830 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2835 if (wsize < wdata->bytes) {
2836 add_credits_and_wake_if(server, &credits, 0);
2839 } while (wsize < wdata->bytes);
2840 wdata->credits = credits;
2842 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2845 if (wdata->cfile->invalidHandle)
2848 #ifdef CONFIG_CIFS_SMB_DIRECT
2850 wdata->mr->need_invalidate = true;
2851 smbd_deregister_mr(wdata->mr);
2855 rc = server->ops->async_writev(wdata,
2856 cifs_uncached_writedata_release);
2860 /* If the write was successfully sent, we are done */
2862 list_add_tail(&wdata->list, wdata_list);
2866 /* Roll back credits and retry if needed */
2867 add_credits_and_wake_if(server, &wdata->credits, 0);
2868 } while (rc == -EAGAIN);
2871 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2876 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2877 struct cifsFileInfo *open_file,
2878 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2879 struct cifs_aio_ctx *ctx)
2883 unsigned long nr_pages, num_pages, i;
2884 struct cifs_writedata *wdata;
2885 struct iov_iter saved_from = *from;
2886 loff_t saved_offset = offset;
2888 struct TCP_Server_Info *server;
2889 struct page **pagevec;
2893 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2894 pid = open_file->pid;
2896 pid = current->tgid;
2898 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2903 struct cifs_credits credits_on_stack;
2904 struct cifs_credits *credits = &credits_on_stack;
2906 if (open_file->invalidHandle) {
2907 rc = cifs_reopen_file(open_file, false);
2914 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2919 cur_len = min_t(const size_t, len, wsize);
2921 if (ctx->direct_io) {
2924 result = iov_iter_get_pages_alloc(
2925 from, &pagevec, cur_len, &start);
2928 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2929 result, iov_iter_type(from),
2930 from->iov_offset, from->count);
2934 add_credits_and_wake_if(server, credits, 0);
2937 cur_len = (size_t)result;
2938 iov_iter_advance(from, cur_len);
2941 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2943 wdata = cifs_writedata_direct_alloc(pagevec,
2944 cifs_uncached_writev_complete);
2947 add_credits_and_wake_if(server, credits, 0);
2952 wdata->page_offset = start;
2955 cur_len - (PAGE_SIZE - start) -
2956 (nr_pages - 2) * PAGE_SIZE :
2959 nr_pages = get_numpages(wsize, len, &cur_len);
2960 wdata = cifs_writedata_alloc(nr_pages,
2961 cifs_uncached_writev_complete);
2964 add_credits_and_wake_if(server, credits, 0);
2968 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2970 kvfree(wdata->pages);
2972 add_credits_and_wake_if(server, credits, 0);
2976 num_pages = nr_pages;
2977 rc = wdata_fill_from_iovec(
2978 wdata, from, &cur_len, &num_pages);
2980 for (i = 0; i < nr_pages; i++)
2981 put_page(wdata->pages[i]);
2982 kvfree(wdata->pages);
2984 add_credits_and_wake_if(server, credits, 0);
2989 * Bring nr_pages down to the number of pages we
2990 * actually used, and free any pages that we didn't use.
2992 for ( ; nr_pages > num_pages; nr_pages--)
2993 put_page(wdata->pages[nr_pages - 1]);
2995 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2998 wdata->sync_mode = WB_SYNC_ALL;
2999 wdata->nr_pages = nr_pages;
3000 wdata->offset = (__u64)offset;
3001 wdata->cfile = cifsFileInfo_get(open_file);
3002 wdata->server = server;
3004 wdata->bytes = cur_len;
3005 wdata->pagesz = PAGE_SIZE;
3006 wdata->credits = credits_on_stack;
3008 kref_get(&ctx->refcount);
3010 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3013 if (wdata->cfile->invalidHandle)
3016 rc = server->ops->async_writev(wdata,
3017 cifs_uncached_writedata_release);
3021 add_credits_and_wake_if(server, &wdata->credits, 0);
3022 kref_put(&wdata->refcount,
3023 cifs_uncached_writedata_release);
3024 if (rc == -EAGAIN) {
3026 iov_iter_advance(from, offset - saved_offset);
3032 list_add_tail(&wdata->list, wdata_list);
3041 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3043 struct cifs_writedata *wdata, *tmp;
3044 struct cifs_tcon *tcon;
3045 struct cifs_sb_info *cifs_sb;
3046 struct dentry *dentry = ctx->cfile->dentry;
3049 tcon = tlink_tcon(ctx->cfile->tlink);
3050 cifs_sb = CIFS_SB(dentry->d_sb);
3052 mutex_lock(&ctx->aio_mutex);
3054 if (list_empty(&ctx->list)) {
3055 mutex_unlock(&ctx->aio_mutex);
3061 * Wait for and collect replies for any successful sends in order of
3062 * increasing offset. Once an error is hit, return without waiting
3063 * for any more replies.
3066 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3068 if (!try_wait_for_completion(&wdata->done)) {
3069 mutex_unlock(&ctx->aio_mutex);
3076 ctx->total_len += wdata->bytes;
3078 /* resend call if it's a retryable error */
3079 if (rc == -EAGAIN) {
3080 struct list_head tmp_list;
3081 struct iov_iter tmp_from = ctx->iter;
3083 INIT_LIST_HEAD(&tmp_list);
3084 list_del_init(&wdata->list);
3087 rc = cifs_resend_wdata(
3088 wdata, &tmp_list, ctx);
3090 iov_iter_advance(&tmp_from,
3091 wdata->offset - ctx->pos);
3093 rc = cifs_write_from_iter(wdata->offset,
3094 wdata->bytes, &tmp_from,
3095 ctx->cfile, cifs_sb, &tmp_list,
3098 kref_put(&wdata->refcount,
3099 cifs_uncached_writedata_release);
3102 list_splice(&tmp_list, &ctx->list);
3106 list_del_init(&wdata->list);
3107 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3110 cifs_stats_bytes_written(tcon, ctx->total_len);
3111 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3113 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3115 mutex_unlock(&ctx->aio_mutex);
3117 if (ctx->iocb && ctx->iocb->ki_complete)
3118 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3120 complete(&ctx->done);
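/*
 * Illustration (editor's sketch, not part of the driver): the collection
 * loop above embodies a simple policy -- consume completions in increasing
 * offset order, resend on -EAGAIN, and stop at the first hard error. A
 * stripped-down model of that policy with hypothetical types:
 */
#if 0	/* standalone sketch, never built with this file */
#include <errno.h>

struct demo_req {
	int result;		/* 0, -EAGAIN, or a hard error */
	int resent;		/* resend at most once in this model */
	unsigned int bytes;
};

static long demo_collect(struct demo_req *reqs, int n)
{
	long total = 0;

	for (int i = 0; i < n; i++) {
		if (reqs[i].result == -EAGAIN && !reqs[i].resent) {
			reqs[i].resent = 1;
			reqs[i].result = 0;	/* pretend the resend won */
		}
		if (reqs[i].result)
			return reqs[i].result;	/* first hard error wins */
		total += reqs[i].bytes;
	}
	return total;
}
#endif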
3123 static ssize_t __cifs_writev(
3124 struct kiocb *iocb, struct iov_iter *from, bool direct)
3126 struct file *file = iocb->ki_filp;
3127 ssize_t total_written = 0;
3128 struct cifsFileInfo *cfile;
3129 struct cifs_tcon *tcon;
3130 struct cifs_sb_info *cifs_sb;
3131 struct cifs_aio_ctx *ctx;
3132 struct iov_iter saved_from = *from;
3133 size_t len = iov_iter_count(from);
3137 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC.
3138 * In this case, fall back to the non-direct write function.
3139 * This could be improved by getting pages directly in ITER_KVEC.
3141 if (direct && iov_iter_is_kvec(from)) {
3142 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3146 rc = generic_write_checks(iocb, from);
3150 cifs_sb = CIFS_FILE_SB(file);
3151 cfile = file->private_data;
3152 tcon = tlink_tcon(cfile->tlink);
3154 if (!tcon->ses->server->ops->async_writev)
3157 ctx = cifs_aio_ctx_alloc();
3161 ctx->cfile = cifsFileInfo_get(cfile);
3163 if (!is_sync_kiocb(iocb))
3166 ctx->pos = iocb->ki_pos;
3169 ctx->direct_io = true;
3173 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3175 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3180 /* grab a lock here because the write response handlers can access ctx */
3181 mutex_lock(&ctx->aio_mutex);
3183 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3184 cfile, cifs_sb, &ctx->list, ctx);
3187 * If at least one write was successfully sent, then discard any rc
3188 * value from the later writes. If the other write succeeds, then
3189 * we'll end up returning whatever was written. If it fails, then
3190 * we'll get a new rc value from that.
3192 if (!list_empty(&ctx->list))
3195 mutex_unlock(&ctx->aio_mutex);
3198 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3202 if (!is_sync_kiocb(iocb)) {
3203 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3204 return -EIOCBQUEUED;
3207 rc = wait_for_completion_killable(&ctx->done);
3209 mutex_lock(&ctx->aio_mutex);
3210 ctx->rc = rc = -EINTR;
3211 total_written = ctx->total_len;
3212 mutex_unlock(&ctx->aio_mutex);
3215 total_written = ctx->total_len;
3218 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3220 if (unlikely(!total_written))
3223 iocb->ki_pos += total_written;
3224 return total_written;
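/*
 * Illustration (editor's sketch, not part of the driver): the tail of
 * __cifs_writev() splits on is_sync_kiocb() -- async callers get
 * -EIOCBQUEUED at once and are finished later through ki_complete(),
 * while sync callers block on the completion. A hypothetical model;
 * the DEMO_* constants mirror the kernel errno values.
 */
#if 0	/* standalone sketch, never built with this file */
#include <stdbool.h>

#define DEMO_EIOCBQUEUED 529	/* kernel-internal errno value */
#define DEMO_EINTR	 4

static long demo_writev_tail(bool sync, bool wait_interrupted,
			     long total_written)
{
	if (!sync)
		return -DEMO_EIOCBQUEUED;  /* completion arrives later */
	if (wait_interrupted)
		return -DEMO_EINTR;        /* killable wait broke out */
	return total_written;
}
#endif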
3227 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3229 return __cifs_writev(iocb, from, true);
3232 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3234 return __cifs_writev(iocb, from, false);
3238 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3240 struct file *file = iocb->ki_filp;
3241 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3242 struct inode *inode = file->f_mapping->host;
3243 struct cifsInodeInfo *cinode = CIFS_I(inode);
3244 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3249 * We need to hold the sem to be sure nobody modifies lock list
3250 * with a brlock that prevents writing.
3252 down_read(&cinode->lock_sem);
3254 rc = generic_write_checks(iocb, from);
3258 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3259 server->vals->exclusive_lock_type, 0,
3260 NULL, CIFS_WRITE_OP))
3261 rc = __generic_file_write_iter(iocb, from);
3265 up_read(&cinode->lock_sem);
3266 inode_unlock(inode);
3269 rc = generic_write_sync(iocb, rc);
3274 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3276 struct inode *inode = file_inode(iocb->ki_filp);
3277 struct cifsInodeInfo *cinode = CIFS_I(inode);
3278 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3279 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3280 iocb->ki_filp->private_data;
3281 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3284 written = cifs_get_writer(cinode);
3288 if (CIFS_CACHE_WRITE(cinode)) {
3289 if (cap_unix(tcon->ses) &&
3290 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3291 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3292 written = generic_file_write_iter(iocb, from);
3295 written = cifs_writev(iocb, from);
3299 * For non-oplocked files in strict cache mode we need to write the data
3300 * to the server exactly from pos to pos+len-1 rather than flush all
3301 * affected pages, because flushing may cause an error with mandatory
3302 * locks on these pages but not on the region from pos to pos+len-1.
3304 written = cifs_user_writev(iocb, from);
3305 if (CIFS_CACHE_READ(cinode)) {
3307 * We have read level caching and we have just sent a write
3308 * request to the server thus making data in the cache stale.
3309 * Zap the cache and set oplock/lease level to NONE to avoid
3310 * reading stale data from the cache. All subsequent read
3311 * operations will read new data from the server.
3313 cifs_zap_mapping(inode);
3314 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3319 cifs_put_writer(cinode);
3323 static struct cifs_readdata *
3324 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3326 struct cifs_readdata *rdata;
3328 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3329 if (rdata != NULL) {
3330 rdata->pages = pages;
3331 kref_init(&rdata->refcount);
3332 INIT_LIST_HEAD(&rdata->list);
3333 init_completion(&rdata->done);
3334 INIT_WORK(&rdata->work, complete);
3340 static struct cifs_readdata *
3341 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3343 struct page **pages =
3344 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3345 struct cifs_readdata *ret = NULL;
3348 ret = cifs_readdata_direct_alloc(pages, complete);
3357 cifs_readdata_release(struct kref *refcount)
3359 struct cifs_readdata *rdata = container_of(refcount,
3360 struct cifs_readdata, refcount);
3361 #ifdef CONFIG_CIFS_SMB_DIRECT
3363 smbd_deregister_mr(rdata->mr);
3368 cifsFileInfo_put(rdata->cfile);
3370 kvfree(rdata->pages);
3375 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3381 for (i = 0; i < nr_pages; i++) {
3382 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3387 rdata->pages[i] = page;
3391 unsigned int nr_page_failed = i;
3393 for (i = 0; i < nr_page_failed; i++) {
3394 put_page(rdata->pages[i]);
3395 rdata->pages[i] = NULL;
3402 cifs_uncached_readdata_release(struct kref *refcount)
3404 struct cifs_readdata *rdata = container_of(refcount,
3405 struct cifs_readdata, refcount);
3408 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3409 for (i = 0; i < rdata->nr_pages; i++) {
3410 put_page(rdata->pages[i]);
3412 cifs_readdata_release(refcount);
3416 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3417 * @rdata: the readdata response with list of pages holding data
3418 * @iter: destination for our data
3420 * This function copies data from a list of pages in a readdata response into
3421 * an array of iovecs. It will first calculate where the data should go
3422 * based on the info in the readdata and then copy the data into that spot.
3425 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3427 size_t remaining = rdata->got_bytes;
3430 for (i = 0; i < rdata->nr_pages; i++) {
3431 struct page *page = rdata->pages[i];
3432 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3435 if (unlikely(iov_iter_is_pipe(iter))) {
3436 void *addr = kmap_atomic(page);
3438 written = copy_to_iter(addr, copy, iter);
3439 kunmap_atomic(addr);
3441 written = copy_page_to_iter(page, 0, copy, iter);
3442 remaining -= written;
3443 if (written < copy && iov_iter_count(iter) > 0)
3446 return remaining ? -EFAULT : 0;
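/*
 * Illustration (editor's sketch, not part of the driver): the copy-out
 * above takes min(remaining, PAGE_SIZE) from each page and reports -EFAULT
 * if the destination ran dry first. A simplified flat-buffer model;
 * demo_pages_to_buf() and the 4096-byte pages are hypothetical.
 */
#if 0	/* standalone sketch, never built with this file */
#include <errno.h>
#include <string.h>

static int demo_pages_to_buf(char pages[][4096], unsigned int nr_pages,
			     size_t remaining, char *dst, size_t dst_len)
{
	for (unsigned int i = 0; i < nr_pages && remaining && dst_len; i++) {
		size_t copy = remaining < 4096 ? remaining : 4096;

		if (copy > dst_len)
			copy = dst_len;		/* destination ran short */
		memcpy(dst, pages[i], copy);
		dst += copy;
		dst_len -= copy;
		remaining -= copy;
	}
	return remaining ? -EFAULT : 0;
}
#endif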
3449 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3452 cifs_uncached_readv_complete(struct work_struct *work)
3454 struct cifs_readdata *rdata = container_of(work,
3455 struct cifs_readdata, work);
3457 complete(&rdata->done);
3458 collect_uncached_read_data(rdata->ctx);
3459 /* the below call can possibly free the last ref to aio ctx */
3460 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3464 uncached_fill_pages(struct TCP_Server_Info *server,
3465 struct cifs_readdata *rdata, struct iov_iter *iter,
3470 unsigned int nr_pages = rdata->nr_pages;
3471 unsigned int page_offset = rdata->page_offset;
3473 rdata->got_bytes = 0;
3474 rdata->tailsz = PAGE_SIZE;
3475 for (i = 0; i < nr_pages; i++) {
3476 struct page *page = rdata->pages[i];
3478 unsigned int segment_size = rdata->pagesz;
3481 segment_size -= page_offset;
3487 /* no need to hold page hostage */
3488 rdata->pages[i] = NULL;
3495 if (len >= segment_size)
3496 /* enough data to fill the page */
3499 rdata->tailsz = len;
3503 result = copy_page_from_iter(
3504 page, page_offset, n, iter);
3505 #ifdef CONFIG_CIFS_SMB_DIRECT
3510 result = cifs_read_page_from_socket(
3511 server, page, page_offset, n);
3515 rdata->got_bytes += result;
3518 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3519 rdata->got_bytes : result;
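/*
 * Illustration (editor's sketch, not part of the driver): each page above
 * consumes a full segment until the remaining length runs short; the last,
 * partial segment becomes tailsz. A hypothetical model of that split:
 */
#if 0	/* standalone sketch, never built with this file */
static unsigned int demo_segment_len(unsigned int remaining,
				     unsigned int segment_size,
				     unsigned int *tailsz)
{
	if (remaining >= segment_size)
		return segment_size;	/* enough data to fill the page */
	*tailsz = remaining;		/* short final segment */
	return remaining;
}
#endif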
3523 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3524 struct cifs_readdata *rdata, unsigned int len)
3526 return uncached_fill_pages(server, rdata, NULL, len);
3530 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3531 struct cifs_readdata *rdata,
3532 struct iov_iter *iter)
3534 return uncached_fill_pages(server, rdata, iter, iter->count);
3537 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3538 struct list_head *rdata_list,
3539 struct cifs_aio_ctx *ctx)
3542 struct cifs_credits credits;
3544 struct TCP_Server_Info *server;
3546 /* XXX: should we pick a new channel here? */
3547 server = rdata->server;
3550 if (rdata->cfile->invalidHandle) {
3551 rc = cifs_reopen_file(rdata->cfile, true);
3559 * Wait for credits to resend this rdata.
3560 * Note: we attempt to resend the whole rdata, not smaller pieces.
3564 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3570 if (rsize < rdata->bytes) {
3571 add_credits_and_wake_if(server, &credits, 0);
3574 } while (rsize < rdata->bytes);
3575 rdata->credits = credits;
3577 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3579 if (rdata->cfile->invalidHandle)
3582 #ifdef CONFIG_CIFS_SMB_DIRECT
3584 rdata->mr->need_invalidate = true;
3585 smbd_deregister_mr(rdata->mr);
3589 rc = server->ops->async_readv(rdata);
3593 /* If the read was successfully sent, we are done */
3595 /* Add to aio pending list */
3596 list_add_tail(&rdata->list, rdata_list);
3600 /* Roll back credits and retry if needed */
3601 add_credits_and_wake_if(server, &rdata->credits, 0);
3602 } while (rc == -EAGAIN);
3605 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3610 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3611 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3612 struct cifs_aio_ctx *ctx)
3614 struct cifs_readdata *rdata;
3615 unsigned int npages, rsize;
3616 struct cifs_credits credits_on_stack;
3617 struct cifs_credits *credits = &credits_on_stack;
3621 struct TCP_Server_Info *server;
3622 struct page **pagevec;
3624 struct iov_iter direct_iov = ctx->iter;
3626 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3628 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3629 pid = open_file->pid;
3631 pid = current->tgid;
3634 iov_iter_advance(&direct_iov, offset - ctx->pos);
3637 if (open_file->invalidHandle) {
3638 rc = cifs_reopen_file(open_file, true);
3645 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3650 cur_len = min_t(const size_t, len, rsize);
3652 if (ctx->direct_io) {
3655 result = iov_iter_get_pages_alloc(
3656 &direct_iov, &pagevec,
3660 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3661 result, iov_iter_type(&direct_iov),
3662 direct_iov.iov_offset,
3667 add_credits_and_wake_if(server, credits, 0);
3670 cur_len = (size_t)result;
3671 iov_iter_advance(&direct_iov, cur_len);
3673 rdata = cifs_readdata_direct_alloc(
3674 pagevec, cifs_uncached_readv_complete);
3676 add_credits_and_wake_if(server, credits, 0);
3681 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3682 rdata->page_offset = start;
3683 rdata->tailsz = npages > 1 ?
3684 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3689 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3690 /* allocate a readdata struct */
3691 rdata = cifs_readdata_alloc(npages,
3692 cifs_uncached_readv_complete);
3694 add_credits_and_wake_if(server, credits, 0);
3699 rc = cifs_read_allocate_pages(rdata, npages);
3701 kvfree(rdata->pages);
3703 add_credits_and_wake_if(server, credits, 0);
3707 rdata->tailsz = PAGE_SIZE;
3710 rdata->server = server;
3711 rdata->cfile = cifsFileInfo_get(open_file);
3712 rdata->nr_pages = npages;
3713 rdata->offset = offset;
3714 rdata->bytes = cur_len;
3716 rdata->pagesz = PAGE_SIZE;
3717 rdata->read_into_pages = cifs_uncached_read_into_pages;
3718 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3719 rdata->credits = credits_on_stack;
3721 kref_get(&ctx->refcount);
3723 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3726 if (rdata->cfile->invalidHandle)
3729 rc = server->ops->async_readv(rdata);
3733 add_credits_and_wake_if(server, &rdata->credits, 0);
3734 kref_put(&rdata->refcount,
3735 cifs_uncached_readdata_release);
3736 if (rc == -EAGAIN) {
3737 iov_iter_revert(&direct_iov, cur_len);
3743 list_add_tail(&rdata->list, rdata_list);
3752 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3754 struct cifs_readdata *rdata, *tmp;
3755 struct iov_iter *to = &ctx->iter;
3756 struct cifs_sb_info *cifs_sb;
3759 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3761 mutex_lock(&ctx->aio_mutex);
3763 if (list_empty(&ctx->list)) {
3764 mutex_unlock(&ctx->aio_mutex);
3769 /* the loop below should proceed in the order of increasing offsets */
3771 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3773 if (!try_wait_for_completion(&rdata->done)) {
3774 mutex_unlock(&ctx->aio_mutex);
3778 if (rdata->result == -EAGAIN) {
3779 /* resend call if it's a retryable error */
3780 struct list_head tmp_list;
3781 unsigned int got_bytes = rdata->got_bytes;
3783 list_del_init(&rdata->list);
3784 INIT_LIST_HEAD(&tmp_list);
3787 * We got part of the data and then a reconnect
3788 * happened -- fill the buffer and continue
3791 if (got_bytes && got_bytes < rdata->bytes) {
3793 if (!ctx->direct_io)
3794 rc = cifs_readdata_to_iov(rdata, to);
3796 kref_put(&rdata->refcount,
3797 cifs_uncached_readdata_release);
3802 if (ctx->direct_io) {
3804 * Re-use rdata as this is a direct I/O
3807 rc = cifs_resend_rdata(
3811 rc = cifs_send_async_read(
3812 rdata->offset + got_bytes,
3813 rdata->bytes - got_bytes,
3814 rdata->cfile, cifs_sb,
3817 kref_put(&rdata->refcount,
3818 cifs_uncached_readdata_release);
3821 list_splice(&tmp_list, &ctx->list);
3824 } else if (rdata->result)
3826 else if (!ctx->direct_io)
3827 rc = cifs_readdata_to_iov(rdata, to);
3829 /* if there was a short read -- discard anything left */
3830 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3833 ctx->total_len += rdata->got_bytes;
3835 list_del_init(&rdata->list);
3836 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3839 if (!ctx->direct_io)
3840 ctx->total_len = ctx->len - iov_iter_count(to);
3842 /* mask nodata case */
3846 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3848 mutex_unlock(&ctx->aio_mutex);
3850 if (ctx->iocb && ctx->iocb->ki_complete)
3851 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3853 complete(&ctx->done);
3856 static ssize_t __cifs_readv(
3857 struct kiocb *iocb, struct iov_iter *to, bool direct)
3860 struct file *file = iocb->ki_filp;
3861 struct cifs_sb_info *cifs_sb;
3862 struct cifsFileInfo *cfile;
3863 struct cifs_tcon *tcon;
3864 ssize_t rc, total_read = 0;
3865 loff_t offset = iocb->ki_pos;
3866 struct cifs_aio_ctx *ctx;
3869 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3870 * so fall back to the data copy read path.
3871 * This could be improved by getting pages directly in ITER_KVEC.
3873 if (direct && iov_iter_is_kvec(to)) {
3874 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3878 len = iov_iter_count(to);
3882 cifs_sb = CIFS_FILE_SB(file);
3883 cfile = file->private_data;
3884 tcon = tlink_tcon(cfile->tlink);
3886 if (!tcon->ses->server->ops->async_readv)
3889 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3890 cifs_dbg(FYI, "attempting read on write only file instance\n");
3892 ctx = cifs_aio_ctx_alloc();
3896 ctx->cfile = cifsFileInfo_get(cfile);
3898 if (!is_sync_kiocb(iocb))
3901 if (iter_is_iovec(to))
3902 ctx->should_dirty = true;
3906 ctx->direct_io = true;
3910 rc = setup_aio_ctx_iter(ctx, to, READ);
3912 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3919 /* grab a lock here because the read response handlers can access ctx */
3919 mutex_lock(&ctx->aio_mutex);
3921 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3923 /* if at least one read request was successfully sent, reset rc */
3924 if (!list_empty(&ctx->list))
3927 mutex_unlock(&ctx->aio_mutex);
3930 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3934 if (!is_sync_kiocb(iocb)) {
3935 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3936 return -EIOCBQUEUED;
3939 rc = wait_for_completion_killable(&ctx->done);
3941 mutex_lock(&ctx->aio_mutex);
3942 ctx->rc = rc = -EINTR;
3943 total_read = ctx->total_len;
3944 mutex_unlock(&ctx->aio_mutex);
3947 total_read = ctx->total_len;
3950 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3953 iocb->ki_pos += total_read;
3959 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3961 return __cifs_readv(iocb, to, true);
3964 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3966 return __cifs_readv(iocb, to, false);
3970 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3972 struct inode *inode = file_inode(iocb->ki_filp);
3973 struct cifsInodeInfo *cinode = CIFS_I(inode);
3974 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3975 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3976 iocb->ki_filp->private_data;
3977 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3981 * In strict cache mode we need to read from the server all the time
3982 * if we don't have level II oplock because the server can delay mtime
3983 * change - so we can't make a decision about inode invalidation.
3984 * And we can also fail with page reading if there are mandatory locks
3985 * on pages affected by this read but not on the region from pos to pos+len-1.
3988 if (!CIFS_CACHE_READ(cinode))
3989 return cifs_user_readv(iocb, to);
3991 if (cap_unix(tcon->ses) &&
3992 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3993 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3994 return generic_file_read_iter(iocb, to);
3997 * We need to hold the sem to be sure nobody modifies lock list
3998 * with a brlock that prevents reading.
4000 down_read(&cinode->lock_sem);
4001 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4002 tcon->ses->server->vals->shared_lock_type,
4003 0, NULL, CIFS_READ_OP))
4004 rc = generic_file_read_iter(iocb, to);
4005 up_read(&cinode->lock_sem);
4010 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4013 unsigned int bytes_read = 0;
4014 unsigned int total_read;
4015 unsigned int current_read_size;
4017 struct cifs_sb_info *cifs_sb;
4018 struct cifs_tcon *tcon;
4019 struct TCP_Server_Info *server;
4022 struct cifsFileInfo *open_file;
4023 struct cifs_io_parms io_parms = {0};
4024 int buf_type = CIFS_NO_BUFFER;
4028 cifs_sb = CIFS_FILE_SB(file);
4030 /* FIXME: set up handlers for larger reads and/or convert to async */
4031 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4033 if (file->private_data == NULL) {
4038 open_file = file->private_data;
4039 tcon = tlink_tcon(open_file->tlink);
4040 server = cifs_pick_channel(tcon->ses);
4042 if (!server->ops->sync_read) {
4047 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4048 pid = open_file->pid;
4050 pid = current->tgid;
4052 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4053 cifs_dbg(FYI, "attempting read on write only file instance\n");
4055 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4056 total_read += bytes_read, cur_offset += bytes_read) {
4058 current_read_size = min_t(uint, read_size - total_read,
4061 * For Windows ME and 9x we do not want to request more
4062 * than it negotiated since it will refuse the read
4065 if (!(tcon->ses->capabilities &
4066 tcon->ses->server->vals->cap_large_files)) {
4067 current_read_size = min_t(uint,
4068 current_read_size, CIFSMaxBufSize);
4070 if (open_file->invalidHandle) {
4071 rc = cifs_reopen_file(open_file, true);
4076 io_parms.tcon = tcon;
4077 io_parms.offset = *offset;
4078 io_parms.length = current_read_size;
4079 io_parms.server = server;
4080 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4081 &bytes_read, &cur_offset,
4083 } while (rc == -EAGAIN);
4085 if (rc || (bytes_read == 0)) {
4093 cifs_stats_bytes_read(tcon, total_read);
4094 *offset += bytes_read;
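/*
 * Illustration (editor's sketch, not part of the driver): cifs_read()
 * accumulates short reads the same way a userspace pread(2) loop does.
 * A minimal model of that accumulation; demo_read_all() is hypothetical.
 */
#if 0	/* standalone userspace sketch, never built with this file */
#include <unistd.h>

static ssize_t demo_read_all(int fd, char *buf, size_t size, off_t off)
{
	size_t total = 0;

	while (total < size) {
		ssize_t n = pread(fd, buf + total, size - total,
				  off + (off_t)total);

		if (n <= 0)	/* EOF or error: return what we have */
			return total ? (ssize_t)total : n;
		total += (size_t)n;
	}
	return (ssize_t)total;
}
#endif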
4102 * If the page is mmap'ed into a process' page tables, then we need to make
4103 * sure that it doesn't change while being written back.
4106 cifs_page_mkwrite(struct vm_fault *vmf)
4108 struct page *page = vmf->page;
4111 return VM_FAULT_LOCKED;
4114 static const struct vm_operations_struct cifs_file_vm_ops = {
4115 .fault = filemap_fault,
4116 .map_pages = filemap_map_pages,
4117 .page_mkwrite = cifs_page_mkwrite,
4120 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4123 struct inode *inode = file_inode(file);
4127 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4128 rc = cifs_zap_mapping(inode);
4130 rc = generic_file_mmap(file, vma);
4132 vma->vm_ops = &cifs_file_vm_ops;
4138 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4144 rc = cifs_revalidate_file(file);
4146 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4149 rc = generic_file_mmap(file, vma);
4151 vma->vm_ops = &cifs_file_vm_ops;
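/*
 * Illustration (editor's sketch, not part of the driver): both mmap entry
 * points above install cifs_file_vm_ops, so the first store to a clean
 * shared mapping goes through cifs_page_mkwrite(). A minimal userspace
 * sketch; the path is hypothetical.
 */
#if 0	/* standalone userspace sketch, never built with this file */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static int demo_mmap_store(void)
{
	int fd = open("/mnt/cifs/demo.bin", O_RDWR);
	char *p;

	if (fd < 0)
		return -1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		close(fd);
		return -1;
	}
	p[0] = 'x';			/* faults into ->page_mkwrite() */
	msync(p, 4096, MS_SYNC);	/* write the now-dirty page back */
	munmap(p, 4096);
	return close(fd);
}
#endif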
4158 cifs_readv_complete(struct work_struct *work)
4160 unsigned int i, got_bytes;
4161 struct cifs_readdata *rdata = container_of(work,
4162 struct cifs_readdata, work);
4164 got_bytes = rdata->got_bytes;
4165 for (i = 0; i < rdata->nr_pages; i++) {
4166 struct page *page = rdata->pages[i];
4168 lru_cache_add(page);
4170 if (rdata->result == 0 ||
4171 (rdata->result == -EAGAIN && got_bytes)) {
4172 flush_dcache_page(page);
4173 SetPageUptodate(page);
4178 if (rdata->result == 0 ||
4179 (rdata->result == -EAGAIN && got_bytes))
4180 cifs_readpage_to_fscache(rdata->mapping->host, page);
4182 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4185 rdata->pages[i] = NULL;
4187 kref_put(&rdata->refcount, cifs_readdata_release);
4191 readpages_fill_pages(struct TCP_Server_Info *server,
4192 struct cifs_readdata *rdata, struct iov_iter *iter,
4199 unsigned int nr_pages = rdata->nr_pages;
4200 unsigned int page_offset = rdata->page_offset;
4202 /* determine the eof that the server (probably) has */
4203 eof = CIFS_I(rdata->mapping->host)->server_eof;
4204 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4205 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4207 rdata->got_bytes = 0;
4208 rdata->tailsz = PAGE_SIZE;
4209 for (i = 0; i < nr_pages; i++) {
4210 struct page *page = rdata->pages[i];
4211 unsigned int to_read = rdata->pagesz;
4215 to_read -= page_offset;
4221 if (len >= to_read) {
4223 } else if (len > 0) {
4224 /* enough for partial page, fill and zero the rest */
4225 zero_user(page, len + page_offset, to_read - len);
4226 n = rdata->tailsz = len;
4228 } else if (page->index > eof_index) {
4230 * The VFS will not try to do readahead past the
4231 * i_size, but it's possible that we have outstanding
4232 * writes with gaps in the middle and the i_size hasn't
4233 * caught up yet. Populate those with zeroed out pages
4234 * to prevent the VFS from repeatedly attempting to
4235 * fill them until the writes are flushed.
4237 zero_user(page, 0, PAGE_SIZE);
4238 lru_cache_add(page);
4239 flush_dcache_page(page);
4240 SetPageUptodate(page);
4243 rdata->pages[i] = NULL;
4247 /* no need to hold page hostage */
4248 lru_cache_add(page);
4251 rdata->pages[i] = NULL;
4257 result = copy_page_from_iter(
4258 page, page_offset, n, iter);
4259 #ifdef CONFIG_CIFS_SMB_DIRECT
4264 result = cifs_read_page_from_socket(
4265 server, page, page_offset, n);
4269 rdata->got_bytes += result;
4272 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4273 rdata->got_bytes : result;
4277 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4278 struct cifs_readdata *rdata, unsigned int len)
4280 return readpages_fill_pages(server, rdata, NULL, len);
4284 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4285 struct cifs_readdata *rdata,
4286 struct iov_iter *iter)
4288 return readpages_fill_pages(server, rdata, iter, iter->count);
4292 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4293 unsigned int rsize, struct list_head *tmplist,
4294 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4296 struct page *page, *tpage;
4297 unsigned int expected_index;
4299 gfp_t gfp = readahead_gfp_mask(mapping);
4301 INIT_LIST_HEAD(tmplist);
4303 page = lru_to_page(page_list);
4306 * Lock the page and put it in the cache. Since no one else
4307 * should have access to this page, we're safe to simply set
4308 * PG_locked without checking it first.
4310 __SetPageLocked(page);
4311 rc = add_to_page_cache_locked(page, mapping,
4314 /* give up if we can't stick it in the cache */
4316 __ClearPageLocked(page);
4320 /* move first page to the tmplist */
4321 *offset = (loff_t)page->index << PAGE_SHIFT;
4324 list_move_tail(&page->lru, tmplist);
4326 /* now try and add more pages onto the request */
4327 expected_index = page->index + 1;
4328 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4329 /* discontinuity ? */
4330 if (page->index != expected_index)
4333 /* would this page push the read over the rsize? */
4334 if (*bytes + PAGE_SIZE > rsize)
4337 __SetPageLocked(page);
4338 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4340 __ClearPageLocked(page);
4343 list_move_tail(&page->lru, tmplist);
4344 (*bytes) += PAGE_SIZE;
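/*
 * Illustration (editor's sketch, not part of the driver): a page joins the
 * batch above only if its index is the expected next one and one more
 * PAGE_SIZE still fits within rsize. A hypothetical model of that gate:
 */
#if 0	/* standalone sketch, never built with this file */
static int demo_accept_page(unsigned long index, unsigned long expected,
			    unsigned int bytes, unsigned int rsize,
			    unsigned int page_size)
{
	if (index != expected)			/* discontinuity */
		return 0;
	if (bytes + page_size > rsize)		/* request would exceed rsize */
		return 0;
	return 1;
}
#endif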
4351 static int cifs_readpages(struct file *file, struct address_space *mapping,
4352 struct list_head *page_list, unsigned num_pages)
4356 struct list_head tmplist;
4357 struct cifsFileInfo *open_file = file->private_data;
4358 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4359 struct TCP_Server_Info *server;
4365 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4366 * immediately if the cookie is negative.
4368 * After this point, every page in the list might have PG_fscache set,
4369 * so we will need to clear that bit on every page we don't use.
4371 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4378 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4379 pid = open_file->pid;
4381 pid = current->tgid;
4384 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4386 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4387 __func__, file, mapping, num_pages);
4390 * Start with the page at end of list and move it to private
4391 * list. Do the same with any following pages until we hit
4392 * the rsize limit, hit an index discontinuity, or run out of
4393 * pages. Issue the async read and then start the loop again
4394 * until the list is empty.
4396 * Note that list order is important. The page_list is in
4397 * the order of declining indexes. When we put the pages in
4398 * the rdata->pages, then we want them in increasing order.
4400 while (!list_empty(page_list) && !err) {
4401 unsigned int i, nr_pages, bytes, rsize;
4403 struct page *page, *tpage;
4404 struct cifs_readdata *rdata;
4405 struct cifs_credits credits_on_stack;
4406 struct cifs_credits *credits = &credits_on_stack;
4408 if (open_file->invalidHandle) {
4409 rc = cifs_reopen_file(open_file, true);
4416 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4422 * Give up immediately if rsize is too small to read an entire
4423 * page. The VFS will fall back to readpage. We should never
4424 * reach this point however since we set ra_pages to 0 when the
4425 * rsize is smaller than a cache page.
4427 if (unlikely(rsize < PAGE_SIZE)) {
4428 add_credits_and_wake_if(server, credits, 0);
4434 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4435 &nr_pages, &offset, &bytes);
4437 add_credits_and_wake_if(server, credits, 0);
4441 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4443 /* best to give up if we're out of mem */
4444 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4445 list_del(&page->lru);
4446 lru_cache_add(page);
4451 add_credits_and_wake_if(server, credits, 0);
4455 rdata->cfile = cifsFileInfo_get(open_file);
4456 rdata->server = server;
4457 rdata->mapping = mapping;
4458 rdata->offset = offset;
4459 rdata->bytes = bytes;
4461 rdata->pagesz = PAGE_SIZE;
4462 rdata->tailsz = PAGE_SIZE;
4463 rdata->read_into_pages = cifs_readpages_read_into_pages;
4464 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4465 rdata->credits = credits_on_stack;
4467 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4468 list_del(&page->lru);
4469 rdata->pages[rdata->nr_pages++] = page;
4472 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4475 if (rdata->cfile->invalidHandle)
4478 rc = server->ops->async_readv(rdata);
4482 add_credits_and_wake_if(server, &rdata->credits, 0);
4483 for (i = 0; i < rdata->nr_pages; i++) {
4484 page = rdata->pages[i];
4485 lru_cache_add(page);
4489 /* Fall back to readpage in error/reconnect cases */
4490 kref_put(&rdata->refcount, cifs_readdata_release);
4494 kref_put(&rdata->refcount, cifs_readdata_release);
4497 /* Any pages that have been shown to fscache but didn't get added to
4498 * the pagecache must be uncached before they get returned to the system.
4501 cifs_fscache_readpages_cancel(mapping->host, page_list);
4507 * cifs_readpage_worker must be called with the page pinned
4509 static int cifs_readpage_worker(struct file *file, struct page *page,
4515 /* Is the page cached? */
4516 rc = cifs_readpage_from_fscache(file_inode(file), page);
4520 read_data = kmap(page);
4521 /* for reads over a certain size we could initiate async read-ahead */
4523 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4528 cifs_dbg(FYI, "Bytes read %d\n", rc);
4530 /* we do not want atime to be less than mtime, it broke some apps */
4531 file_inode(file)->i_atime = current_time(file_inode(file));
4532 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4533 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4535 file_inode(file)->i_atime = current_time(file_inode(file));
4538 memset(read_data + rc, 0, PAGE_SIZE - rc);
4540 flush_dcache_page(page);
4541 SetPageUptodate(page);
4543 /* send this page to the cache */
4544 cifs_readpage_to_fscache(file_inode(file), page);
4556 static int cifs_readpage(struct file *file, struct page *page)
4558 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4564 if (file->private_data == NULL) {
4570 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4571 page, (int)offset, (int)offset);
4573 rc = cifs_readpage_worker(file, page, &offset);
4579 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4581 struct cifsFileInfo *open_file;
4583 spin_lock(&cifs_inode->open_file_lock);
4584 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4585 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4586 spin_unlock(&cifs_inode->open_file_lock);
4590 spin_unlock(&cifs_inode->open_file_lock);
4594 /* We do not want to update the file size from the server for inodes
4595 open for write, to avoid races with writepage extending
4596 the file. In the future we could consider allowing
4597 refreshing the inode only on increases in the file size,
4598 but this is tricky to do without racing with writebehind
4599 page caching in the current Linux kernel design */
4600 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4605 if (is_inode_writable(cifsInode)) {
4606 /* This inode is open for write at least once */
4607 struct cifs_sb_info *cifs_sb;
4609 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4610 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4611 /* since there is no page cache to corrupt on directio,
4612 we can change the size safely */
4616 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4624 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4625 loff_t pos, unsigned len, unsigned flags,
4626 struct page **pagep, void **fsdata)
4629 pgoff_t index = pos >> PAGE_SHIFT;
4630 loff_t offset = pos & (PAGE_SIZE - 1);
4631 loff_t page_start = pos & PAGE_MASK;
4636 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4639 page = grab_cache_page_write_begin(mapping, index, flags);
4645 if (PageUptodate(page))
4649 * If we write a full page it will be up to date, no need to read from
4650 * the server. If the write is short, we'll end up doing a sync write instead.
4653 if (len == PAGE_SIZE)
4657 * optimize away the read when we have an oplock, and we're not
4658 * expecting to use any of the data we'd be reading in. That
4659 * is, when the page lies beyond the EOF, or straddles the EOF
4660 * and the write will cover all of the existing data.
4662 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4663 i_size = i_size_read(mapping->host);
4664 if (page_start >= i_size ||
4665 (offset == 0 && (pos + len) >= i_size)) {
4666 zero_user_segments(page, 0, offset,
4670 * PageChecked means that the parts of the page
4671 * to which we're not writing are considered up
4672 * to date. Once the data is copied to the
4673 * page, it can be set uptodate.
4675 SetPageChecked(page);
4680 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4682 * might as well read a page, it is fast enough. If we get
4683 * an error, we don't need to return it. cifs_write_end will
4684 * do a sync write instead since PG_uptodate isn't set.
4686 cifs_readpage_worker(file, page, &page_start);
4691 /* We could try using another file handle if there is one,
4692 but how would we lock it to prevent a close of that handle
4693 from racing with this read? In any case,
4694 this will be written out by write_end, so it is fine */
4701 static int cifs_release_page(struct page *page, gfp_t gfp)
4703 if (PagePrivate(page))
4706 return cifs_fscache_release_page(page, gfp);
4709 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4710 unsigned int length)
4712 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4714 if (offset == 0 && length == PAGE_SIZE)
4715 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4718 static int cifs_launder_page(struct page *page)
4721 loff_t range_start = page_offset(page);
4722 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4723 struct writeback_control wbc = {
4724 .sync_mode = WB_SYNC_ALL,
4726 .range_start = range_start,
4727 .range_end = range_end,
4730 cifs_dbg(FYI, "Launder page: %p\n", page);
4732 if (clear_page_dirty_for_io(page))
4733 rc = cifs_writepage_locked(page, &wbc);
4735 cifs_fscache_invalidate_page(page, page->mapping->host);
4739 void cifs_oplock_break(struct work_struct *work)
4741 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4743 struct inode *inode = d_inode(cfile->dentry);
4744 struct cifsInodeInfo *cinode = CIFS_I(inode);
4745 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4746 struct TCP_Server_Info *server = tcon->ses->server;
4748 bool purge_cache = false;
4750 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4751 TASK_UNINTERRUPTIBLE);
4753 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4754 cfile->oplock_epoch, &purge_cache);
4756 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4757 cifs_has_mand_locks(cinode)) {
4758 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4763 if (inode && S_ISREG(inode->i_mode)) {
4764 if (CIFS_CACHE_READ(cinode))
4765 break_lease(inode, O_RDONLY);
4767 break_lease(inode, O_WRONLY);
4768 rc = filemap_fdatawrite(inode->i_mapping);
4769 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4770 rc = filemap_fdatawait(inode->i_mapping);
4771 mapping_set_error(inode->i_mapping, rc);
4772 cifs_zap_mapping(inode);
4774 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4775 if (CIFS_CACHE_WRITE(cinode))
4776 goto oplock_break_ack;
4779 rc = cifs_push_locks(cfile);
4781 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4785 * Releasing a stale oplock after a recent reconnect of the smb session
4786 * using a now incorrect file handle is not a data integrity issue, but
4787 * do not bother sending an oplock release if the session to the server
4788 * is still disconnected, since the oplock was already released by the server
4790 if (!cfile->oplock_break_cancelled) {
4791 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4793 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4795 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4796 cifs_done_oplock_break(cinode);
4800 * The presence of cifs_direct_io() in the address space ops vector
4801 * allows open() O_DIRECT flags which would have failed otherwise.
4803 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4804 * so this method should never be called.
4806 * Direct IO is not yet supported in the cached mode.
4809 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4813 * Eventually we need to support direct IO for non-forcedirectio mounts
4818 static int cifs_swap_activate(struct swap_info_struct *sis,
4819 struct file *swap_file, sector_t *span)
4821 struct cifsFileInfo *cfile = swap_file->private_data;
4822 struct inode *inode = swap_file->f_mapping->host;
4823 unsigned long blocks;
4826 cifs_dbg(FYI, "swap activate\n");
4828 spin_lock(&inode->i_lock);
4829 blocks = inode->i_blocks;
4830 isize = inode->i_size;
4831 spin_unlock(&inode->i_lock);
4832 if (blocks*512 < isize) {
4833 pr_warn("swap activate: swapfile has holes\n");
4838 pr_warn_once("Swap support over SMB3 is experimental\n");
4841 * TODO: consider adding ACL (or documenting how) to prevent other
4842 * users (on this or other systems) from reading it
4846 /* TODO: add sk_set_memalloc(inet) or similar */
4849 cfile->swapfile = true;
4851 * TODO: Since the file is already open, we can't open with DENY_ALL here,
4852 * but we could add a call to grab a byte range lock to prevent others
4853 * from reading or writing the file
4859 static void cifs_swap_deactivate(struct file *file)
4861 struct cifsFileInfo *cfile = file->private_data;
4863 cifs_dbg(FYI, "swap deactivate\n");
4865 /* TODO: undoing sk_set_memalloc(inet) will eventually be needed */
4868 cfile->swapfile = false;
4870 /* do we need to unpin (or unlock) the file */
4873 const struct address_space_operations cifs_addr_ops = {
4874 .readpage = cifs_readpage,
4875 .readpages = cifs_readpages,
4876 .writepage = cifs_writepage,
4877 .writepages = cifs_writepages,
4878 .write_begin = cifs_write_begin,
4879 .write_end = cifs_write_end,
4880 .set_page_dirty = __set_page_dirty_nobuffers,
4881 .releasepage = cifs_release_page,
4882 .direct_IO = cifs_direct_io,
4883 .invalidatepage = cifs_invalidate_page,
4884 .launder_page = cifs_launder_page,
4886 * TODO: investigate and, if useful, add a cifs_migratePage
4887 * helper (under a CONFIG_MIGRATION) in the future, and also
4888 * investigate and add an is_dirty_writeback helper if needed
4890 .swap_activate = cifs_swap_activate,
4891 .swap_deactivate = cifs_swap_deactivate,
4895 * cifs_readpages requires the server to support a buffer large enough to
4896 * contain the header plus one complete page of data. Otherwise, we need
4897 * to leave cifs_readpages out of the address space operations.
4899 const struct address_space_operations cifs_addr_ops_smallbuf = {
4900 .readpage = cifs_readpage,
4901 .writepage = cifs_writepage,
4902 .writepages = cifs_writepages,
4903 .write_begin = cifs_write_begin,
4904 .write_end = cifs_write_end,
4905 .set_page_dirty = __set_page_dirty_nobuffers,
4906 .releasepage = cifs_release_page,
4907 .invalidatepage = cifs_invalidate_page,
4908 .launder_page = cifs_launder_page,