4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * cifs_convert_flags() - map POSIX open() access-mode flags to the NT/SMB
 * desired-access mask requested on open.
 * NOTE(review): fragmentary source — the return statements for the
 * O_RDONLY/O_WRONLY branches are not visible here.
 */
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
50 else if ((flags & O_ACCMODE) == O_WRONLY)
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
/* Fallback: request the individual access rights rather than GENERIC_*. */
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * cifs_posix_convert_flags() - translate VFS open flags into the SMB_O_*
 * flags used by the SMB POSIX-extensions open call.
 * NOTE(review): fragmentary — the O_CREAT/O_EXCL/O_TRUNC/O_DSYNC/O_DIRECT
 * condition lines are missing; only the resulting |= lines are visible.
 */
64 static u32 cifs_posix_convert_flags(unsigned int flags)
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
76 posix_flags |= SMB_O_CREAT;
78 posix_flags |= SMB_O_EXCL;
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
89 posix_flags |= SMB_O_DIRECT;
/*
 * cifs_get_disposition() - map POSIX create/truncate flag combinations to
 * the SMB create-disposition value (see the mapping table in cifs_nt_open).
 * NOTE(review): fragmentary — the FILE_CREATE/FILE_OPEN_IF/FILE_OPEN
 * return lines are not visible here.
 */
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
/*
 * cifs_posix_open() - open/create a file via the SMB POSIX extensions
 * (CIFSPOSIXCreate) and, on success, instantiate or refresh the inode from
 * the FILE_UNIX_BASIC_INFO the server returns.
 * @pinode:  in/out; if *pinode is NULL a new inode is obtained via cifs_iget.
 * @poplock/@pnetfid: filled with the granted oplock and file handle.
 */
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
126 tlink = cifs_sb_tlink(cifs_sb);
132 tcon = tlink_tcon(tlink);
/* apply the caller's umask before sending the create mode to the server */
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
/* Type == -1 means the server sent no usable metadata for this file */
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
162 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * cifs_nt_open() - open a file the "NT" (non-POSIX) way via the server's
 * ->open operation, then refresh inode metadata (unix or non-unix path).
 * Fails early if the server has no ->open op.
 */
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
173 struct cifs_fid *fid, unsigned int xid)
178 int create_options = CREATE_NOT_DIR;
181 if (!tcon->ses->server->ops->open)
184 desired_access = cifs_convert_flags(f_flags);
186 /*********************************************************************
187 * open flag mapping table:
189 * POSIX Flag CIFS Disposition
190 * ---------- ----------------
191 * O_CREAT FILE_OPEN_IF
192 * O_CREAT | O_EXCL FILE_CREATE
193 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
194 * O_TRUNC FILE_OVERWRITE
195 * none of the above FILE_OPEN
197 * Note that there is not a direct match between disposition
198 * FILE_SUPERSEDE (ie create whether or not file exists although
199 * O_CREAT | O_TRUNC is similar but truncates the existing
200 * file rather than creating a new file as FILE_SUPERSEDE does
201 * (which uses the attributes / metadata passed in on open call)
203 *? O_SYNC is a reasonable match to CIFS writethrough flag
204 *? and the read write flags match reasonably. O_LARGEFILE
205 *? is irrelevant because largefile support is always used
206 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
207 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
208 *********************************************************************/
210 disposition = cifs_get_disposition(f_flags);
212 /* BB pass O_SYNC flag through on file attributes .. BB */
/* buf receives FILE_ALL_INFO from the open for the metadata refresh below */
214 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
218 if (backup_cred(cifs_sb))
219 create_options |= CREATE_OPEN_BACKUP_INTENT;
221 rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition,
222 desired_access, create_options, fid,
223 oplock, buf, cifs_sb);
/* refresh inode: unix-extensions path vs. the FILE_ALL_INFO path */
229 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
232 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * cifs_new_fileinfo() - allocate and initialize the per-open cifsFileInfo,
 * link it onto the tcon's and the inode's open-file lists (readable
 * instances first), and stash it in file->private_data.
 * Takes references on the dentry (dget) and the tlink.
 */
240 struct cifsFileInfo *
241 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
242 struct tcon_link *tlink, __u32 oplock)
244 struct dentry *dentry = file->f_path.dentry;
245 struct inode *inode = dentry->d_inode;
246 struct cifsInodeInfo *cinode = CIFS_I(inode);
247 struct cifsFileInfo *cfile;
249 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 cfile->pid = current->tgid;
255 cfile->uid = current_fsuid();
256 cfile->dentry = dget(dentry);
257 cfile->f_flags = file->f_flags;
258 cfile->invalidHandle = false;
259 cfile->tlink = cifs_get_tlink(tlink);
260 mutex_init(&cfile->fh_mutex);
261 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
262 INIT_LIST_HEAD(&cfile->llist);
/* let the protocol layer record the fid/oplock in its own format */
263 tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
265 spin_lock(&cifs_file_list_lock);
266 list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
267 /* if readable file instance put first in list*/
268 if (file->f_mode & FMODE_READ)
269 list_add(&cfile->flist, &cinode->openFileList);
271 list_add_tail(&cfile->flist, &cinode->openFileList);
272 spin_unlock(&cifs_file_list_lock);
274 file->private_data = cfile;
/* forward declaration: used by cifsFileInfo_put and the lock helpers below */
278 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
/*
 * cifsFileInfo_get() - take a reference on a cifsFileInfo under
 * cifs_file_list_lock; pairs with cifsFileInfo_put().
 */
280 struct cifsFileInfo *
281 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
283 spin_lock(&cifs_file_list_lock);
284 cifsFileInfo_get_locked(cifs_file);
285 spin_unlock(&cifs_file_list_lock);
290 * Release a reference on the file private data. This may involve closing
291 * the filehandle out on the server. Must be called without holding
292 * cifs_file_list_lock.
294 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
296 struct inode *inode = cifs_file->dentry->d_inode;
297 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
298 struct cifsInodeInfo *cifsi = CIFS_I(inode);
299 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
300 struct cifsLockInfo *li, *tmp;
/* drop a reference; bail out early if others still hold the file open */
302 spin_lock(&cifs_file_list_lock);
303 if (--cifs_file->count > 0) {
304 spin_unlock(&cifs_file_list_lock);
308 /* remove it from the lists */
309 list_del(&cifs_file->flist);
310 list_del(&cifs_file->tlist);
312 if (list_empty(&cifsi->openFileList)) {
313 cFYI(1, "closing last open instance for inode %p",
314 cifs_file->dentry->d_inode);
316 * In strict cache mode we need invalidate mapping on the last
317 * close because it may cause a error when we open this file
318 * again and get at least level II oplock.
320 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
321 CIFS_I(inode)->invalid_mapping = true;
322 cifs_set_oplock_level(cifsi, 0);
324 spin_unlock(&cifs_file_list_lock);
/* flush any queued oplock-break work before tearing the handle down */
326 cancel_work_sync(&cifs_file->oplock_break);
328 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
329 struct TCP_Server_Info *server = tcon->ses->server;
334 if (server->ops->close)
335 rc = server->ops->close(xid, tcon, &cifs_file->fid);
339 /* Delete any outstanding lock records. We'll lose them when the file
342 mutex_lock(&cifsi->lock_mutex);
343 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
344 list_del(&li->llist);
345 cifs_del_lock_waiters(li);
348 mutex_unlock(&cifsi->lock_mutex);
/* release the references taken in cifs_new_fileinfo() */
350 cifs_put_tlink(cifs_file->tlink);
351 dput(cifs_file->dentry);
/*
 * cifs_open() - VFS ->open for regular files.  Tries a POSIX-extensions
 * open first when the server advertises CIFS_UNIX_POSIX_PATH_OPS_CAP,
 * falling back to the NT-style open (cifs_nt_open) on unsupported/unknown
 * errors; then wires up the cifsFileInfo and fscache cookie.
 */
355 int cifs_open(struct inode *inode, struct file *file)
360 struct cifs_sb_info *cifs_sb;
361 struct cifs_tcon *tcon;
362 struct tcon_link *tlink;
363 struct cifsFileInfo *cfile = NULL;
364 char *full_path = NULL;
365 bool posix_open_ok = false;
370 cifs_sb = CIFS_SB(inode->i_sb);
371 tlink = cifs_sb_tlink(cifs_sb);
374 return PTR_ERR(tlink);
376 tcon = tlink_tcon(tlink);
378 full_path = build_path_from_dentry(file->f_path.dentry);
379 if (full_path == NULL) {
384 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
385 inode, file->f_flags, full_path);
387 if (tcon->ses->server->oplocks)
/* POSIX open path: requires unix extensions and a not-yet-broken server */
392 if (!tcon->broken_posix_open && tcon->unix_ext &&
393 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
394 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
395 /* can not refresh inode info since size could be stale */
396 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
397 cifs_sb->mnt_file_mode /* ignored */,
398 file->f_flags, &oplock, &fid.netfid, xid);
400 cFYI(1, "posix open succeeded");
401 posix_open_ok = true;
402 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
403 if (tcon->ses->serverNOS)
404 cERROR(1, "server %s of type %s returned"
405 " unexpected error on SMB posix open"
406 ", disabling posix open support."
407 " Check if server update available.",
408 tcon->ses->serverName,
409 tcon->ses->serverNOS);
/* remember the failure so later opens skip the POSIX attempt */
410 tcon->broken_posix_open = true;
411 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
412 (rc != -EOPNOTSUPP)) /* path not found or net err */
415 * Else fallthrough to retry open the old way on network i/o
420 if (!posix_open_ok) {
421 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
422 file->f_flags, &oplock, &fid, xid);
427 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
/* fileinfo allocation failed: close the just-opened server handle */
429 if (tcon->ses->server->ops->close)
430 tcon->ses->server->ops->close(xid, tcon, &fid);
435 cifs_fscache_set_inode_cookie(inode, file);
437 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
439 * Time to set mode which we can not set earlier due to
440 * problems creating new read-only files.
442 struct cifs_unix_set_info_args args = {
443 .mode = inode->i_mode,
446 .ctime = NO_CHANGE_64,
447 .atime = NO_CHANGE_64,
448 .mtime = NO_CHANGE_64,
451 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
458 cifs_put_tlink(tlink);
462 /* Try to reacquire byte range locks that were released when session */
463 /* to server was lost */
/* NOTE(review): stub — body only carries the BB comment below; no relocking
   is actually performed in the visible code. */
464 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
468 /* BB list all locks open on this file and relock */
/*
 * cifs_reopen_file() - re-establish an invalidated server file handle
 * (e.g. after reconnect).  Tries the POSIX-extensions reopen first when
 * available, else CIFSSMBOpen; optionally flushes dirty data and refreshes
 * inode metadata when @can_flush, then restores the oplock level and
 * re-acquires byte-range locks.
 */
473 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
478 struct cifs_sb_info *cifs_sb;
479 struct cifs_tcon *tcon;
480 struct cifsInodeInfo *pCifsInode;
482 char *full_path = NULL;
484 int disposition = FILE_OPEN;
485 int create_options = CREATE_NOT_DIR;
/* serialize reopen attempts; someone may have beaten us to it */
489 mutex_lock(&pCifsFile->fh_mutex);
490 if (!pCifsFile->invalidHandle) {
491 mutex_unlock(&pCifsFile->fh_mutex);
497 inode = pCifsFile->dentry->d_inode;
498 cifs_sb = CIFS_SB(inode->i_sb);
499 tcon = tlink_tcon(pCifsFile->tlink);
501 /* can not grab rename sem here because various ops, including
502 those that already have the rename sem can end up causing writepage
503 to get called and if the server was down that means we end up here,
504 and we can never tell if the caller already has the rename_sem */
505 full_path = build_path_from_dentry(pCifsFile->dentry);
506 if (full_path == NULL) {
508 mutex_unlock(&pCifsFile->fh_mutex);
513 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
514 inode, pCifsFile->f_flags, full_path);
516 if (tcon->ses->server->oplocks)
521 if (tcon->unix_ext && cap_unix(tcon->ses) &&
522 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
523 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
525 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
526 * original open. Must mask them off for a reopen.
528 unsigned int oflags = pCifsFile->f_flags &
529 ~(O_CREAT | O_EXCL | O_TRUNC);
531 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
532 cifs_sb->mnt_file_mode /* ignored */,
533 oflags, &oplock, &netfid, xid);
535 cFYI(1, "posix reopen succeeded");
538 /* fallthrough to retry open the old way on errors, especially
539 in the reconnect path it is important to retry hard */
542 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
544 if (backup_cred(cifs_sb))
545 create_options |= CREATE_OPEN_BACKUP_INTENT;
547 /* Can not refresh inode by passing in file_info buf to be returned
548 by SMBOpen and then calling get_inode_info with returned buf
549 since file might have write behind data that needs to be flushed
550 and server version of file size can be stale. If we knew for sure
551 that inode was not dirty locally we could do this */
553 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
554 create_options, &netfid, &oplock, NULL,
555 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
556 CIFS_MOUNT_MAP_SPECIAL_CHR);
558 mutex_unlock(&pCifsFile->fh_mutex);
559 cFYI(1, "cifs_open returned 0x%x", rc);
560 cFYI(1, "oplock: %d", oplock);
561 goto reopen_error_exit;
/* success: record the new handle and mark it valid again */
565 pCifsFile->fid.netfid = netfid;
566 pCifsFile->invalidHandle = false;
567 mutex_unlock(&pCifsFile->fh_mutex);
568 pCifsInode = CIFS_I(inode);
571 rc = filemap_write_and_wait(inode->i_mapping);
572 mapping_set_error(inode->i_mapping, rc);
575 rc = cifs_get_inode_info_unix(&inode,
576 full_path, inode->i_sb, xid);
578 rc = cifs_get_inode_info(&inode,
579 full_path, NULL, inode->i_sb,
581 } /* else we are writing out data to server already
582 and could deadlock if we tried to flush data, and
583 since we do not know if we have data that would
584 invalidate the current end of file on the server
585 we can not go to the server to get the new inod
588 cifs_set_oplock_level(pCifsInode, oplock);
590 cifs_relock_file(pCifsFile);
/*
 * cifs_close() - VFS ->release for regular files: drop the cifsFileInfo
 * reference taken at open time.
 */
598 int cifs_close(struct inode *inode, struct file *file)
600 if (file->private_data != NULL) {
601 cifsFileInfo_put(file->private_data);
602 file->private_data = NULL;
605 /* return code from the ->release op is always ignored */
/*
 * cifs_closedir() - VFS ->release for directories: close an uncompleted
 * FindFirst/FindNext search on the server, free the search's network
 * buffer, and release the private data.
 */
609 int cifs_closedir(struct inode *inode, struct file *file)
613 struct cifsFileInfo *cfile = file->private_data;
616 cFYI(1, "Closedir inode = 0x%p", inode);
621 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
623 cFYI(1, "Freeing private data in close dir");
624 spin_lock(&cifs_file_list_lock);
625 if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
626 cfile->invalidHandle = true;
627 spin_unlock(&cifs_file_list_lock);
628 rc = CIFSFindClose(xid, tcon, cfile->fid.netfid);
629 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
630 /* not much we can do if it fails anyway, ignore rc */
633 spin_unlock(&cifs_file_list_lock);
/* free the last search response buffer, small or regular sized */
634 tmp = cfile->srch_inf.ntwrk_buf_start;
636 cFYI(1, "closedir free smb buf in srch struct");
637 cfile->srch_inf.ntwrk_buf_start = NULL;
638 if (cfile->srch_inf.smallBuf)
639 cifs_small_buf_release(tmp);
641 cifs_buf_release(tmp);
643 cifs_put_tlink(cfile->tlink);
644 kfree(file->private_data);
645 file->private_data = NULL;
647 /* BB can we lock the filestruct while this is going on? */
/*
 * cifs_lock_init() - allocate and initialize a byte-range lock record for
 * the current task (pid = tgid).  Caller owns the returned memory.
 */
652 static struct cifsLockInfo *
653 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
655 struct cifsLockInfo *lock =
656 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
659 lock->offset = offset;
660 lock->length = length;
662 lock->pid = current->tgid;
663 INIT_LIST_HEAD(&lock->blist);
664 init_waitqueue_head(&lock->block_q);
/*
 * cifs_del_lock_waiters() - wake every lock that is blocked waiting on
 * @lock and unhook it from the blocked list.
 */
669 cifs_del_lock_waiters(struct cifsLockInfo *lock)
671 struct cifsLockInfo *li, *tmp;
672 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
673 list_del_init(&li->blist);
674 wake_up(&li->block_q);
/*
 * cifs_find_fid_lock_conflict() - scan one open file's lock list for a
 * range overlapping [offset, offset+length).  Non-overlapping entries are
 * skipped; a shared-type request does not conflict with a shared lock, nor
 * with this task's own lock on the same fid (compare_fids + tgid check).
 * NOTE(review): fragmentary — the conflict-report and return lines are
 * not visible here.
 */
679 cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
680 __u64 length, __u8 type, struct cifsFileInfo *cur,
681 struct cifsLockInfo **conf_lock)
683 struct cifsLockInfo *li;
684 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
686 list_for_each_entry(li, &cfile->llist, llist) {
687 if (offset + length <= li->offset ||
688 offset >= li->offset + li->length)
690 else if ((type & server->vals->shared_lock_type) &&
691 ((server->ops->compare_fids(cur, cfile) &&
692 current->tgid == li->pid) || type == li->type))
/*
 * cifs_find_lock_conflict() - check every open instance of the inode for a
 * conflicting byte-range lock, under cifs_file_list_lock.
 */
703 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
704 __u8 type, struct cifsLockInfo **conf_lock)
707 struct cifsFileInfo *fid, *tmp;
708 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
710 spin_lock(&cifs_file_list_lock);
711 list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
712 rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
717 spin_unlock(&cifs_file_list_lock);
723 * Check if there is another lock that prevents us to set the lock (mandatory
724 * style). If such a lock exists, update the flock structure with its
725 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
726 * or leave it the same if we can't. Returns 0 if we don't need to request to
727 * the server or 1 otherwise.
730 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
731 __u8 type, struct file_lock *flock)
734 struct cifsLockInfo *conf_lock;
735 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
736 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
739 mutex_lock(&cinode->lock_mutex);
741 exist = cifs_find_lock_conflict(cfile, offset, length, type,
/* conflict found: report the conflicting lock's range/pid/type to caller */
744 flock->fl_start = conf_lock->offset;
745 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
746 flock->fl_pid = conf_lock->pid;
747 if (conf_lock->type & server->vals->shared_lock_type)
748 flock->fl_type = F_RDLCK;
750 flock->fl_type = F_WRLCK;
751 } else if (!cinode->can_cache_brlcks)
754 flock->fl_type = F_UNLCK;
756 mutex_unlock(&cinode->lock_mutex);
/*
 * cifs_lock_add() - append a lock record to the file's lock list under the
 * inode's lock_mutex.
 */
761 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
763 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
764 mutex_lock(&cinode->lock_mutex);
765 list_add_tail(&lock->llist, &cfile->llist);
766 mutex_unlock(&cinode->lock_mutex);
770 * Set the byte-range lock (mandatory style). Returns:
771 * 1) 0, if we set the lock and don't need to request to the server;
772 * 2) 1, if no locks prevent us but we need to request to the server;
773 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
776 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
779 struct cifsLockInfo *conf_lock;
780 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
786 mutex_lock(&cinode->lock_mutex);
788 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
789 lock->type, &conf_lock);
790 if (!exist && cinode->can_cache_brlcks) {
791 list_add_tail(&lock->llist, &cfile->llist);
792 mutex_unlock(&cinode->lock_mutex);
/* conflict + wait allowed: queue behind the conflicting lock and sleep
   until cifs_del_lock_waiters() unhooks us (blist becomes self-linked) */
801 list_add_tail(&lock->blist, &conf_lock->blist);
802 mutex_unlock(&cinode->lock_mutex);
803 rc = wait_event_interruptible(lock->block_q,
804 (lock->blist.prev == &lock->blist) &&
805 (lock->blist.next == &lock->blist));
806 mutex_lock(&cinode->lock_mutex);
809 list_del_init(&lock->blist);
812 mutex_unlock(&cinode->lock_mutex);
817 * Check if there is another lock that prevents us to set the lock (posix
818 * style). If such a lock exists, update the flock structure with its
819 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
820 * or leave it the same if we can't. Returns 0 if we don't need to request to
821 * the server or 1 otherwise.
824 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
827 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
828 unsigned char saved_type = flock->fl_type;
/* only FL_POSIX locks are handled here */
830 if ((flock->fl_flags & FL_POSIX) == 0)
833 mutex_lock(&cinode->lock_mutex);
834 posix_test_lock(file, flock);
/* no local conflict but brlocks aren't cached: restore the requested type
   so the caller still asks the server */
836 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
837 flock->fl_type = saved_type;
841 mutex_unlock(&cinode->lock_mutex);
846 * Set the byte-range lock (posix style). Returns:
847 * 1) 0, if we set the lock and don't need to request to the server;
848 * 2) 1, if we need to request to the server;
849 * 3) <0, if the error occurs while setting the lock.
852 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
854 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
857 if ((flock->fl_flags & FL_POSIX) == 0)
861 mutex_lock(&cinode->lock_mutex);
862 if (!cinode->can_cache_brlcks) {
863 mutex_unlock(&cinode->lock_mutex);
867 rc = posix_lock_file(file, flock, NULL);
868 mutex_unlock(&cinode->lock_mutex);
/* FILE_LOCK_DEFERRED: a conflicting lock exists; wait for it to clear */
869 if (rc == FILE_LOCK_DEFERRED) {
870 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
873 locks_delete_block(flock);
/*
 * cifs_push_mandatory_locks() - flush all locally-cached byte-range locks
 * for @cfile to the server via LOCKING_ANDX, batching up to max_num ranges
 * per request (exclusive locks first, then shared), then clear
 * can_cache_brlcks.
 */
879 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
882 int rc = 0, stored_rc;
883 struct cifsLockInfo *li, *tmp;
884 struct cifs_tcon *tcon;
885 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
886 unsigned int num, max_num, max_buf;
887 LOCKING_ANDX_RANGE *buf, *cur;
888 int types[] = {LOCKING_ANDX_LARGE_FILES,
889 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
893 tcon = tlink_tcon(cfile->tlink);
895 mutex_lock(&cinode->lock_mutex);
896 if (!cinode->can_cache_brlcks) {
897 mutex_unlock(&cinode->lock_mutex);
903 * Accessing maxBuf is racy with cifs_reconnect - need to store value
904 * and check it for zero before using.
906 max_buf = tcon->ses->server->maxBuf;
908 mutex_unlock(&cinode->lock_mutex);
/* how many LOCKING_ANDX_RANGE entries fit in one SMB */
913 max_num = (max_buf - sizeof(struct smb_hdr)) /
914 sizeof(LOCKING_ANDX_RANGE);
915 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
917 mutex_unlock(&cinode->lock_mutex);
/* two passes: types[0] = exclusive, types[1] = shared */
922 for (i = 0; i < 2; i++) {
925 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
926 if (li->type != types[i])
928 cur->Pid = cpu_to_le16(li->pid);
929 cur->LengthLow = cpu_to_le32((u32)li->length);
930 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
931 cur->OffsetLow = cpu_to_le32((u32)li->offset);
932 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
933 if (++num == max_num) {
934 stored_rc = cifs_lockv(xid, tcon,
936 (__u8)li->type, 0, num,
/* send any remaining partial batch for this lock type */
947 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
948 (__u8)types[i], 0, num, buf);
954 cinode->can_cache_brlcks = false;
955 mutex_unlock(&cinode->lock_mutex);
962 /* copied from fs/locks.c with a name change */
963 #define cifs_for_each_lock(inode, lockp) \
964 for (lockp = &inode->i_flock; *lockp != NULL; \
965 lockp = &(*lockp)->fl_next)
/* snapshot of one POSIX lock queued for pushing to the server
   NOTE(review): fragmentary — the struct's data fields (pid, netfid,
   offset, length, type) are not visible in this view */
967 struct lock_to_push {
968 struct list_head llist;
/*
 * cifs_push_posix_locks() - replay all cached FL_POSIX locks on the inode
 * to the server.  Counts the locks first, preallocates one lock_to_push
 * per lock (safe because lock_mutex blocks new FL_POSIX locks meanwhile),
 * fills the snapshots, then sends each via CIFSSMBPosixLock.
 */
977 cifs_push_posix_locks(struct cifsFileInfo *cfile)
979 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
980 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
981 struct file_lock *flock, **before;
982 unsigned int count = 0, i = 0;
983 int rc = 0, xid, type;
984 struct list_head locks_to_send, *el;
985 struct lock_to_push *lck, *tmp;
990 mutex_lock(&cinode->lock_mutex);
991 if (!cinode->can_cache_brlcks) {
992 mutex_unlock(&cinode->lock_mutex);
/* first pass: count FL_POSIX locks so we know how many to allocate */
998 cifs_for_each_lock(cfile->dentry->d_inode, before) {
999 if ((*before)->fl_flags & FL_POSIX)
1004 INIT_LIST_HEAD(&locks_to_send);
1007 * Allocating count locks is enough because no FL_POSIX locks can be
1008 * added to the list while we are holding cinode->lock_mutex that
1009 * protects locking operations of this inode.
1011 for (; i < count; i++) {
1012 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1017 list_add_tail(&lck->llist, &locks_to_send);
1020 el = locks_to_send.next;
/* second pass: snapshot each FL_POSIX lock into a preallocated entry */
1022 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1024 if ((flock->fl_flags & FL_POSIX) == 0)
1026 if (el == &locks_to_send) {
1028 * The list ended. We don't have enough allocated
1029 * structures - something is really wrong.
1031 cERROR(1, "Can't push all brlocks!");
1034 length = 1 + flock->fl_end - flock->fl_start;
1035 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1039 lck = list_entry(el, struct lock_to_push, llist);
1040 lck->pid = flock->fl_pid;
1041 lck->netfid = cfile->fid.netfid;
1042 lck->length = length;
1044 lck->offset = flock->fl_start;
/* send the snapshots; free each entry as it goes out */
1049 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1052 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1053 lck->offset, lck->length, NULL,
1057 list_del(&lck->llist);
1062 cinode->can_cache_brlcks = false;
1063 mutex_unlock(&cinode->lock_mutex);
/* error path: free any unused preallocated entries */
1068 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1069 list_del(&lck->llist);
/*
 * cifs_push_locks() - dispatch cached-lock replay to the POSIX or the
 * mandatory implementation, depending on unix extensions, FCNTL capability
 * and the noposixbrl mount flag.
 */
1076 cifs_push_locks(struct cifsFileInfo *cfile)
1078 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1079 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1081 if (cap_unix(tcon->ses) &&
1082 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1083 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1084 return cifs_push_posix_locks(cfile);
1086 return cifs_push_mandatory_locks(cfile);
/*
 * cifs_read_flock() - decode a struct file_lock into the protocol-level
 * lock *type plus lock/unlock/wait_flag outputs, logging flags that are
 * not (yet) implemented.
 * NOTE(review): fragmentary — the assignments for FL_POSIX/FL_FLOCK/
 * FL_SLEEP and the *lock/*unlock settings per fl_type are not visible.
 */
1090 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1091 bool *wait_flag, struct TCP_Server_Info *server)
1093 if (flock->fl_flags & FL_POSIX)
1095 if (flock->fl_flags & FL_FLOCK)
1097 if (flock->fl_flags & FL_SLEEP) {
1098 cFYI(1, "Blocking lock")
1101 if (flock->fl_flags & FL_ACCESS)
1102 cFYI(1, "Process suspended by mandatory locking - "
1103 "not implemented yet");
1104 if (flock->fl_flags & FL_LEASE)
1105 cFYI(1, "Lease on file - not implemented yet");
1106 if (flock->fl_flags &
1107 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1108 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1110 *type = server->vals->large_lock_type;
1111 if (flock->fl_type == F_WRLCK) {
1112 cFYI(1, "F_WRLCK ");
1113 *type |= server->vals->exclusive_lock_type;
1115 } else if (flock->fl_type == F_UNLCK) {
1117 *type |= server->vals->unlock_lock_type;
1119 /* Check if unlock includes more than one lock range */
1120 } else if (flock->fl_type == F_RDLCK) {
1122 *type |= server->vals->shared_lock_type;
1124 } else if (flock->fl_type == F_EXLCK) {
1126 *type |= server->vals->exclusive_lock_type;
1128 } else if (flock->fl_type == F_SHLCK) {
1130 *type |= server->vals->shared_lock_type;
1133 cFYI(1, "Unknown type of lock");
/*
 * cifs_mandatory_lock() - thin wrapper sending a single byte-range
 * lock/unlock for the current task via CIFSSMBLock.
 */
1137 cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
1138 __u64 length, __u32 type, int lock, int unlock, bool wait)
1140 return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid,
1141 current->tgid, length, offset, unlock, lock,
1142 (__u8)type, wait, 0);
/*
 * cifs_getlk() - implement F_GETLK: test for conflicts locally, then probe
 * the server.  POSIX path uses CIFSSMBPosixLock; mandatory path tries to
 * take-then-release the lock (and, for shared requests, retries with a
 * shared type) to discover whether a conflicting lock exists.
 */
1146 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1147 bool wait_flag, bool posix_lck, unsigned int xid)
1150 __u64 length = 1 + flock->fl_end - flock->fl_start;
1151 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1152 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1153 struct TCP_Server_Info *server = tcon->ses->server;
1154 __u16 netfid = cfile->fid.netfid;
1157 int posix_lock_type;
/* POSIX path: local test first; only go to the server if needed */
1159 rc = cifs_posix_lock_test(file, flock);
1163 if (type & server->vals->shared_lock_type)
1164 posix_lock_type = CIFS_RDLCK;
1166 posix_lock_type = CIFS_WRLCK;
1167 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1168 flock->fl_start, length, flock,
1169 posix_lock_type, wait_flag);
/* mandatory path: consult the locally cached locks first */
1173 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1177 /* BB we could chain these into one lock request BB */
1178 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
/* lock succeeded: immediately unlock — we were only testing */
1181 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1183 flock->fl_type = F_UNLCK;
1185 cERROR(1, "Error unlocking previously locked "
1186 "range %d during test of lock", rc);
1190 if (type & server->vals->shared_lock_type) {
1191 flock->fl_type = F_WRLCK;
/* exclusive probe failed: retry as shared to classify the conflict */
1195 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1196 type | server->vals->shared_lock_type, 1, 0,
1199 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1200 type | server->vals->shared_lock_type,
1202 flock->fl_type = F_RDLCK;
1204 cERROR(1, "Error unlocking previously locked "
1205 "range %d during test of lock", rc);
1207 flock->fl_type = F_WRLCK;
/*
 * cifs_move_llist() - move every entry from @source onto @dest.
 */
1213 cifs_move_llist(struct list_head *source, struct list_head *dest)
1215 struct list_head *li, *tmp;
1216 list_for_each_safe(li, tmp, source)
1217 list_move(li, dest);
/*
 * cifs_free_llist() - wake each lock's waiters, unlink it, and free the
 * whole list of cifsLockInfo entries.
 */
1221 cifs_free_llist(struct list_head *llist)
1223 struct cifsLockInfo *li, *tmp;
1224 list_for_each_entry_safe(li, tmp, llist, llist) {
1225 cifs_del_lock_waiters(li);
1226 list_del(&li->llist);
/*
 * cifs_unlock_range() - remove all of this task's cached locks that fall
 * entirely inside the flock's unlock range.  If brlocks are cached, locks
 * are simply dropped locally; otherwise matching locks are parked on a tmp
 * list and batched LOCKING_ANDX unlock requests are sent — on failure the
 * tmp list is spliced back so no lock record is lost.
 */
1232 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1235 int rc = 0, stored_rc;
1236 int types[] = {LOCKING_ANDX_LARGE_FILES,
1237 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1239 unsigned int max_num, num, max_buf;
1240 LOCKING_ANDX_RANGE *buf, *cur;
1241 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1242 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1243 struct cifsLockInfo *li, *tmp;
1244 __u64 length = 1 + flock->fl_end - flock->fl_start;
1245 struct list_head tmp_llist;
1247 INIT_LIST_HEAD(&tmp_llist);
1250 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1251 * and check it for zero before using.
1253 max_buf = tcon->ses->server->maxBuf;
1257 max_num = (max_buf - sizeof(struct smb_hdr)) /
1258 sizeof(LOCKING_ANDX_RANGE);
1259 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1263 mutex_lock(&cinode->lock_mutex);
1264 for (i = 0; i < 2; i++) {
1267 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
/* skip locks not fully contained in the requested unlock range */
1268 if (flock->fl_start > li->offset ||
1269 (flock->fl_start + length) <
1270 (li->offset + li->length))
1272 if (current->tgid != li->pid)
1274 if (types[i] != li->type)
1276 if (cinode->can_cache_brlcks) {
1278 * We can cache brlock requests - simply remove
1279 * a lock from the file's list.
1281 list_del(&li->llist);
1282 cifs_del_lock_waiters(li);
1286 cur->Pid = cpu_to_le16(li->pid);
1287 cur->LengthLow = cpu_to_le32((u32)li->length);
1288 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1289 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1290 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1292 * We need to save a lock here to let us add it again to
1293 * the file's list if the unlock range request fails on
1296 list_move(&li->llist, &tmp_llist);
1297 if (++num == max_num) {
1298 stored_rc = cifs_lockv(xid, tcon,
1300 li->type, num, 0, buf);
1303 * We failed on the unlock range
1304 * request - add all locks from the tmp
1305 * list to the head of the file's list.
1307 cifs_move_llist(&tmp_llist,
1312 * The unlock range request succeed -
1313 * free the tmp list.
1315 cifs_free_llist(&tmp_llist);
/* flush the remaining partial batch for this lock type */
1322 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1323 types[i], num, 0, buf);
1325 cifs_move_llist(&tmp_llist, &cfile->llist);
1328 cifs_free_llist(&tmp_llist);
1332 mutex_unlock(&cinode->lock_mutex);
/*
 * cifs_setlk() - implement F_SETLK/F_SETLKW.  POSIX path sets the lock
 * locally (cifs_posix_lock_set) then on the server via CIFSSMBPosixLock;
 * mandatory path caches or sends the lock (cifs_lock_add_if /
 * cifs_mandatory_lock) or delegates unlock to cifs_unlock_range.
 */
1338 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1339 bool wait_flag, bool posix_lck, int lock, int unlock,
1343 __u64 length = 1 + flock->fl_end - flock->fl_start;
1344 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1345 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1346 struct TCP_Server_Info *server = tcon->ses->server;
1347 __u16 netfid = cfile->fid.netfid;
1350 int posix_lock_type;
1352 rc = cifs_posix_lock_set(file, flock);
1356 if (type & server->vals->shared_lock_type)
1357 posix_lock_type = CIFS_RDLCK;
1359 posix_lock_type = CIFS_WRLCK;
1362 posix_lock_type = CIFS_UNLCK;
1364 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1365 flock->fl_start, length, NULL,
1366 posix_lock_type, wait_flag);
/* mandatory lock path */
1371 struct cifsLockInfo *lock;
1373 lock = cifs_lock_init(flock->fl_start, length, type);
1377 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1383 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1384 type, 1, 0, wait_flag);
1390 cifs_lock_add(cfile, lock);
1392 rc = cifs_unlock_range(cfile, flock, xid);
/* keep the local VFS lock state in sync for FL_POSIX requests */
1395 if (flock->fl_flags & FL_POSIX)
1396 posix_lock_file_wait(file, flock);
/*
 * VFS ->lock entry point for CIFS.  Decodes the fcntl request, decides
 * between POSIX and mandatory locking semantics, then dispatches to
 * cifs_getlk() or cifs_setlk().  NOTE(review): elided extract; comments
 * cover only the visible lines.
 */
1400 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1403 	int lock = 0, unlock = 0;
1404 	bool wait_flag = false;
1405 	bool posix_lck = false;
1406 	struct cifs_sb_info *cifs_sb;
1407 	struct cifs_tcon *tcon;
1408 	struct cifsInodeInfo *cinode;
1409 	struct cifsFileInfo *cfile;
1416 	cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1417 		"end: %lld", cmd, flock->fl_flags, flock->fl_type,
1418 		flock->fl_start, flock->fl_end);
1420 	cfile = (struct cifsFileInfo *)file->private_data;
1421 	tcon = tlink_tcon(cfile->tlink);
	/* translate fl_flags/fl_type into lock/unlock/wait/type values */
1423 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1426 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1427 	netfid = cfile->fid.netfid;
1428 	cinode = CIFS_I(file->f_path.dentry->d_inode);
	/* POSIX semantics require the UNIX extensions and no nobrl mount */
1430 	if (cap_unix(tcon->ses) &&
1431 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1432 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1435 	 * BB add code here to normalize offset and length to account for
1436 	 * negative length which we can not accept over the wire.
1438 	if (IS_GETLK(cmd)) {
1439 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1444 	if (!lock && !unlock) {
1446 		 * if no lock or unlock then nothing to do since we do not
1453 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1460  * update the file size (if needed) after a write. Should be called with
1461  * the inode->i_lock held
/* Advance the cached server EOF only if the write extended the file. */
1464 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1465 		unsigned int bytes_written)
1467 	loff_t end_of_write = offset + bytes_written;
1469 	if (end_of_write > cifsi->server_eof)
1470 		cifsi->server_eof = end_of_write;
/*
 * Synchronously write write_size bytes from write_data to the server at
 * *poffset, retrying on -EAGAIN (reopening an invalidated handle first),
 * and update the cached EOF / i_size on success.  Returns the number of
 * bytes written.  NOTE(review): elided extract; comments cover only the
 * visible lines.
 */
1473 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1474 			  const char *write_data, size_t write_size,
1478 	unsigned int bytes_written = 0;
1479 	unsigned int total_written;
1480 	struct cifs_sb_info *cifs_sb;
1481 	struct cifs_tcon *pTcon;
1483 	struct dentry *dentry = open_file->dentry;
1484 	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1485 	struct cifs_io_parms io_parms;
1487 	cifs_sb = CIFS_SB(dentry->d_sb);
1489 	cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1490 	     *poffset, dentry->d_name.name);
1492 	pTcon = tlink_tcon(open_file->tlink);
	/* loop until the full request has gone out or an error occurs */
1496 	for (total_written = 0; write_size > total_written;
1497 	     total_written += bytes_written) {
1499 		while (rc == -EAGAIN) {
1503 			if (open_file->invalidHandle) {
1504 				/* we could deadlock if we called
1505 				   filemap_fdatawait from here so tell
1506 				   reopen_file not to flush data to
1508 				rc = cifs_reopen_file(open_file, false);
			/* clamp each wire request to the negotiated wsize */
1513 			len = min((size_t)cifs_sb->wsize,
1514 				  write_size - total_written);
1515 			/* iov[0] is reserved for smb header */
1516 			iov[1].iov_base = (char *)write_data + total_written;
1517 			iov[1].iov_len = len;
1518 			io_parms.netfid = open_file->fid.netfid;
1520 			io_parms.tcon = pTcon;
1521 			io_parms.offset = *poffset;
1522 			io_parms.length = len;
1523 			rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
1526 		if (rc || (bytes_written == 0)) {
		/* i_lock protects server_eof updates (see cifs_update_eof) */
1534 			spin_lock(&dentry->d_inode->i_lock);
1535 			cifs_update_eof(cifsi, *poffset, bytes_written);
1536 			spin_unlock(&dentry->d_inode->i_lock);
1537 			*poffset += bytes_written;
1541 	cifs_stats_bytes_written(pTcon, total_written);
1543 	if (total_written > 0) {
1544 		spin_lock(&dentry->d_inode->i_lock);
1545 		if (*poffset > dentry->d_inode->i_size)
1546 			i_size_write(dentry->d_inode, *poffset);
1547 		spin_unlock(&dentry->d_inode->i_lock);
1549 	mark_inode_dirty_sync(dentry->d_inode);
1551 	return total_written;
/*
 * Find an open file handle on this inode that is usable for reading.
 * Takes a reference on the returned cifsFileInfo (caller must put it).
 * On multiuser mounts, optionally restricts the search to handles owned
 * by the current fsuid.
 */
1554 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1557 	struct cifsFileInfo *open_file = NULL;
1558 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1560 	/* only filter by fsuid on multiuser mounts */
1561 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1564 	spin_lock(&cifs_file_list_lock);
1565 	/* we could simply get the first_list_entry since write-only entries
1566 	   are always at the end of the list but since the first entry might
1567 	   have a close pending, we go through the whole list */
1568 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1569 		if (fsuid_only && open_file->uid != current_fsuid())
1571 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1572 			if (!open_file->invalidHandle) {
1573 				/* found a good file */
1574 				/* lock it so it will not be closed on us */
1575 				cifsFileInfo_get_locked(open_file);
1576 				spin_unlock(&cifs_file_list_lock);
1578 			} /* else might as well continue, and look for
1579 			     another, or simply have the caller reopen it
1580 			     again rather than trying to fix this handle */
1581 		} else /* write only file */
1582 			break; /* write only files are last so must be done */
1584 	spin_unlock(&cifs_file_list_lock);
/*
 * Find (or make usable) an open file handle on this inode that can be
 * written through.  Prefers a valid handle owned by the current tgid,
 * then any valid handle, and finally attempts to reopen an invalidated
 * one (bounded by MAX_REOPEN_ATT).  Returns a referenced cifsFileInfo.
 * NOTE(review): elided extract; comments cover only the visible lines.
 */
1588 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1591 	struct cifsFileInfo *open_file, *inv_file = NULL;
1592 	struct cifs_sb_info *cifs_sb;
1593 	bool any_available = false;
1595 	unsigned int refind = 0;
1597 	/* Having a null inode here (because mapping->host was set to zero by
1598 	   the VFS or MM) should not happen but we had reports of on oops (due to
1599 	   it being zero) during stress testcases so we need to check for it */
1601 	if (cifs_inode == NULL) {
1602 		cERROR(1, "Null inode passed to cifs_writeable_file");
1607 	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1609 	/* only filter by fsuid on multiuser mounts */
1610 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1613 	spin_lock(&cifs_file_list_lock);
	/* give up after too many reopen attempts to avoid looping forever */
1615 	if (refind > MAX_REOPEN_ATT) {
1616 		spin_unlock(&cifs_file_list_lock);
1619 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1620 		if (!any_available && open_file->pid != current->tgid)
1622 		if (fsuid_only && open_file->uid != current_fsuid())
1624 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1625 			if (!open_file->invalidHandle) {
1626 				/* found a good writable file */
1627 				cifsFileInfo_get_locked(open_file);
1628 				spin_unlock(&cifs_file_list_lock);
			/* remember an invalid handle as a last resort */
1632 				inv_file = open_file;
1636 	/* couldn't find useable FH with same pid, try any available */
1637 	if (!any_available) {
1638 		any_available = true;
1639 		goto refind_writable;
1643 		any_available = false;
1644 		cifsFileInfo_get_locked(inv_file);
1647 	spin_unlock(&cifs_file_list_lock);
	/* try to revive the invalidated handle outside the list lock */
1650 		rc = cifs_reopen_file(inv_file, false);
1654 			spin_lock(&cifs_file_list_lock);
1655 			list_move_tail(&inv_file->flist,
1656 					&cifs_inode->openFileList);
1657 			spin_unlock(&cifs_file_list_lock);
1658 			cifsFileInfo_put(inv_file);
1659 			spin_lock(&cifs_file_list_lock);
1661 			goto refind_writable;
/*
 * Write the [from, to) range of a page back to the server using any
 * writable handle on the inode.  Clamps the range so it never extends
 * the file, and treats writes entirely past EOF (truncate race) as a
 * no-op success.  NOTE(review): elided extract; comments cover only the
 * visible lines.
 */
1668 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1670 	struct address_space *mapping = page->mapping;
1671 	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1674 	int bytes_written = 0;
1675 	struct inode *inode;
1676 	struct cifsFileInfo *open_file;
1678 	if (!mapping || !mapping->host)
1681 	inode = page->mapping->host;
1683 	offset += (loff_t)from;
1684 	write_data = kmap(page);
	/* sanity-check the requested in-page range */
1687 	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1692 	/* racing with truncate? */
1693 	if (offset > mapping->host->i_size) {
1695 		return 0; /* don't care */
1698 	/* check to make sure that we are not extending the file */
1699 	if (mapping->host->i_size - offset < (loff_t)to)
1700 		to = (unsigned)(mapping->host->i_size - offset);
1702 	open_file = find_writable_file(CIFS_I(mapping->host), false);
1704 		bytes_written = cifs_write(open_file, open_file->pid,
1705 					   write_data, to - from, &offset);
1706 		cifsFileInfo_put(open_file);
1707 		/* Does mm or vfs already set times? */
1708 		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1709 		if ((bytes_written > 0) && (offset))
1711 		else if (bytes_written < 0)
1714 		cFYI(1, "No writeable filehandles for inode");
1723  * Marshal up the iov array, reserving the first one for the header. Also,
1727 cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1730 	struct inode *inode = wdata->cfile->dentry->d_inode;
1731 	loff_t size = i_size_read(inode);
1733 	/* marshal up the pages into iov array */
1735 	for (i = 0; i < wdata->nr_pages; i++) {
		/* last page may be partial: cap its length at i_size */
1736 		iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1737 					(loff_t)PAGE_CACHE_SIZE);
1738 		iov[i + 1].iov_base = kmap(wdata->pages[i]);
1739 		wdata->bytes += iov[i + 1].iov_len;
/*
 * Address-space ->writepages for CIFS: gather runs of consecutive dirty
 * pages (up to wsize worth) into a cifs_writedata and submit them with
 * cifs_async_writev().  Falls back to generic_writepages() when wsize is
 * smaller than a page.  NOTE(review): elided extract; comments cover only
 * the visible lines.
 */
1743 static int cifs_writepages(struct address_space *mapping,
1744 			   struct writeback_control *wbc)
1746 	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1747 	bool done = false, scanned = false, range_whole = false;
1749 	struct cifs_writedata *wdata;
1754 	 * If wsize is smaller than the page cache size, default to writing
1755 	 * one page at a time via cifs_writepage
1757 	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1758 		return generic_writepages(mapping, wbc);
1760 	if (wbc->range_cyclic) {
1761 		index = mapping->writeback_index; /* Start from prev offset */
1764 		index = wbc->range_start >> PAGE_CACHE_SHIFT;
1765 		end = wbc->range_end >> PAGE_CACHE_SHIFT;
1766 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1771 	while (!done && index <= end) {
1772 		unsigned int i, nr_pages, found_pages;
1773 		pgoff_t next = 0, tofind;
1774 		struct page **pages;
		/* slot 0 of the iov is reserved for the SMB header */
1776 		tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1779 		wdata = cifs_writedata_alloc((unsigned int)tofind,
1780 					     cifs_writev_complete);
1787 		 * find_get_pages_tag seems to return a max of 256 on each
1788 		 * iteration, so we must call it several times in order to
1789 		 * fill the array or the wsize is effectively limited to
1790 		 * 256 * PAGE_CACHE_SIZE.
1793 		pages = wdata->pages;
1795 			nr_pages = find_get_pages_tag(mapping, &index,
1796 							PAGECACHE_TAG_DIRTY,
1798 			found_pages += nr_pages;
1801 		} while (nr_pages && tofind && index <= end);
1803 		if (found_pages == 0) {
1804 			kref_put(&wdata->refcount, cifs_writedata_release);
		/* vet each found page; keep only a consecutive, lockable run */
1809 		for (i = 0; i < found_pages; i++) {
1810 			page = wdata->pages[i];
1812 			 * At this point we hold neither mapping->tree_lock nor
1813 			 * lock on the page itself: the page may be truncated or
1814 			 * invalidated (changing page->mapping to NULL), or even
1815 			 * swizzled back from swapper_space to tmpfs file
1821 			else if (!trylock_page(page))
1824 			if (unlikely(page->mapping != mapping)) {
1829 			if (!wbc->range_cyclic && page->index > end) {
1835 			if (next && (page->index != next)) {
1836 				/* Not next consecutive page */
1841 			if (wbc->sync_mode != WB_SYNC_NONE)
1842 				wait_on_page_writeback(page);
1844 			if (PageWriteback(page) ||
1845 			    !clear_page_dirty_for_io(page)) {
1851 			 * This actually clears the dirty bit in the radix tree.
1852 			 * See cifs_writepage() for more commentary.
1854 			set_page_writeback(page);
1856 			if (page_offset(page) >= mapping->host->i_size) {
1859 				end_page_writeback(page);
1863 			wdata->pages[i] = page;
1864 			next = page->index + 1;
1868 		/* reset index to refind any pages skipped */
1870 			index = wdata->pages[0]->index + 1;
1872 		/* put any pages we aren't going to use */
1873 		for (i = nr_pages; i < found_pages; i++) {
1874 			page_cache_release(wdata->pages[i]);
1875 			wdata->pages[i] = NULL;
1878 		/* nothing to write? */
1879 		if (nr_pages == 0) {
1880 			kref_put(&wdata->refcount, cifs_writedata_release);
1884 		wdata->sync_mode = wbc->sync_mode;
1885 		wdata->nr_pages = nr_pages;
1886 		wdata->offset = page_offset(wdata->pages[0]);
1887 		wdata->marshal_iov = cifs_writepages_marshal_iov;
		/* (re)acquire a writable handle for each send attempt */
1890 			if (wdata->cfile != NULL)
1891 				cifsFileInfo_put(wdata->cfile);
1892 			wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1894 			if (!wdata->cfile) {
1895 				cERROR(1, "No writable handles for inode");
1899 			wdata->pid = wdata->cfile->pid;
1900 			rc = cifs_async_writev(wdata);
1901 		} while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1903 		for (i = 0; i < nr_pages; ++i)
1904 			unlock_page(wdata->pages[i]);
1906 		/* send failure -- clean up the mess */
1908 			for (i = 0; i < nr_pages; ++i) {
1910 					redirty_page_for_writepage(wbc,
1913 					SetPageError(wdata->pages[i]);
1914 				end_page_writeback(wdata->pages[i]);
1915 				page_cache_release(wdata->pages[i]);
1918 				mapping_set_error(mapping, rc);
1920 		kref_put(&wdata->refcount, cifs_writedata_release);
1922 		wbc->nr_to_write -= nr_pages;
1923 		if (wbc->nr_to_write <= 0)
1929 	if (!scanned && !done) {
1931 		 * We hit the last page and there is more work to be done: wrap
1932 		 * back to the start of the file
1939 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1940 		mapping->writeback_index = index;
/*
 * Write back a single locked page synchronously via
 * cifs_partialpagewrite(); redirties the page on transient -EAGAIN
 * (or retries when WB_SYNC_ALL).  NOTE(review): elided extract.
 */
1946 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1952 	/* BB add check for wbc flags */
1953 	page_cache_get(page);
1954 	if (!PageUptodate(page))
1955 		cFYI(1, "ppw - page not up to date");
1958 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1960 	 * A writepage() implementation always needs to do either this,
1961 	 * or re-dirty the page with "redirty_page_for_writepage()" in
1962 	 * the case of a failure.
1964 	 * Just unlocking the page will cause the radix tree tag-bits
1965 	 * to fail to update with the state of the page correctly.
1967 	set_page_writeback(page);
1969 	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1970 	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1972 	else if (rc == -EAGAIN)
1973 		redirty_page_for_writepage(wbc, page);
1977 		SetPageUptodate(page);
1978 	end_page_writeback(page);
1979 	page_cache_release(page);
/* ->writepage wrapper: delegate to cifs_writepage_locked(). */
1984 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1986 	int rc = cifs_writepage_locked(page, wbc);
/*
 * Address-space ->write_end: mark the page uptodate/dirty after a
 * buffered write, or write-through immediately via cifs_write() when the
 * page is not uptodate.  Extends i_size under i_lock if the write grew
 * the file.  NOTE(review): elided extract; comments cover only the
 * visible lines.
 */
1991 static int cifs_write_end(struct file *file, struct address_space *mapping,
1992 			loff_t pos, unsigned len, unsigned copied,
1993 			struct page *page, void *fsdata)
1996 	struct inode *inode = mapping->host;
1997 	struct cifsFileInfo *cfile = file->private_data;
1998 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	/* pick the pid to stamp on the wire request */
2001 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2004 		pid = current->tgid;
2006 	cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2009 	if (PageChecked(page)) {
2011 			SetPageUptodate(page);
2012 		ClearPageChecked(page);
2013 	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2014 		SetPageUptodate(page);
2016 	if (!PageUptodate(page)) {
2018 		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2022 		/* this is probably better than directly calling
2023 		   partialpage_write since in this function the file handle is
2024 		   known which we might as well leverage */
2025 		/* BB check if anything else missing out of ppw
2026 		   such as updating last write time */
2027 		page_data = kmap(page);
2028 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2029 		/* if (rc < 0) should we set writebehind rc? */
2036 		set_page_dirty(page);
2040 		spin_lock(&inode->i_lock);
2041 		if (pos > inode->i_size)
2042 			i_size_write(inode, pos);
2043 		spin_unlock(&inode->i_lock);
2047 	page_cache_release(page);
/*
 * fsync for strict-cache mode: flush dirty pages, invalidate the page
 * cache if we lack a read oplock, then issue an SMB Flush (unless the
 * mount disabled server-side sync).  NOTE(review): elided extract.
 */
2052 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2057 	struct cifs_tcon *tcon;
2058 	struct cifsFileInfo *smbfile = file->private_data;
2059 	struct inode *inode = file->f_path.dentry->d_inode;
2060 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2062 	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2065 	mutex_lock(&inode->i_mutex);
2069 	cFYI(1, "Sync file - name: %s datasync: 0x%x",
2070 		file->f_path.dentry->d_name.name, datasync);
	/* without a read oplock the cache may be stale: drop it */
2072 	if (!CIFS_I(inode)->clientCanCacheRead) {
2073 		rc = cifs_invalidate_mapping(inode);
2075 			cFYI(1, "rc: %d during invalidate phase", rc);
2076 			rc = 0; /* don't care about it in fsync */
2080 	tcon = tlink_tcon(smbfile->tlink);
2081 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2082 		rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid);
2085 	mutex_unlock(&inode->i_mutex);
/*
 * Regular fsync: flush dirty pages in range, then ask the server to
 * flush its copy with SMB Flush (unless nosync is mounted).
 * NOTE(review): elided extract.
 */
2089 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2093 	struct cifs_tcon *tcon;
2094 	struct cifsFileInfo *smbfile = file->private_data;
2095 	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2096 	struct inode *inode = file->f_mapping->host;
2098 	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2101 	mutex_lock(&inode->i_mutex);
2105 	cFYI(1, "Sync file - name: %s datasync: 0x%x",
2106 		file->f_path.dentry->d_name.name, datasync);
2108 	tcon = tlink_tcon(smbfile->tlink);
2109 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2110 		rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid);
2113 	mutex_unlock(&inode->i_mutex);
2118  * As file closes, flush all cached write data for this inode checking
2119  * for write behind errors.
2121 int cifs_flush(struct file *file, fl_owner_t id)
2123 	struct inode *inode = file->f_path.dentry->d_inode;
	/* only flush when the file was open for writing */
2126 	if (file->f_mode & FMODE_WRITE)
2127 		rc = filemap_write_and_wait(inode->i_mapping);
2129 	cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * Allocate num_pages highmem-eligible pages into pages[]; on allocation
 * failure the partially-filled array is cleaned up and -ENOMEM reported.
 * NOTE(review): elided extract.
 */
2135 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2140 	for (i = 0; i < num_pages; i++) {
2141 		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2144 			 * save number of pages we have already allocated and
2145 			 * return with ENOMEM error
2154 		for (i = 0; i < num_pages; i++)
/*
 * Compute how many pages are needed for the next chunk of an uncached
 * write: clamp len to wsize (returned via *cur_len) and round up to
 * whole pages.
 */
2161 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2166 	clen = min_t(const size_t, len, wsize);
2167 	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * Map each page of an uncached write into the iov array (slot 0 is
 * reserved for the SMB header); the final iov entry may be shorter than
 * a page.
 */
2176 cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2179 	size_t bytes = wdata->bytes;
2181 	/* marshal up the pages into iov array */
2182 	for (i = 0; i < wdata->nr_pages; i++) {
2183 		iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2184 		iov[i + 1].iov_base = kmap(wdata->pages[i]);
2185 		bytes -= iov[i + 1].iov_len;
/*
 * Completion work for an uncached async write: update cached EOF and
 * i_size under i_lock, signal the waiter, and release the pages unless
 * the request will be retried (-EAGAIN keeps them for the resend).
 */
2190 cifs_uncached_writev_complete(struct work_struct *work)
2193 	struct cifs_writedata *wdata = container_of(work,
2194 					struct cifs_writedata, work);
2195 	struct inode *inode = wdata->cfile->dentry->d_inode;
2196 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
2198 	spin_lock(&inode->i_lock);
2199 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2200 	if (cifsi->server_eof > inode->i_size)
2201 		i_size_write(inode, cifsi->server_eof);
2202 	spin_unlock(&inode->i_lock);
2204 	complete(&wdata->done);
2206 	if (wdata->result != -EAGAIN) {
2207 		for (i = 0; i < wdata->nr_pages; i++)
2208 			put_page(wdata->pages[i]);
2211 	kref_put(&wdata->refcount, cifs_writedata_release);
2214 /* attempt to send write to server, retry on any -EAGAIN errors */
2216 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
	/* reopen a stale handle before (re)sending */
2221 		if (wdata->cfile->invalidHandle) {
2222 			rc = cifs_reopen_file(wdata->cfile, false);
2226 		rc = cifs_async_writev(wdata);
2227 	} while (rc == -EAGAIN);
/*
 * Uncached (direct-style) vectored write: copy user data into freshly
 * allocated pages in wsize-sized chunks, fire off async writes for each
 * chunk, then wait for all completions in offset order, resending any
 * chunk that fails with -EAGAIN.  Returns bytes written or an error.
 * NOTE(review): elided extract; comments cover only the visible lines.
 */
2233 cifs_iovec_write(struct file *file, const struct iovec *iov,
2234 		 unsigned long nr_segs, loff_t *poffset)
2236 	unsigned long nr_pages, i;
2237 	size_t copied, len, cur_len;
2238 	ssize_t total_written = 0;
2241 	struct cifsFileInfo *open_file;
2242 	struct cifs_tcon *tcon;
2243 	struct cifs_sb_info *cifs_sb;
2244 	struct cifs_writedata *wdata, *tmp;
2245 	struct list_head wdata_list;
2249 	len = iov_length(iov, nr_segs);
2253 	rc = generic_write_checks(file, poffset, &len, 0);
2257 	INIT_LIST_HEAD(&wdata_list);
2258 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2259 	open_file = file->private_data;
2260 	tcon = tlink_tcon(open_file->tlink);
2263 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2264 		pid = open_file->pid;
2266 		pid = current->tgid;
2268 	iov_iter_init(&it, iov, nr_segs, len, 0);
	/* issue phase: build and send one wdata per wsize chunk */
2272 		nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2273 		wdata = cifs_writedata_alloc(nr_pages,
2274 					     cifs_uncached_writev_complete);
2280 		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2287 		for (i = 0; i < nr_pages; i++) {
2288 			copied = min_t(const size_t, cur_len, PAGE_SIZE);
2289 			copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2292 			iov_iter_advance(&it, copied);
2294 		cur_len = save_len - cur_len;
2296 		wdata->sync_mode = WB_SYNC_ALL;
2297 		wdata->nr_pages = nr_pages;
2298 		wdata->offset = (__u64)offset;
2299 		wdata->cfile = cifsFileInfo_get(open_file);
2301 		wdata->bytes = cur_len;
2302 		wdata->marshal_iov = cifs_uncached_marshal_iov;
2303 		rc = cifs_uncached_retry_writev(wdata);
2305 			kref_put(&wdata->refcount, cifs_writedata_release);
2309 		list_add_tail(&wdata->list, &wdata_list);
2315 	 * If at least one write was successfully sent, then discard any rc
2316 	 * value from the later writes. If the other write succeeds, then
2317 	 * we'll end up returning whatever was written. If it fails, then
2318 	 * we'll get a new rc value from that.
2320 	if (!list_empty(&wdata_list))
2324 	 * Wait for and collect replies for any successful sends in order of
2325 	 * increasing offset. Once an error is hit or we get a fatal signal
2326 	 * while waiting, then return without waiting for any more replies.
2329 	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2331 			/* FIXME: freezable too? */
2332 			rc = wait_for_completion_killable(&wdata->done);
2335 			else if (wdata->result)
2338 				total_written += wdata->bytes;
2340 			/* resend call if it's a retryable error */
2341 			if (rc == -EAGAIN) {
2342 				rc = cifs_uncached_retry_writev(wdata);
2346 		list_del_init(&wdata->list);
2347 		kref_put(&wdata->refcount, cifs_writedata_release);
2350 	if (total_written > 0)
2351 		*poffset += total_written;
2353 	cifs_stats_bytes_written(tcon, total_written);
2354 	return total_written ? total_written : (ssize_t)rc;
/*
 * aio write entry for uncached I/O: delegate to cifs_iovec_write() and,
 * on success, mark the page cache stale so subsequent cached reads
 * refetch.  NOTE(review): elided extract.
 */
2357 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2358 			 unsigned long nr_segs, loff_t pos)
2361 	struct inode *inode;
2363 	inode = iocb->ki_filp->f_path.dentry->d_inode;
2366 	 * BB - optimize the way when signing is disabled. We can drop this
2367 	 * extra memory-to-memory copying and use iovec buffers for constructing
2371 	written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2373 		CIFS_I(inode)->invalid_mapping = true;
/*
 * Strict-cache write: use the generic cached path only with a full
 * (write-caching) oplock; otherwise write through the server via
 * cifs_user_writev().  NOTE(review): elided extract.
 */
2380 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2381 			   unsigned long nr_segs, loff_t pos)
2383 	struct inode *inode;
2385 	inode = iocb->ki_filp->f_path.dentry->d_inode;
2387 	if (CIFS_I(inode)->clientCanCacheAll)
2388 		return generic_file_aio_write(iocb, iov, nr_segs, pos);
2391 	 * In strict cache mode we need to write the data to the server exactly
2392 	 * from the pos to pos+len-1 rather than flush all affected pages
2393 	 * because it may cause a error with mandatory locks on these pages but
2394 	 * not on the region from pos to ppos+len-1.
2397 	return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * Allocate and initialize a cifs_readdata with room for nr_vecs kvecs;
 * the supplied work function runs on read completion.  Returns NULL on
 * allocation failure.
 */
2400 static struct cifs_readdata *
2401 cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2403 	struct cifs_readdata *rdata;
	/* struct and its kvec array in one allocation */
2405 	rdata = kzalloc(sizeof(*rdata) +
2406 			sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2407 	if (rdata != NULL) {
2408 		kref_init(&rdata->refcount);
2409 		INIT_LIST_HEAD(&rdata->list);
2410 		init_completion(&rdata->done);
2411 		INIT_WORK(&rdata->work, complete);
2412 		INIT_LIST_HEAD(&rdata->pages);
/* kref release for a cifs_readdata: drop the file handle reference. */
2418 cifs_readdata_release(struct kref *refcount)
2420 	struct cifs_readdata *rdata = container_of(refcount,
2421 					struct cifs_readdata, refcount);
2424 		cifsFileInfo_put(rdata->cfile);
/*
 * Allocate npages pages onto the given list; on failure the pages
 * already allocated are torn down again.  NOTE(review): elided extract.
 */
2430 cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2433 	struct page *page, *tpage;
2436 	for (i = 0; i < npages; i++) {
2437 		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2442 		list_add(&page->lru, list);
	/* error path: unwind whatever was added to the list */
2446 	list_for_each_entry_safe(page, tpage, list, lru) {
2447 		list_del(&page->lru);
/*
 * Release path for an uncached readdata: free any pages still on the
 * list, then fall through to the common cifs_readdata_release().
 */
2455 cifs_uncached_readdata_release(struct kref *refcount)
2457 	struct page *page, *tpage;
2458 	struct cifs_readdata *rdata = container_of(refcount,
2459 					struct cifs_readdata, refcount);
2461 	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2462 		list_del(&page->lru);
2465 	cifs_readdata_release(refcount);
/*
 * Send (or resend) an async read, retrying on -EAGAIN and reopening an
 * invalidated handle before each attempt.
 */
2469 cifs_retry_async_readv(struct cifs_readdata *rdata)
2474 		if (rdata->cfile->invalidHandle) {
2475 			rc = cifs_reopen_file(rdata->cfile, true);
2479 		rc = cifs_async_readv(rdata);
2480 	} while (rc == -EAGAIN);
2486  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2487  * @rdata:	the readdata response with list of pages holding data
2488  * @iov:	vector in which we should copy the data
2489  * @nr_segs:	number of segments in vector
2490  * @offset:	offset into file of the first iovec
2491  * @copied:	used to return the amount of data copied to the iov
2493  * This function copies data from a list of pages in a readdata response into
2494  * an array of iovecs. It will first calculate where the data should go
2495  * based on the info in the readdata and then copy the data into that spot.
2498 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2499 			unsigned long nr_segs, loff_t offset, ssize_t *copied)
2503 	size_t pos = rdata->offset - offset;
2504 	struct page *page, *tpage;
2505 	ssize_t remaining = rdata->bytes;
2506 	unsigned char *pdata;
2508 	/* set up iov_iter and advance to the correct offset */
2509 	iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2510 	iov_iter_advance(&ii, pos);
2513 	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2516 		/* copy a whole page or whatever's left */
2517 		copy = min_t(ssize_t, remaining, PAGE_SIZE);
2519 		/* ...but limit it to whatever space is left in the iov */
2520 		copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2522 		/* go while there's data to be copied and no errors */
2525 			rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2531 				iov_iter_advance(&ii, copy);
	/* page fully consumed: drop it from the list */
2535 		list_del(&page->lru);
/*
 * Completion work for an uncached async read: unmap the pages that were
 * kmapped for the transfer (only on success), wake the waiter, and drop
 * the request reference.
 */
2543 cifs_uncached_readv_complete(struct work_struct *work)
2545 	struct cifs_readdata *rdata = container_of(work,
2546 						struct cifs_readdata, work);
2548 	/* if the result is non-zero then the pages weren't kmapped */
2549 	if (rdata->result == 0) {
2552 		list_for_each_entry(page, &rdata->pages, lru)
2556 	complete(&rdata->done);
2557 	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Build the receive iov for an uncached read from the pages on the
 * rdata list: full pages while data remains, a zero-padded partial page
 * for the tail, and surplus pages are freed.  NOTE(review): elided
 * extract; comments cover only the visible lines.
 */
2561 cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2562 				unsigned int remaining)
2565 	struct page *page, *tpage;
2568 	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2569 		if (remaining >= PAGE_SIZE) {
2570 			/* enough data to fill the page */
2571 			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2572 			rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2573 			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2574 				rdata->nr_iov, page->index,
2575 				rdata->iov[rdata->nr_iov].iov_base,
2576 				rdata->iov[rdata->nr_iov].iov_len);
2579 			remaining -= PAGE_SIZE;
2580 		} else if (remaining > 0) {
2581 			/* enough for partial page, fill and zero the rest */
2582 			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2583 			rdata->iov[rdata->nr_iov].iov_len = remaining;
2584 			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2585 				rdata->nr_iov, page->index,
2586 				rdata->iov[rdata->nr_iov].iov_base,
2587 				rdata->iov[rdata->nr_iov].iov_len);
			/* zero the unread tail of the partial page */
2588 			memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2589 				'\0', PAGE_SIZE - remaining);
2594 			/* no need to hold page hostage */
2595 			list_del(&page->lru);
/*
 * Uncached vectored read: issue async reads in rsize-sized chunks, then
 * wait for completions in offset order, copying each reply into the
 * user iovec and resending any chunk that fails with -EAGAIN.  Returns
 * bytes read or an error.  NOTE(review): elided extract; comments cover
 * only the visible lines.
 */
2604 cifs_iovec_read(struct file *file, const struct iovec *iov,
2605 		 unsigned long nr_segs, loff_t *poffset)
2608 	size_t len, cur_len;
2609 	ssize_t total_read = 0;
2610 	loff_t offset = *poffset;
2611 	unsigned int npages;
2612 	struct cifs_sb_info *cifs_sb;
2613 	struct cifs_tcon *tcon;
2614 	struct cifsFileInfo *open_file;
2615 	struct cifs_readdata *rdata, *tmp;
2616 	struct list_head rdata_list;
2622 	len = iov_length(iov, nr_segs);
2626 	INIT_LIST_HEAD(&rdata_list);
2627 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2628 	open_file = file->private_data;
2629 	tcon = tlink_tcon(open_file->tlink);
2631 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2632 		pid = open_file->pid;
2634 		pid = current->tgid;
2636 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2637 		cFYI(1, "attempting read on write only file instance");
	/* issue phase: one rdata per rsize chunk */
2640 		cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2641 		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2643 		/* allocate a readdata struct */
2644 		rdata = cifs_readdata_alloc(npages,
2645 					    cifs_uncached_readv_complete);
2651 		rc = cifs_read_allocate_pages(&rdata->pages, npages);
2655 		rdata->cfile = cifsFileInfo_get(open_file);
2656 		rdata->offset = offset;
2657 		rdata->bytes = cur_len;
2659 		rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2661 		rc = cifs_retry_async_readv(rdata);
2664 			kref_put(&rdata->refcount,
2665 				 cifs_uncached_readdata_release);
2669 		list_add_tail(&rdata->list, &rdata_list);
2674 	/* if at least one read request send succeeded, then reset rc */
2675 	if (!list_empty(&rdata_list))
2678 	/* the loop below should proceed in the order of increasing offsets */
2680 	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2684 			/* FIXME: freezable sleep too? */
2685 			rc = wait_for_completion_killable(&rdata->done);
2688 			else if (rdata->result)
2691 				rc = cifs_readdata_to_iov(rdata, iov,
2694 				total_read += copied;
2697 			/* resend call if it's a retryable error */
2698 			if (rc == -EAGAIN) {
2699 				rc = cifs_retry_async_readv(rdata);
2703 		list_del_init(&rdata->list);
2704 		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2707 	cifs_stats_bytes_read(tcon, total_read);
2708 	*poffset += total_read;
2710 	return total_read ? total_read : rc;
/* aio read entry for uncached I/O: delegate to cifs_iovec_read(). */
2713 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2714 			unsigned long nr_segs, loff_t pos)
2718 	read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * Strict-cache read: use the generic cached path only with a read
 * oplock; otherwise read through the server via cifs_user_readv().
 * NOTE(review): elided extract.
 */
2725 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2726 			  unsigned long nr_segs, loff_t pos)
2728 	struct inode *inode;
2730 	inode = iocb->ki_filp->f_path.dentry->d_inode;
2732 	if (CIFS_I(inode)->clientCanCacheRead)
2733 		return generic_file_aio_read(iocb, iov, nr_segs, pos);
2736 	 * In strict cache mode we need to read from the server all the time
2737 	 * if we don't have level II oplock because the server can delay mtime
2738 	 * change - so we can't make a decision about inode invalidating.
2739 	 * And we can also fail with pagereading if there are mandatory locks
2740 	 * on pages affected by this read but not on the region from pos to
2744 	return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * Synchronous read helper: loop issuing SMB reads of up to rsize bytes
 * into read_data, retrying -EAGAIN (reopening an invalidated handle),
 * until read_size is satisfied or an error/short read stops the loop.
 * NOTE(review): elided extract; comments cover only the visible lines.
 */
2747 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2751 	unsigned int bytes_read = 0;
2752 	unsigned int total_read;
2753 	unsigned int current_read_size;
2755 	struct cifs_sb_info *cifs_sb;
2756 	struct cifs_tcon *tcon;
2758 	char *current_offset;
2759 	struct cifsFileInfo *open_file;
2760 	struct cifs_io_parms io_parms;
2761 	int buf_type = CIFS_NO_BUFFER;
2765 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2767 	/* FIXME: set up handlers for larger reads and/or convert to async */
2768 	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2770 	if (file->private_data == NULL) {
2775 	open_file = file->private_data;
2776 	tcon = tlink_tcon(open_file->tlink);
2778 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2779 		pid = open_file->pid;
2781 		pid = current->tgid;
2783 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2784 		cFYI(1, "attempting read on write only file instance");
2786 	for (total_read = 0, current_offset = read_data;
2787 	     read_size > total_read;
2788 	     total_read += bytes_read, current_offset += bytes_read) {
2789 		current_read_size = min_t(uint, read_size - total_read, rsize);
2791 		 * For windows me and 9x we do not want to request more than it
2792 		 * negotiated since it will refuse the read then.
2794 		if ((tcon->ses) && !(tcon->ses->capabilities &
2795 				tcon->ses->server->vals->cap_large_files)) {
2796 			current_read_size = min_t(uint, current_read_size,
2800 		while (rc == -EAGAIN) {
2801 			if (open_file->invalidHandle) {
2802 				rc = cifs_reopen_file(open_file, true);
2806 			io_parms.netfid = open_file->fid.netfid;
2808 			io_parms.tcon = tcon;
2809 			io_parms.offset = *poffset;
2810 			io_parms.length = current_read_size;
2811 			rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2812 					 &current_offset, &buf_type);
		/* stop on error or when the server returned no data */
2814 		if (rc || (bytes_read == 0)) {
2822 			cifs_stats_bytes_read(tcon, total_read);
2823 			*poffset += bytes_read;
2831 * If the page is mmap'ed into a process' page tables, then we need to make
2832 * sure that it doesn't change while being written back.
2835 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2837 struct page *page = vmf->page;
2840 return VM_FAULT_LOCKED;
2843 static struct vm_operations_struct cifs_file_vm_ops = {
2844 .fault = filemap_fault,
2845 .page_mkwrite = cifs_page_mkwrite,
2848 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2851 struct inode *inode = file->f_path.dentry->d_inode;
2855 if (!CIFS_I(inode)->clientCanCacheRead) {
2856 rc = cifs_invalidate_mapping(inode);
2861 rc = generic_file_mmap(file, vma);
2863 vma->vm_ops = &cifs_file_vm_ops;
2868 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2873 rc = cifs_revalidate_file(file);
2875 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2879 rc = generic_file_mmap(file, vma);
2881 vma->vm_ops = &cifs_file_vm_ops;
2887 cifs_readv_complete(struct work_struct *work)
2889 struct cifs_readdata *rdata = container_of(work,
2890 struct cifs_readdata, work);
2891 struct page *page, *tpage;
2893 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2894 list_del(&page->lru);
2895 lru_cache_add_file(page);
2897 if (rdata->result == 0) {
2899 flush_dcache_page(page);
2900 SetPageUptodate(page);
2905 if (rdata->result == 0)
2906 cifs_readpage_to_fscache(rdata->mapping->host, page);
2908 page_cache_release(page);
2910 kref_put(&rdata->refcount, cifs_readdata_release);
2914 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2917 struct page *page, *tpage;
2921 /* determine the eof that the server (probably) has */
2922 eof = CIFS_I(rdata->mapping->host)->server_eof;
2923 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2924 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2927 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2928 if (remaining >= PAGE_CACHE_SIZE) {
2929 /* enough data to fill the page */
2930 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2931 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2932 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2933 rdata->nr_iov, page->index,
2934 rdata->iov[rdata->nr_iov].iov_base,
2935 rdata->iov[rdata->nr_iov].iov_len);
2937 len += PAGE_CACHE_SIZE;
2938 remaining -= PAGE_CACHE_SIZE;
2939 } else if (remaining > 0) {
2940 /* enough for partial page, fill and zero the rest */
2941 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2942 rdata->iov[rdata->nr_iov].iov_len = remaining;
2943 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2944 rdata->nr_iov, page->index,
2945 rdata->iov[rdata->nr_iov].iov_base,
2946 rdata->iov[rdata->nr_iov].iov_len);
2947 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2948 '\0', PAGE_CACHE_SIZE - remaining);
2952 } else if (page->index > eof_index) {
2954 * The VFS will not try to do readahead past the
2955 * i_size, but it's possible that we have outstanding
2956 * writes with gaps in the middle and the i_size hasn't
2957 * caught up yet. Populate those with zeroed out pages
2958 * to prevent the VFS from repeatedly attempting to
2959 * fill them until the writes are flushed.
2961 zero_user(page, 0, PAGE_CACHE_SIZE);
2962 list_del(&page->lru);
2963 lru_cache_add_file(page);
2964 flush_dcache_page(page);
2965 SetPageUptodate(page);
2967 page_cache_release(page);
2969 /* no need to hold page hostage */
2970 list_del(&page->lru);
2971 lru_cache_add_file(page);
2973 page_cache_release(page);
2980 static int cifs_readpages(struct file *file, struct address_space *mapping,
2981 struct list_head *page_list, unsigned num_pages)
2984 struct list_head tmplist;
2985 struct cifsFileInfo *open_file = file->private_data;
2986 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2987 unsigned int rsize = cifs_sb->rsize;
2991 * Give up immediately if rsize is too small to read an entire page.
2992 * The VFS will fall back to readpage. We should never reach this
2993 * point however since we set ra_pages to 0 when the rsize is smaller
2994 * than a cache page.
2996 if (unlikely(rsize < PAGE_CACHE_SIZE))
3000 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3001 * immediately if the cookie is negative
3003 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3008 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3009 pid = open_file->pid;
3011 pid = current->tgid;
3014 INIT_LIST_HEAD(&tmplist);
3016 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3017 mapping, num_pages);
3020 * Start with the page at end of list and move it to private
3021 * list. Do the same with any following pages until we hit
3022 * the rsize limit, hit an index discontinuity, or run out of
3023 * pages. Issue the async read and then start the loop again
3024 * until the list is empty.
3026 * Note that list order is important. The page_list is in
3027 * the order of declining indexes. When we put the pages in
3028 * the rdata->pages, then we want them in increasing order.
3030 while (!list_empty(page_list)) {
3031 unsigned int bytes = PAGE_CACHE_SIZE;
3032 unsigned int expected_index;
3033 unsigned int nr_pages = 1;
3035 struct page *page, *tpage;
3036 struct cifs_readdata *rdata;
3038 page = list_entry(page_list->prev, struct page, lru);
3041 * Lock the page and put it in the cache. Since no one else
3042 * should have access to this page, we're safe to simply set
3043 * PG_locked without checking it first.
3045 __set_page_locked(page);
3046 rc = add_to_page_cache_locked(page, mapping,
3047 page->index, GFP_KERNEL);
3049 /* give up if we can't stick it in the cache */
3051 __clear_page_locked(page);
3055 /* move first page to the tmplist */
3056 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3057 list_move_tail(&page->lru, &tmplist);
3059 /* now try and add more pages onto the request */
3060 expected_index = page->index + 1;
3061 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3062 /* discontinuity ? */
3063 if (page->index != expected_index)
3066 /* would this page push the read over the rsize? */
3067 if (bytes + PAGE_CACHE_SIZE > rsize)
3070 __set_page_locked(page);
3071 if (add_to_page_cache_locked(page, mapping,
3072 page->index, GFP_KERNEL)) {
3073 __clear_page_locked(page);
3076 list_move_tail(&page->lru, &tmplist);
3077 bytes += PAGE_CACHE_SIZE;
3082 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3084 /* best to give up if we're out of mem */
3085 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3086 list_del(&page->lru);
3087 lru_cache_add_file(page);
3089 page_cache_release(page);
3095 rdata->cfile = cifsFileInfo_get(open_file);
3096 rdata->mapping = mapping;
3097 rdata->offset = offset;
3098 rdata->bytes = bytes;
3100 rdata->marshal_iov = cifs_readpages_marshal_iov;
3101 list_splice_init(&tmplist, &rdata->pages);
3103 rc = cifs_retry_async_readv(rdata);
3105 list_for_each_entry_safe(page, tpage, &rdata->pages,
3107 list_del(&page->lru);
3108 lru_cache_add_file(page);
3110 page_cache_release(page);
3112 kref_put(&rdata->refcount, cifs_readdata_release);
3116 kref_put(&rdata->refcount, cifs_readdata_release);
3122 static int cifs_readpage_worker(struct file *file, struct page *page,
3128 /* Is the page cached? */
3129 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3133 page_cache_get(page);
3134 read_data = kmap(page);
3135 /* for reads over a certain size could initiate async read ahead */
3137 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3142 cFYI(1, "Bytes read %d", rc);
3144 file->f_path.dentry->d_inode->i_atime =
3145 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3147 if (PAGE_CACHE_SIZE > rc)
3148 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3150 flush_dcache_page(page);
3151 SetPageUptodate(page);
3153 /* send this page to the cache */
3154 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3160 page_cache_release(page);
3166 static int cifs_readpage(struct file *file, struct page *page)
3168 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3174 if (file->private_data == NULL) {
3180 cFYI(1, "readpage %p at offset %d 0x%x",
3181 page, (int)offset, (int)offset);
3183 rc = cifs_readpage_worker(file, page, &offset);
3191 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3193 struct cifsFileInfo *open_file;
3195 spin_lock(&cifs_file_list_lock);
3196 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3197 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3198 spin_unlock(&cifs_file_list_lock);
3202 spin_unlock(&cifs_file_list_lock);
3206 /* We do not want to update the file size from server for inodes
3207 open for write - to avoid races with writepage extending
3208 the file - in the future we could consider allowing
3209 refreshing the inode only on increases in the file size
3210 but this is tricky to do without racing with writebehind
3211 page caching in the current Linux kernel design */
3212 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3217 if (is_inode_writable(cifsInode)) {
3218 /* This inode is open for write at least once */
3219 struct cifs_sb_info *cifs_sb;
3221 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3222 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3223 /* since no page cache to corrupt on directio
3224 we can change size safely */
3228 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3236 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3237 loff_t pos, unsigned len, unsigned flags,
3238 struct page **pagep, void **fsdata)
3240 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3241 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3242 loff_t page_start = pos & PAGE_MASK;
3247 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3249 page = grab_cache_page_write_begin(mapping, index, flags);
3255 if (PageUptodate(page))
3259 * If we write a full page it will be up to date, no need to read from
3260 * the server. If the write is short, we'll end up doing a sync write
3263 if (len == PAGE_CACHE_SIZE)
3267 * optimize away the read when we have an oplock, and we're not
3268 * expecting to use any of the data we'd be reading in. That
3269 * is, when the page lies beyond the EOF, or straddles the EOF
3270 * and the write will cover all of the existing data.
3272 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3273 i_size = i_size_read(mapping->host);
3274 if (page_start >= i_size ||
3275 (offset == 0 && (pos + len) >= i_size)) {
3276 zero_user_segments(page, 0, offset,
3280 * PageChecked means that the parts of the page
3281 * to which we're not writing are considered up
3282 * to date. Once the data is copied to the
3283 * page, it can be set uptodate.
3285 SetPageChecked(page);
3290 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3292 * might as well read a page, it is fast enough. If we get
3293 * an error, we don't need to return it. cifs_write_end will
3294 * do a sync write instead since PG_uptodate isn't set.
3296 cifs_readpage_worker(file, page, &page_start);
3298 /* we could try using another file handle if there is one -
3299 but how would we lock it to prevent close of that handle
3300 racing with this read? In any case
3301 this will be written out by write_end so is fine */
3308 static int cifs_release_page(struct page *page, gfp_t gfp)
3310 if (PagePrivate(page))
3313 return cifs_fscache_release_page(page, gfp);
3316 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3318 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3321 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3324 static int cifs_launder_page(struct page *page)
3327 loff_t range_start = page_offset(page);
3328 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3329 struct writeback_control wbc = {
3330 .sync_mode = WB_SYNC_ALL,
3332 .range_start = range_start,
3333 .range_end = range_end,
3336 cFYI(1, "Launder page: %p", page);
3338 if (clear_page_dirty_for_io(page))
3339 rc = cifs_writepage_locked(page, &wbc);
3341 cifs_fscache_invalidate_page(page, page->mapping->host);
3345 void cifs_oplock_break(struct work_struct *work)
3347 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3349 struct inode *inode = cfile->dentry->d_inode;
3350 struct cifsInodeInfo *cinode = CIFS_I(inode);
3353 if (inode && S_ISREG(inode->i_mode)) {
3354 if (cinode->clientCanCacheRead)
3355 break_lease(inode, O_RDONLY);
3357 break_lease(inode, O_WRONLY);
3358 rc = filemap_fdatawrite(inode->i_mapping);
3359 if (cinode->clientCanCacheRead == 0) {
3360 rc = filemap_fdatawait(inode->i_mapping);
3361 mapping_set_error(inode->i_mapping, rc);
3362 invalidate_remote_inode(inode);
3364 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3367 rc = cifs_push_locks(cfile);
3369 cERROR(1, "Push locks rc = %d", rc);
3372 * releasing stale oplock after recent reconnect of smb session using
3373 * a now incorrect file handle is not a data integrity issue but do
3374 * not bother sending an oplock release if session to server still is
3375 * disconnected since oplock already released by the server
3377 if (!cfile->oplock_break_cancelled) {
3378 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->fid.netfid,
3379 current->tgid, 0, 0, 0, 0,
3380 LOCKING_ANDX_OPLOCK_RELEASE, false,
3381 cinode->clientCanCacheRead ? 1 : 0);
3382 cFYI(1, "Oplock release rc = %d", rc);
3386 const struct address_space_operations cifs_addr_ops = {
3387 .readpage = cifs_readpage,
3388 .readpages = cifs_readpages,
3389 .writepage = cifs_writepage,
3390 .writepages = cifs_writepages,
3391 .write_begin = cifs_write_begin,
3392 .write_end = cifs_write_end,
3393 .set_page_dirty = __set_page_dirty_nobuffers,
3394 .releasepage = cifs_release_page,
3395 .invalidatepage = cifs_invalidate_page,
3396 .launder_page = cifs_launder_page,
3400 * cifs_readpages requires the server to support a buffer large enough to
3401 * contain the header plus one complete page of data. Otherwise, we need
3402 * to leave cifs_readpages out of the address space operations.
3404 const struct address_space_operations cifs_addr_ops_smallbuf = {
3405 .readpage = cifs_readpage,
3406 .writepage = cifs_writepage,
3407 .writepages = cifs_writepages,
3408 .write_begin = cifs_write_begin,
3409 .write_end = cifs_write_end,
3410 .set_page_dirty = __set_page_dirty_nobuffers,
3411 .releasepage = cifs_release_page,
3412 .invalidatepage = cifs_invalidate_page,
3413 .launder_page = cifs_launder_page,