/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT)
		posix_flags |= SMB_O_CREAT;
	if (flags & O_EXCL)
		posix_flags |= SMB_O_EXCL;
	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
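/*
 * Illustrative example of the mapping above: an open(2) call with
 * O_RDWR | O_CREAT | O_EXCL yields SMB_O_RDWR | SMB_O_CREAT | SMB_O_EXCL
 * in the posix open request sent to the server.
 */
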
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cFYI(1, "posix open %s", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_sb->mnt_cifs_flags &
					CIFS_MOUNT_MAP_SPECIAL_CHR);
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
	     __u16 *pnetfid, int xid)
{
	int rc;
	int desiredAccess;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;

	desiredAccess = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

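	/*
	 * Worked example from the table above (illustrative only): an
	 * open(2) with O_RDWR | O_CREAT on an existing file maps to
	 * FILE_OPEN_IF, so the file is opened in place rather than
	 * truncated or superseded.
	 */
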
	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (tcon->ses->capabilities & CAP_NT_SMBS)
		rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
			 desiredAccess, create_options, pnetfid, poplock, buf,
			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
				 & CIFS_MOUNT_MAP_SPECIAL_CHR);
	else
		rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
			desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
			cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
				& CIFS_MOUNT_MAP_SPECIAL_CHR);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, pnetfid);

out:
	kfree(buf);
	return rc;
}

struct cifsFileInfo *
cifs_new_fileinfo(__u16 fileHandle, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
	struct cifsFileInfo *pCifsFile;

	pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (pCifsFile == NULL)
		return pCifsFile;

	pCifsFile->count = 1;
	pCifsFile->netfid = fileHandle;
	pCifsFile->pid = current->tgid;
	pCifsFile->uid = current_fsuid();
	pCifsFile->dentry = dget(dentry);
	pCifsFile->f_flags = file->f_flags;
	pCifsFile->invalidHandle = false;
	pCifsFile->tlink = cifs_get_tlink(tlink);
	mutex_init(&pCifsFile->fh_mutex);
	INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);

	spin_lock(&cifs_file_list_lock);
	list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&pCifsFile->flist, &pCifsInode->openFileList);
	else
		list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	cifs_set_oplock_level(pCifsInode, oplock);

	file->private_data = pCifsFile;
	return pCifsFile;
}

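/*
 * Note: the structure returned above starts with count == 1; that
 * reference is owned by file->private_data and is dropped through
 * cifsFileInfo_put() when the file is closed.
 */
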
/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = cifs_file->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsLockInfo *li, *tmp;

	spin_lock(&cifs_file_list_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file_list_lock);
		return;
	}

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);

	if (list_empty(&cifsi->openFileList)) {
		cFYI(1, "closing last open instance for inode %p",
			cifs_file->dentry->d_inode);

		/* in strict cache mode we need to invalidate mapping on the
		   last close because it may cause an error when we open this
		   file again and get at least level II oplock */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			CIFS_I(inode)->invalid_mapping = true;

		cifs_set_oplock_level(cifsi, 0);
	}
	spin_unlock(&cifs_file_list_lock);

	cancel_work_sync(&cifs_file->oplock_break);

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		int xid, rc;

		xid = GetXid();
		rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
		FreeXid(xid);
	}

	/* Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	mutex_lock(&cifsi->lock_mutex);
	list_for_each_entry_safe(li, tmp, &cifsi->llist, llist) {
		if (li->netfid != cifs_file->netfid)
			continue;
		list_del(&li->llist);
		kfree(li);
	}
	mutex_unlock(&cifsi->lock_mutex);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *pCifsFile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	__u16 netfid;

	xid = GetXid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		FreeXid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
		 inode, file->f_flags, full_path);

	if (enable_oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    (tcon->ses->capabilities & CAP_UNIX) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &netfid, xid);
		if (rc == 0) {
			cFYI(1, "posix open succeeded");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cERROR(1, "server %s of type %s returned"
					   " unexpected error on SMB posix open"
					   ", disabling posix open support."
					   " Check if server update available.",
					   tcon->ses->serverName,
					   tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/* else fallthrough to retry open the old way on network i/o
		   or DFS errors */
	}

	if (!posix_open_ok) {
		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &netfid, xid);
		if (rc)
			goto out;
	}

	pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
	if (pCifsFile == NULL) {
		CIFSSMBClose(xid, tcon, netfid);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/* time to set mode which we can not set earlier due to
		   problems creating new read-only files */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= NO_CHANGE_64,
			.gid	= NO_CHANGE_64,
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
					pCifsFile->pid);
	}

out:
	kfree(full_path);
	FreeXid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

/* Try to reacquire byte range locks that were released when session */
/* to server was lost */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	int rc = 0;

	/* BB list all locks open on this file and relock */

	return rc;
}

static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
{
	int rc = -EACCES;
	int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *pCifsInode;
	struct inode *inode;
	char *full_path = NULL;
	int desiredAccess;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	__u16 netfid;

	xid = GetXid();
	mutex_lock(&pCifsFile->fh_mutex);
	if (!pCifsFile->invalidHandle) {
		mutex_unlock(&pCifsFile->fh_mutex);
		rc = 0;
		FreeXid(xid);
		return rc;
	}

	inode = pCifsFile->dentry->d_inode;
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(pCifsFile->tlink);

	/* can not grab rename sem here because various ops, including
	   those that already have the rename sem can end up causing writepage
	   to get called and if the server was down that means we end up here,
	   and we can never tell if the caller already has the rename_sem */
	full_path = build_path_from_dentry(pCifsFile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&pCifsFile->fh_mutex);
		FreeXid(xid);
		return rc;
	}

	cFYI(1, "inode = 0x%p file flags 0x%x for %s",
		 inode, pCifsFile->f_flags, full_path);

	if (enable_oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = pCifsFile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				oflags, &oplock, &netfid, xid);
		if (rc == 0) {
			cFYI(1, "posix reopen succeeded");
			goto reopen_success;
		}
		/* fallthrough to retry open the old way on errors, especially
		   in the reconnect path it is important to retry hard */
	}

	desiredAccess = cifs_convert_flags(pCifsFile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* Can not refresh inode by passing in file_info buf to be returned
	   by SMBOpen and then calling get_inode_info with returned buf
	   since file might have write behind data that needs to be flushed
	   and server version of file size can be stale. If we knew for sure
	   that inode was not dirty locally we could do this */

	rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
			 create_options, &netfid, &oplock, NULL,
			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
				CIFS_MOUNT_MAP_SPECIAL_CHR);
	if (rc) {
		mutex_unlock(&pCifsFile->fh_mutex);
		cFYI(1, "cifs_open returned 0x%x", rc);
		cFYI(1, "oplock: %d", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	pCifsFile->netfid = netfid;
	pCifsFile->invalidHandle = false;
	mutex_unlock(&pCifsFile->fh_mutex);
	pCifsInode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode,
				full_path, inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode,
				full_path, NULL, inode->i_sb,
				xid, NULL);
	} /* else we are writing out data to server already
	     and could deadlock if we tried to flush data, and
	     since we do not know if we have data that would
	     invalidate the current end of file on the server
	     we can not go to the server to get the new inode
	     info */

	cifs_set_oplock_level(pCifsInode, oplock);

	cifs_relock_file(pCifsFile);

reopen_error_exit:
	kfree(full_path);
	FreeXid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	int xid;
	struct cifsFileInfo *pCFileStruct = file->private_data;
	char *ptmp;

	cFYI(1, "Closedir inode = 0x%p", inode);

	xid = GetXid();

	if (pCFileStruct) {
		struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);

		cFYI(1, "Freeing private data in close dir");
		spin_lock(&cifs_file_list_lock);
		if (!pCFileStruct->srch_inf.endOfSearch &&
		    !pCFileStruct->invalidHandle) {
			pCFileStruct->invalidHandle = true;
			spin_unlock(&cifs_file_list_lock);
			rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
			cFYI(1, "Closing uncompleted readdir with rc %d",
				 rc);
			/* not much we can do if it fails anyway, ignore rc */
			rc = 0;
		} else
			spin_unlock(&cifs_file_list_lock);
		ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
		if (ptmp) {
			cFYI(1, "closedir free smb buf in srch struct");
			pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
			if (pCFileStruct->srch_inf.smallBuf)
				cifs_small_buf_release(ptmp);
			else
				cifs_buf_release(ptmp);
		}
		cifs_put_tlink(pCFileStruct->tlink);
		kfree(file->private_data);
		file->private_data = NULL;
	}
	/* BB can we lock the filestruct while this is going on? */
	FreeXid(xid);
	return rc;
}

static int store_file_lock(struct cifsInodeInfo *cinode, __u64 len,
			   __u64 offset, __u8 type, __u16 netfid)
{
	struct cifsLockInfo *li =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (li == NULL)
		return -ENOMEM;
	li->netfid = netfid;
	li->offset = offset;
	li->length = len;
	li->type = type;
	li->pid = current->tgid;
	mutex_lock(&cinode->lock_mutex);
	list_add_tail(&li->llist, &cinode->llist);
	mutex_unlock(&cinode->lock_mutex);
	return 0;
}

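/*
 * The records stored above are matched against later unlock requests in
 * cifs_setlk(); any still outstanding when the handle is released are
 * discarded in cifsFileInfo_put().
 */
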
static void
cifs_read_flock(struct file_lock *flock, __u8 *type, int *lock, int *unlock,
		bool *wait_flag)
{
	if (flock->fl_flags & FL_POSIX)
		cFYI(1, "Posix");
	if (flock->fl_flags & FL_FLOCK)
		cFYI(1, "Flock");
	if (flock->fl_flags & FL_SLEEP) {
		cFYI(1, "Blocking lock");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cFYI(1, "Process suspended by mandatory locking - "
			"not implemented yet");
	if (flock->fl_flags & FL_LEASE)
		cFYI(1, "Lease on file - not implemented yet");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
		cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);

	*type = LOCKING_ANDX_LARGE_FILES;
	if (flock->fl_type == F_WRLCK) {
		cFYI(1, "F_WRLCK");
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cFYI(1, "F_UNLCK");
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cFYI(1, "F_RDLCK");
		*type |= LOCKING_ANDX_SHARED_LOCK;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cFYI(1, "F_EXLCK");
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cFYI(1, "F_SHLCK");
		*type |= LOCKING_ANDX_SHARED_LOCK;
		*lock = 1;
	} else
		cFYI(1, "Unknown type of lock");
}

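/*
 * In the mapping above, shared requests (F_RDLCK, F_SHLCK) set
 * LOCKING_ANDX_SHARED_LOCK in the type byte; everything else goes out
 * as an exclusive LockingAndX lock.
 */
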
static int
cifs_getlk(struct cifsFileInfo *cfile, struct file_lock *flock, __u8 type,
	   bool wait_flag, bool posix_lck, int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	__u16 netfid = cfile->netfid;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);

	if (posix_lck) {
		int posix_lock_type;
		if (type & LOCKING_ANDX_SHARED_LOCK)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
				      length, flock, posix_lock_type,
				      wait_flag);
		return rc;
	}

	/* BB we could chain these into one lock request BB */
	rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
			 flock->fl_start, 0, 1, type, 0, 0);
	if (rc == 0) {
		rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
				 length, flock->fl_start, 1, 0,
				 type, 0, 0);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cERROR(1, "Error unlocking previously locked "
				   "range %d during test of lock", rc);
		rc = 0;
		return rc;
	}

	if (type & LOCKING_ANDX_SHARED_LOCK) {
		flock->fl_type = F_WRLCK;
		rc = 0;
		return rc;
	}

	rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
			 flock->fl_start, 0, 1,
			 type | LOCKING_ANDX_SHARED_LOCK, 0, 0);
	if (rc == 0) {
		rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
				 length, flock->fl_start, 1, 0,
				 type | LOCKING_ANDX_SHARED_LOCK,
				 0, 0);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cERROR(1, "Error unlocking previously locked "
				   "range %d during test of lock", rc);
	} else
		flock->fl_type = F_WRLCK;

	rc = 0;
	return rc;
}

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u8 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
	__u16 netfid = cfile->netfid;

	if (posix_lck) {
		int posix_lock_type;
		if (type & LOCKING_ANDX_SHARED_LOCK)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */, length,
				      flock, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
				 flock->fl_start, 0, lock, type, wait_flag, 0);
		if (rc == 0) {
			/* For Windows locks we must store them. */
			rc = store_file_lock(cinode, length, flock->fl_start,
					     type, netfid);
		}
	} else if (unlock) {
		/*
		 * For each stored lock that this unlock overlaps completely,
		 * unlock it.
		 */
		int stored_rc = 0;
		struct cifsLockInfo *li, *tmp;

		mutex_lock(&cinode->lock_mutex);
		list_for_each_entry_safe(li, tmp, &cinode->llist, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (cfile->netfid != li->netfid)
				continue;

			stored_rc = CIFSSMBLock(xid, tcon, netfid,
						current->tgid, li->length,
						li->offset, 1, 0, li->type,
						0, 0);
			if (stored_rc)
				rc = stored_rc;
			else {
				list_del(&li->llist);
				kfree(li);
			}
		}
		mutex_unlock(&cinode->lock_mutex);
	}
out:
	if (flock->fl_flags & FL_POSIX)
		posix_lock_file_wait(file, flock);
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u8 type;

	rc = -EACCES;
	xid = GetXid();

	cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
		"end: %lld", cmd, flock->fl_flags, flock->fl_type,
		flock->fl_start, flock->fl_end);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag);

	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);
	netfid = cfile->netfid;
	cinode = CIFS_I(file->f_path.dentry->d_inode);

	if ((tcon->ses->capabilities & CAP_UNIX) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(cfile, flock, type, wait_flag, posix_lck, xid);
		FreeXid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		FreeXid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	FreeXid(xid);
	return rc;
}

/* update the file size (if needed) after a write */
static void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		      unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

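/*
 * E.g. a 4096 byte write at offset 8192 advances server_eof to 12288 if
 * it was previously smaller; server_eof is only ever grown here, never
 * shrunk.
 */
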
static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
			  const char *write_data, size_t write_size,
			  loff_t *poffset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *pTcon;
	int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
	   *poffset, dentry->d_name.name);

	pTcon = tlink_tcon(open_file->tlink);

	xid = GetXid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min((size_t)cifs_sb->wsize,
				  write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.netfid = open_file->netfid;
			io_parms.pid = pid;
			io_parms.tcon = pTcon;
			io_parms.offset = *poffset;
			io_parms.length = len;
			rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
					   1, 0);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				FreeXid(xid);
				return rc;
			}
		} else {
			cifs_update_eof(cifsi, *poffset, bytes_written);
			*poffset += bytes_written;
		}
	}

	cifs_stats_bytes_written(pTcon, total_written);

	if (total_written > 0) {
		spin_lock(&dentry->d_inode->i_lock);
		if (*poffset > dentry->d_inode->i_size)
			i_size_write(dentry->d_inode, *poffset);
		spin_unlock(&dentry->d_inode->i_lock);
	}
	mark_inode_dirty_sync(dentry->d_inode);
	FreeXid(xid);
	return total_written;
}

struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && open_file->uid != current_fsuid())
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_file_list_lock);
	return NULL;
}

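/*
 * The early break on a write-only entry above is safe because
 * cifs_new_fileinfo() adds readable handles at the head of openFileList
 * and write-only handles at the tail.
 */
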
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc;

	/* Having a null inode here (because mapping->host was set to zero by
	the VFS or MM) should not happen but we had reports of an oops (due to
	it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cERROR(1, "Null inode passed to cifs_writeable_file");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
refind_writable:
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && open_file->uid != current_fsuid())
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			cifsFileInfo_get(open_file);

			if (!open_file->invalidHandle) {
				/* found a good writable file */
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			}

			spin_unlock(&cifs_file_list_lock);

			/* Had to unlock since following call can block */
			rc = cifs_reopen_file(open_file, false);
			if (!rc)
				return open_file;

			/* if it fails, try another handle if possible */
			cFYI(1, "wp failed on reopen file");
			cifsFileInfo_put(open_file);

			spin_lock(&cifs_file_list_lock);

			/* else we simply continue to the next entry. Thus
			   we do not loop on reopen errors. If we
			   can not reopen the file, for example if we
			   reconnected to a server with another client
			   racing to delete or lock the file we would not
			   make progress if we restarted before the beginning
			   of the loop here. */
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}
	spin_unlock(&cifs_file_list_lock);
	return NULL;
}

static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cFYI(1, "No writeable filehandles for inode");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}

static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct page *page;
	int rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages;
		pgoff_t next = 0, tofind;
		struct page **pages;

		tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
				end - index) + 1;

		wdata = cifs_writedata_alloc((unsigned int)tofind);
		if (!wdata) {
			rc = -ENOMEM;
			break;
		}

		/*
		 * find_get_pages_tag seems to return a max of 256 on each
		 * iteration, so we must call it several times in order to
		 * fill the array or the wsize is effectively limited to
		 * 256 * PAGE_CACHE_SIZE.
		 */
		found_pages = 0;
		pages = wdata->pages;
		do {
			nr_pages = find_get_pages_tag(mapping, &index,
							PAGECACHE_TAG_DIRTY,
							tofind, pages);
			found_pages += nr_pages;
			tofind -= nr_pages;
			pages += nr_pages;
		} while (nr_pages && tofind && index <= end);

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			break;
		}

		nr_pages = 0;
		for (i = 0; i < found_pages; i++) {
			page = wdata->pages[i];
			/*
			 * At this point we hold neither mapping->tree_lock nor
			 * lock on the page itself: the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or even
			 * swizzled back from swapper_space to tmpfs file
			 * mapping
			 */

			if (nr_pages == 0)
				lock_page(page);
			else if (!trylock_page(page))
				break;

			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				break;
			}

			if (!wbc->range_cyclic && page->index > end) {
				done = true;
				unlock_page(page);
				break;
			}

			if (next && (page->index != next)) {
				/* Not next consecutive page */
				unlock_page(page);
				break;
			}

			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);

			if (PageWriteback(page) ||
					!clear_page_dirty_for_io(page)) {
				unlock_page(page);
				break;
			}

			/*
			 * This actually clears the dirty bit in the radix tree.
			 * See cifs_writepage() for more commentary.
			 */
			set_page_writeback(page);

			if (page_offset(page) >= mapping->host->i_size) {
				done = true;
				unlock_page(page);
				end_page_writeback(page);
				break;
			}

			wdata->pages[i] = page;
			next = page->index + 1;
			++nr_pages;
		}

		/* reset index to refind any pages skipped */
		if (nr_pages == 0)
			index = wdata->pages[0]->index + 1;

		/* put any pages we aren't going to use */
		for (i = nr_pages; i < found_pages; i++) {
			page_cache_release(wdata->pages[i]);
			wdata->pages[i] = NULL;
		}

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			continue;
		}

		wdata->sync_mode = wbc->sync_mode;
		wdata->nr_pages = nr_pages;
		wdata->offset = page_offset(wdata->pages[0]);

		do {
			if (wdata->cfile != NULL)
				cifsFileInfo_put(wdata->cfile);
			wdata->cfile = find_writable_file(CIFS_I(mapping->host),
							  false);
			if (!wdata->cfile) {
				cERROR(1, "No writable handles for inode");
				rc = -EBADF;
				break;
			}
			rc = cifs_async_writev(wdata);
		} while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);

		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			for (i = 0; i < nr_pages; ++i) {
				if (rc == -EAGAIN)
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				page_cache_release(wdata->pages[i]);
			}
			if (rc != -EAGAIN)
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}

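/*
 * Summary of the loop above: gather up to wsize worth of contiguous
 * dirty pages into a cifs_writedata, send them with cifs_async_writev(),
 * and for WB_SYNC_ALL retry -EAGAIN results until the write sticks.
 */
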
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	int xid;

	xid = GetXid();
/* BB add check for wbc flags */
	page_cache_get(page);
	if (!PageUptodate(page))
		cFYI(1, "ppw - page not up to date");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
		goto retry_write;
	else if (rc == -EAGAIN)
		redirty_page_for_writepage(wbc, page);
	else if (rc != 0)
		SetPageError(page);
	else
		SetPageUptodate(page);
	end_page_writeback(page);
	page_cache_release(page);
	FreeXid(xid);
	return rc;
}

static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}

static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cFYI(1, "write_end for page %p from pos %lld with %d bytes",
		 page, pos, copied);

	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
		int xid;

		xid = GetXid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		FreeXid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	page_cache_release(page);

	return rc;
}

int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = GetXid();

	cFYI(1, "Sync file - name: %s datasync: 0x%x",
		file->f_path.dentry->d_name.name, datasync);

	if (!CIFS_I(inode)->clientCanCacheRead) {
		rc = cifs_invalidate_mapping(inode);
		if (rc) {
			cFYI(1, "rc: %d during invalidate phase", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
		rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);

	FreeXid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}

int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	struct inode *inode = file->f_mapping->host;

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = GetXid();

	cFYI(1, "Sync file - name: %s datasync: 0x%x",
		file->f_path.dentry->d_name.name, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
		rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);

	FreeXid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}

/*
 * As file closes, flush all cached write data for this inode checking
 * for write behind errors.
 */
int cifs_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	int rc = 0;

	if (file->f_mode & FMODE_WRITE)
		rc = filemap_write_and_wait(inode->i_mapping);

	cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);

	return rc;
}

static int
cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
	int rc = 0;
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		pages[i] = alloc_page(__GFP_HIGHMEM);
		if (!pages[i]) {
			/*
			 * save number of pages we have already allocated and
			 * return with ENOMEM error
			 */
			num_pages = i;
			rc = -ENOMEM;
			goto error;
		}
	}

	return rc;

error:
	for (i = 0; i < num_pages; i++)
		put_page(pages[i]);
	return rc;
}

static inline
size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
{
	size_t num_pages;
	size_t clen;

	clen = min_t(const size_t, len, wsize);
	num_pages = clen / PAGE_CACHE_SIZE;
	if (clen % PAGE_CACHE_SIZE)
		num_pages++;

	if (cur_len)
		*cur_len = clen;

	return num_pages;
}

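/*
 * Worked example: with wsize = 57344 and len = 200000, clen is capped at
 * 57344 and num_pages is 57344 / 4096 = 14 (assuming 4K pages).
 */
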
static ssize_t
cifs_iovec_write(struct file *file, const struct iovec *iov,
		 unsigned long nr_segs, loff_t *poffset)
{
	unsigned int written;
	unsigned long num_pages, npages, i;
	size_t copied, len, cur_len;
	ssize_t total_written = 0;
	struct kvec *to_send;
	struct page **pages;
	struct iov_iter it;
	struct inode *inode;
	struct cifsFileInfo *open_file;
	struct cifs_tcon *pTcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_io_parms io_parms;
	int xid, rc;
	__u32 pid;

	len = iov_length(iov, nr_segs);
	if (!len)
		return 0;

	rc = generic_write_checks(file, poffset, &len, 0);
	if (rc)
		return rc;

	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);

	pages = kmalloc(sizeof(struct page *)*num_pages, GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
	if (!to_send) {
		kfree(pages);
		return -ENOMEM;
	}

	rc = cifs_write_allocate_pages(pages, num_pages);
	if (rc) {
		kfree(pages);
		kfree(to_send);
		return rc;
	}

	xid = GetXid();
	open_file = file->private_data;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	pTcon = tlink_tcon(open_file->tlink);
	inode = file->f_path.dentry->d_inode;

	iov_iter_init(&it, iov, nr_segs, len, 0);
	npages = num_pages;

	do {
		size_t save_len = cur_len;
		for (i = 0; i < npages; i++) {
			copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
			copied = iov_iter_copy_from_user(pages[i], &it, 0,
							 copied);
			cur_len -= copied;
			iov_iter_advance(&it, copied);
			to_send[i+1].iov_base = kmap(pages[i]);
			to_send[i+1].iov_len = copied;
		}

		cur_len = save_len - cur_len;

		do {
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}
			io_parms.netfid = open_file->netfid;
			io_parms.pid = pid;
			io_parms.tcon = pTcon;
			io_parms.offset = *poffset;
			io_parms.length = cur_len;
			rc = CIFSSMBWrite2(xid, &io_parms, &written, to_send,
					   npages, 0);
		} while (rc == -EAGAIN);

		for (i = 0; i < npages; i++)
			kunmap(pages[i]);

		if (written) {
			len -= written;
			total_written += written;
			cifs_update_eof(CIFS_I(inode), *poffset, written);
			*poffset += written;
		} else if (rc < 0) {
			if (!total_written)
				total_written = rc;
			break;
		}

		/* get length and number of kvecs of the next write */
		npages = get_numpages(cifs_sb->wsize, len, &cur_len);
	} while (len > 0);

	if (total_written > 0) {
		spin_lock(&inode->i_lock);
		if (*poffset > inode->i_size)
			i_size_write(inode, *poffset);
		spin_unlock(&inode->i_lock);
	}

	cifs_stats_bytes_written(pTcon, total_written);
	mark_inode_dirty_sync(inode);

	for (i = 0; i < num_pages; i++)
		put_page(pages[i]);
	kfree(to_send);
	kfree(pages);
	FreeXid(xid);
	return total_written;
}

ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
			 unsigned long nr_segs, loff_t pos)
{
	ssize_t written;
	struct inode *inode;

	inode = iocb->ki_filp->f_path.dentry->d_inode;

	/*
	 * BB - optimize the way when signing is disabled. We can drop this
	 * extra memory-to-memory copying and use iovec buffers for constructing
	 * write request.
	 */

	written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
	if (written > 0) {
		CIFS_I(inode)->invalid_mapping = true;
		iocb->ki_pos = pos;
	}

	return written;
}

ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
			   unsigned long nr_segs, loff_t pos)
{
	struct inode *inode;

	inode = iocb->ki_filp->f_path.dentry->d_inode;

	if (CIFS_I(inode)->clientCanCacheAll)
		return generic_file_aio_write(iocb, iov, nr_segs, pos);

	/*
	 * In strict cache mode we need to write the data to the server exactly
	 * from the pos to pos+len-1 rather than flush all affected pages
	 * because it may cause an error with mandatory locks on these pages
	 * but not on the region from pos to pos+len-1.
	 */

	return cifs_user_writev(iocb, iov, nr_segs, pos);
}

static ssize_t
cifs_iovec_read(struct file *file, const struct iovec *iov,
		 unsigned long nr_segs, loff_t *poffset)
{
	int rc;
	int xid;
	ssize_t total_read;
	unsigned int bytes_read = 0;
	size_t len, cur_len;
	int iov_offset = 0;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *pTcon;
	struct cifsFileInfo *open_file;
	struct smb_com_read_rsp *pSMBr;
	struct cifs_io_parms io_parms;
	char *read_data;
	__u32 pid;

	if (!nr_segs)
		return 0;

	len = iov_length(iov, nr_segs);
	if (!len)
		return 0;

	xid = GetXid();
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

	open_file = file->private_data;
	pTcon = tlink_tcon(open_file->tlink);

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cFYI(1, "attempting read on write only file instance");

	for (total_read = 0; total_read < len; total_read += bytes_read) {
		cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
		rc = -EAGAIN;
		read_data = NULL;

		while (rc == -EAGAIN) {
			int buf_type = CIFS_NO_BUFFER;
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.netfid = open_file->netfid;
			io_parms.pid = pid;
			io_parms.tcon = pTcon;
			io_parms.offset = *poffset;
			io_parms.length = cur_len;
			rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
					 &read_data, &buf_type);
			pSMBr = (struct smb_com_read_rsp *)read_data;
			if (read_data) {
				char *data_offset = read_data + 4 +
						le16_to_cpu(pSMBr->DataOffset);
				if (memcpy_toiovecend(iov, data_offset,
						      iov_offset, bytes_read))
					rc = -EFAULT;
				if (buf_type == CIFS_SMALL_BUFFER)
					cifs_small_buf_release(read_data);
				else if (buf_type == CIFS_LARGE_BUFFER)
					cifs_buf_release(read_data);
				read_data = NULL;
				iov_offset += bytes_read;
			}
		}

		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				FreeXid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(pTcon, bytes_read);
			*poffset += bytes_read;
		}
	}

	FreeXid(xid);
	return total_read;
}

ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
			unsigned long nr_segs, loff_t pos)
{
	ssize_t read;

	read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
	if (read > 0)
		iocb->ki_pos = pos;

	return read;
}

ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
			  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode;

	inode = iocb->ki_filp->f_path.dentry->d_inode;

	if (CIFS_I(inode)->clientCanCacheRead)
		return generic_file_aio_read(iocb, iov, nr_segs, pos);

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */

	return cifs_user_readv(iocb, iov, nr_segs, pos);
}

static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
			 loff_t *poffset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *pTcon;
	int xid;
	char *current_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = GetXid();
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

	if (file->private_data == NULL) {
		rc = -EBADF;
		FreeXid(xid);
		return rc;
	}
	open_file = file->private_data;
	pTcon = tlink_tcon(open_file->tlink);

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cFYI(1, "attempting read on write only file instance");

	for (total_read = 0, current_offset = read_data;
	     read_size > total_read;
	     total_read += bytes_read, current_offset += bytes_read) {
		current_read_size = min_t(uint, read_size - total_read,
					  cifs_sb->rsize);
		/* For windows me and 9x we do not want to request more
		than it negotiated since it will refuse the read then */
		if ((pTcon->ses) &&
			!(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
			current_read_size = min_t(uint, current_read_size,
					CIFSMaxBufSize);
		}
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.netfid = open_file->netfid;
			io_parms.pid = pid;
			io_parms.tcon = pTcon;
			io_parms.offset = *poffset;
			io_parms.length = current_read_size;
			rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
					 &current_offset, &buf_type);
		}
		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				FreeXid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(pTcon, total_read);
			*poffset += bytes_read;
		}
	}
	FreeXid(xid);
	return total_read;
}

/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static int
cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	lock_page(page);
	return VM_FAULT_LOCKED;
}

static struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = cifs_page_mkwrite,
};

int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;
	struct inode *inode = file->f_path.dentry->d_inode;

	xid = GetXid();

	if (!CIFS_I(inode)->clientCanCacheRead) {
		rc = cifs_invalidate_mapping(inode);
		if (rc) {
			/* don't leak the xid on the early error return */
			FreeXid(xid);
			return rc;
		}
	}

	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	FreeXid(xid);
	return rc;
}

int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = GetXid();
	rc = cifs_revalidate_file(file);
	if (rc) {
		cFYI(1, "Validation prior to mmap failed, error=%d", rc);
		FreeXid(xid);
		return rc;
	}
	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	FreeXid(xid);
	return rc;
}

static void cifs_copy_cache_pages(struct address_space *mapping,
	struct list_head *pages, int bytes_read, char *data)
{
	struct page *page;
	char *target;

	while (bytes_read > 0) {
		if (list_empty(pages))
			break;

		page = list_entry(pages->prev, struct page, lru);
		list_del(&page->lru);

		if (add_to_page_cache_lru(page, mapping, page->index,
					  GFP_KERNEL)) {
			page_cache_release(page);
			cFYI(1, "Add page cache failed");
			data += PAGE_CACHE_SIZE;
			bytes_read -= PAGE_CACHE_SIZE;
			continue;
		}
		page_cache_release(page);

		target = kmap_atomic(page, KM_USER0);

		if (PAGE_CACHE_SIZE > bytes_read) {
			memcpy(target, data, bytes_read);
			/* zero the tail end of this partial page */
			memset(target + bytes_read, 0,
			       PAGE_CACHE_SIZE - bytes_read);
			bytes_read = 0;
		} else {
			memcpy(target, data, PAGE_CACHE_SIZE);
			bytes_read -= PAGE_CACHE_SIZE;
		}
		kunmap_atomic(target, KM_USER0);

		flush_dcache_page(page);
		SetPageUptodate(page);
		unlock_page(page);
		data += PAGE_CACHE_SIZE;

		/* add page to FS-Cache */
		cifs_readpage_to_fscache(mapping->host, page);
	}
	return;
}

static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc = -EACCES;
	int xid;
	loff_t offset;
	struct page *page;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *pTcon;
	unsigned int bytes_read = 0;
	unsigned int read_size, i;
	char *smb_read_data = NULL;
	struct smb_com_read_rsp *pSMBr;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = GetXid();
	if (file->private_data == NULL) {
		rc = -EBADF;
		FreeXid(xid);
		return rc;
	}
	open_file = file->private_data;
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	pTcon = tlink_tcon(open_file->tlink);

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		goto read_complete;

	cFYI(DBG2, "rpages: num pages %d", num_pages);
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	for (i = 0; i < num_pages; ) {
		unsigned contig_pages;
		struct page *tmp_page;
		unsigned long expected_index;

		if (list_empty(page_list))
			break;

		page = list_entry(page_list->prev, struct page, lru);
		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;

		/* count adjacent pages that we will read into */
		contig_pages = 0;
		expected_index =
			list_entry(page_list->prev, struct page, lru)->index;
		list_for_each_entry_reverse(tmp_page, page_list, lru) {
			if (tmp_page->index == expected_index) {
				contig_pages++;
				expected_index++;
			} else
				break;
		}
		if (contig_pages + i > num_pages)
			contig_pages = num_pages - i;

		/* for reads over a certain size could initiate async
		   read ahead */

		read_size = contig_pages * PAGE_CACHE_SIZE;
		/* Read size needs to be in multiples of one page */
		read_size = min_t(const unsigned int, read_size,
				  cifs_sb->rsize & PAGE_CACHE_MASK);
		cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d",
				read_size, contig_pages);
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.netfid = open_file->netfid;
			io_parms.pid = pid;
			io_parms.tcon = pTcon;
			io_parms.offset = offset;
			io_parms.length = read_size;
			rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
					 &smb_read_data, &buf_type);
			/* BB more RC checks ? */
			if (rc == -EAGAIN) {
				if (smb_read_data) {
					if (buf_type == CIFS_SMALL_BUFFER)
						cifs_small_buf_release(smb_read_data);
					else if (buf_type == CIFS_LARGE_BUFFER)
						cifs_buf_release(smb_read_data);
					smb_read_data = NULL;
				}
			}
		}
		if ((rc < 0) || (smb_read_data == NULL)) {
			cFYI(1, "Read error in readpages: %d", rc);
			break;
		} else if (bytes_read > 0) {
			task_io_account_read(bytes_read);
			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
			cifs_copy_cache_pages(mapping, page_list, bytes_read,
				smb_read_data + 4 /* RFC1001 hdr */ +
				le16_to_cpu(pSMBr->DataOffset));

			i += bytes_read >> PAGE_CACHE_SHIFT;
			cifs_stats_bytes_read(pTcon, bytes_read);
			if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
				i++; /* account for partial page */

				/* server copy of file can have smaller size
				   than client */
				/* BB do we need to verify this common case ?
				   this case is ok - if we are at server EOF
				   we will hit it on next read */

				/* break; */
			}
		} else {
			cFYI(1, "No bytes read (%d) at offset %lld . "
				"Cleaning remaining pages from readahead list",
				bytes_read, offset);
			/* BB turn off caching and do new lookup on
			   file size at server? */
			break;
		}
		if (smb_read_data) {
			if (buf_type == CIFS_SMALL_BUFFER)
				cifs_small_buf_release(smb_read_data);
			else if (buf_type == CIFS_LARGE_BUFFER)
				cifs_buf_release(smb_read_data);
			smb_read_data = NULL;
		}
		bytes_read = 0;
	}

/* need to free smb_read_data buf before exit */
	if (smb_read_data) {
		if (buf_type == CIFS_SMALL_BUFFER)
			cifs_small_buf_release(smb_read_data);
		else if (buf_type == CIFS_LARGE_BUFFER)
			cifs_buf_release(smb_read_data);
		smb_read_data = NULL;
	}

read_complete:
	FreeXid(xid);
	return rc;
}

static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
	if (rc == 0)
		goto read_complete;

	page_cache_get(page);
	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cFYI(1, "Bytes read %d", rc);

	file->f_path.dentry->d_inode->i_atime =
		current_fs_time(file->f_path.dentry->d_inode->i_sb);

	if (PAGE_CACHE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);

	rc = 0;

io_error:
	kunmap(page);
	page_cache_release(page);

read_complete:
	return rc;
}

static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	int rc = -EACCES;
	int xid;

	xid = GetXid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		FreeXid(xid);
		return rc;
	}

	cFYI(1, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	unlock_page(page);

	FreeXid(xid);
	return rc;
}

static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_file_list_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_file_list_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_file_list_lock);
	return 0;
}

/* We do not want to update the file size from server for inodes
   open for write - to avoid races with writepage extending
   the file - in the future we could consider allowing
   refreshing the inode only on increases in the file size
   but this is tricky to do without racing with writebehind
   page caching in the current Linux kernel design */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since no page cache to corrupt on directio
			we can change size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cFYI(1, "write_begin from %lld len %d", (long long)pos, len);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_CACHE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_I(mapping->host)->clientCanCacheRead) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_CACHE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}

static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned long offset)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cFYI(1, "Launder page: %p", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}

void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = cfile->dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	int rc = 0;

	if (inode && S_ISREG(inode->i_mode)) {
		if (cinode->clientCanCacheRead)
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (cinode->clientCanCacheRead == 0) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			invalidate_remote_inode(inode);
		}
		cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
	}

	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
				 current->tgid, 0, 0, 0, 0,
				 LOCKING_ANDX_OPLOCK_RELEASE, false,
				 cinode->clientCanCacheRead ? 1 : 0);
		cFYI(1, "Oplock release rc = %d", rc);
	}
}

const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data. Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};