4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Map the access-mode bits (O_ACCMODE) of POSIX open flags to the access
 * mask that is requested from the server.  O_RDWR yields
 * GENERIC_READ | GENERIC_WRITE (GENERIC_ALL is deliberately not requested).
 * When none of the three access modes matches, an explicit combination of
 * attribute, EA and data rights is returned instead.
 * NOTE(review): the return statements of the O_RDONLY and O_WRONLY branches
 * are not visible in this listing - presumably GENERIC_READ and
 * GENERIC_WRITE; confirm against the full source.
 */
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
50 else if ((flags & O_ACCMODE) == O_WRONLY)
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate POSIX open flags into the SMB_O_* flag word that
 * cifs_posix_open() hands to CIFSPOSIXCreate().  The access mode selects
 * SMB_O_RDONLY, SMB_O_WRONLY or SMB_O_RDWR; O_DIRECTORY and O_NOFOLLOW map
 * to their SMB_O_* counterparts.
 * NOTE(review): the conditions guarding SMB_O_CREAT, SMB_O_EXCL,
 * SMB_O_TRUNC, SMB_O_SYNC and SMB_O_DIRECT are not visible in this listing;
 * presumably each tests the matching O_* flag (with O_DSYNC implying
 * SMB_O_SYNC, per the inline comment) - confirm.
 */
64 static u32 cifs_posix_convert_flags(unsigned int flags)
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
76 posix_flags |= SMB_O_CREAT;
78 posix_flags |= SMB_O_EXCL;
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
89 posix_flags |= SMB_O_DIRECT;
/*
 * Choose the CIFS create disposition for a set of POSIX open flags.  The
 * tests run from most to least specific: O_CREAT|O_EXCL first, then
 * O_CREAT|O_TRUNC (FILE_OVERWRITE_IF), then O_CREAT alone, then O_TRUNC
 * alone (FILE_OVERWRITE).
 * NOTE(review): the returns for the O_CREAT|O_EXCL, O_CREAT and default
 * cases are not visible here; the mapping table inside cifs_nt_open() lists
 * them as FILE_CREATE, FILE_OPEN_IF and FILE_OPEN - confirm.
 */
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
/*
 * Open (or create) full_path through the POSIX create call.
 *
 * full_path: path handed to CIFSPOSIXCreate()
 * pinode:    in/out inode pointer; when *pinode is NULL a new inode is
 *            obtained with cifs_fill_uniqueid() + cifs_iget()
 * sb:        superblock, used to reach the cifs_sb_info and for cifs_iget()
 * mode:      creation mode; the current umask is masked out before use
 * f_flags:   POSIX open flags, converted by cifs_posix_convert_flags()
 * poplock, pnetfid, xid: passed through to CIFSPOSIXCreate()
 *
 * A zeroed FILE_UNIX_BASIC_INFO buffer receives the server reply and the
 * tcon link is released right after the create call.  If the reply has
 * Type == -1 the inode is not filled in here and the caller is expected to
 * query the path info itself.
 * NOTE(review): the error checks, the condition in front of the second
 * "goto posix_open_ret", the branch that reaches cifs_fattr_to_inode()
 * (presumably the else of the *pinode == NULL test) and the cleanup/return
 * code are not visible in this listing.
 */
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
126 tlink = cifs_sb_tlink(cifs_sb);
132 tcon = tlink_tcon(tlink);
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
162 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open full_path through the server-specific ops->open() callback (the
 * non-POSIX path used by cifs_open()).
 *
 * The desired access comes from cifs_convert_flags() and the disposition
 * from cifs_get_disposition().  Create options start as CREATE_NOT_DIR and
 * gain CREATE_OPEN_BACKUP_INTENT when backup_cred() is true for this mount.
 * A FILE_ALL_INFO buffer is allocated to receive the open reply; afterwards
 * the inode is refreshed with cifs_get_inode_info_unix() or
 * cifs_get_inode_info() (the latter is given the reply buffer).
 * NOTE(review): the return type, the action taken when ops->open is NULL,
 * the allocation-failure check, the condition choosing between the two
 * inode-info calls, and the cleanup/return code are not visible in this
 * listing.
 */
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
173 struct cifs_fid *fid, unsigned int xid)
178 int create_options = CREATE_NOT_DIR;
181 if (!tcon->ses->server->ops->open)
184 desired_access = cifs_convert_flags(f_flags);
186 /*********************************************************************
187 * open flag mapping table:
189 * POSIX Flag CIFS Disposition
190 * ---------- ----------------
191 * O_CREAT FILE_OPEN_IF
192 * O_CREAT | O_EXCL FILE_CREATE
193 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
194 * O_TRUNC FILE_OVERWRITE
195 * none of the above FILE_OPEN
197 * Note that there is not a direct match between disposition
198 * FILE_SUPERSEDE (ie create whether or not file exists although
199 * O_CREAT | O_TRUNC is similar but truncates the existing
200 * file rather than creating a new file as FILE_SUPERSEDE does
201 * (which uses the attributes / metadata passed in on open call)
203 *? O_SYNC is a reasonable match to CIFS writethrough flag
204 *? and the read write flags match reasonably. O_LARGEFILE
205 *? is irrelevant because largefile support is always used
206 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
207 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
208 *********************************************************************/
210 disposition = cifs_get_disposition(f_flags);
212 /* BB pass O_SYNC flag through on file attributes .. BB */
214 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
218 if (backup_cred(cifs_sb))
219 create_options |= CREATE_OPEN_BACKUP_INTENT;
221 rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition,
222 desired_access, create_options, fid,
223 oplock, buf, cifs_sb);
229 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
232 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Allocate and initialise the per-open private data (struct cifsFileInfo)
 * for file and attach it to the inode and tcon bookkeeping.
 *
 * Steps visible here:
 *  - allocate the cifsFileInfo and its cifs_fid_locks container, and link
 *    the container into cinode->llist under cinode->lock_mutex;
 *  - record the opener (current->tgid, fsuid), take a dentry reference with
 *    dget() and a tlink reference with cifs_get_tlink();
 *  - initialise the oplock-break work item and fh_mutex, then let the
 *    server-specific set_fid() store fid and oplock;
 *  - under cifs_file_list_lock, add the entry to the tcon open-file list
 *    and to the inode open-file list; readable instances are added with
 *    list_add() (list head);
 *  - store the result in file->private_data.
 * NOTE(review): allocation-failure handling, the else in front of the
 * list_add_tail() call, any other field initialisation and the return
 * statement are not visible in this listing.
 */
240 struct cifsFileInfo *
241 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
242 struct tcon_link *tlink, __u32 oplock)
244 struct dentry *dentry = file->f_path.dentry;
245 struct inode *inode = dentry->d_inode;
246 struct cifsInodeInfo *cinode = CIFS_I(inode);
247 struct cifsFileInfo *cfile;
248 struct cifs_fid_locks *fdlocks;
250 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
260 INIT_LIST_HEAD(&fdlocks->locks);
261 fdlocks->cfile = cfile;
262 cfile->llist = fdlocks;
263 mutex_lock(&cinode->lock_mutex);
264 list_add(&fdlocks->llist, &cinode->llist);
265 mutex_unlock(&cinode->lock_mutex);
268 cfile->pid = current->tgid;
269 cfile->uid = current_fsuid();
270 cfile->dentry = dget(dentry);
271 cfile->f_flags = file->f_flags;
272 cfile->invalidHandle = false;
273 cfile->tlink = cifs_get_tlink(tlink);
274 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
275 mutex_init(&cfile->fh_mutex);
276 tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
278 spin_lock(&cifs_file_list_lock);
279 list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
280 /* if readable file instance put first in list*/
281 if (file->f_mode & FMODE_READ)
282 list_add(&cfile->flist, &cinode->openFileList);
284 list_add_tail(&cfile->flist, &cinode->openFileList);
285 spin_unlock(&cifs_file_list_lock);
287 file->private_data = cfile;
/*
 * Forward declaration: cifs_del_lock_waiters() is defined further down in
 * this file but is already needed by cifsFileInfo_put().
 *
 * cifsFileInfo_get() is the locking wrapper around
 * cifsFileInfo_get_locked(): it takes cifs_file_list_lock for the duration
 * of that call.
 * NOTE(review): the return statement is not visible in this listing.
 */
291 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
293 struct cifsFileInfo *
294 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
296 spin_lock(&cifs_file_list_lock);
297 cifsFileInfo_get_locked(cifs_file);
298 spin_unlock(&cifs_file_list_lock);
303 * Release a reference on the file private data. This may involve closing
304 * the filehandle out on the server. Must be called without holding
305 * cifs_file_list_lock.
/*
 * Drop one reference on cifs_file and tear it down on the last one.
 *
 * Under cifs_file_list_lock the count is decremented; while references
 * remain the lock is simply released.  On the last reference the entry is
 * unlinked from the inode and tcon open-file lists.  If that leaves the
 * inode with no open instances, the mapping is flagged invalid on
 * strict-I/O mounts and the oplock level is reset to 0.
 *
 * After the spinlock is released: pending oplock-break work is cancelled
 * synchronously, the handle is closed via server->ops->close() when the
 * tcon does not need a reconnect and the handle is still valid, all
 * byte-range lock records of this handle are removed (waking their
 * waiters) under cifsi->lock_mutex, and the tlink and dentry references
 * are released.
 * NOTE(review): the early return after the first unlock, the xid handling
 * around the close call, the freeing of each lock record and of cifs_file
 * itself are not visible in this listing.
 */
307 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
309 struct inode *inode = cifs_file->dentry->d_inode;
310 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
311 struct cifsInodeInfo *cifsi = CIFS_I(inode);
312 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
313 struct cifsLockInfo *li, *tmp;
315 spin_lock(&cifs_file_list_lock);
316 if (--cifs_file->count > 0) {
317 spin_unlock(&cifs_file_list_lock);
321 /* remove it from the lists */
322 list_del(&cifs_file->flist);
323 list_del(&cifs_file->tlist);
325 if (list_empty(&cifsi->openFileList)) {
326 cFYI(1, "closing last open instance for inode %p",
327 cifs_file->dentry->d_inode);
329 * In strict cache mode we need invalidate mapping on the last
330 * close because it may cause a error when we open this file
331 * again and get at least level II oplock.
333 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
334 CIFS_I(inode)->invalid_mapping = true;
335 cifs_set_oplock_level(cifsi, 0);
337 spin_unlock(&cifs_file_list_lock);
339 cancel_work_sync(&cifs_file->oplock_break);
341 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
342 struct TCP_Server_Info *server = tcon->ses->server;
347 if (server->ops->close)
348 rc = server->ops->close(xid, tcon, &cifs_file->fid);
353 * Delete any outstanding lock records. We'll lose them when the file
356 mutex_lock(&cifsi->lock_mutex);
357 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
358 list_del(&li->llist);
359 cifs_del_lock_waiters(li);
362 list_del(&cifs_file->llist->llist);
363 kfree(cifs_file->llist);
364 mutex_unlock(&cifsi->lock_mutex);
366 cifs_put_tlink(cifs_file->tlink);
367 dput(cifs_file->dentry);
/*
 * Open handler for regular files (inode/file pair).
 *
 * Flow visible in this listing:
 *  1. Obtain the tcon link (PTR_ERR(tlink) is returned on failure) and
 *     build the full path from the dentry.
 *  2. If POSIX open is usable (not flagged broken, unix extensions enabled
 *     and the server advertises CIFS_UNIX_POSIX_PATH_OPS_CAP) try
 *     cifs_posix_open().  -EINVAL or -EOPNOTSUPP marks POSIX open as
 *     broken on this tcon (logging once when the server type is known).
 *     The remaining errors are tested against -EIO, -EREMOTE and
 *     -EOPNOTSUPP before falling through to the legacy open.
 *  3. Without a successful POSIX open, fall back to cifs_nt_open().
 *  4. Create the private data with cifs_new_fileinfo(); the branch that
 *     follows closes the server handle again via ops->close().
 *  5. Register the fscache cookie, and - for files this open created on a
 *     unix-extension mount through the non-POSIX path - push the mode to
 *     the server with CIFSSMBUnixSetFileInfo().
 *  6. Release the tcon link.
 * NOTE(review): xid allocation/free, the oplock initialisation, the
 * success tests after each open attempt, the cfile == NULL test (presumed)
 * in front of the ops->close() call, the goto targets and the final return
 * are not visible here.
 */
371 int cifs_open(struct inode *inode, struct file *file)
376 struct cifs_sb_info *cifs_sb;
377 struct cifs_tcon *tcon;
378 struct tcon_link *tlink;
379 struct cifsFileInfo *cfile = NULL;
380 char *full_path = NULL;
381 bool posix_open_ok = false;
386 cifs_sb = CIFS_SB(inode->i_sb);
387 tlink = cifs_sb_tlink(cifs_sb);
390 return PTR_ERR(tlink);
392 tcon = tlink_tcon(tlink);
394 full_path = build_path_from_dentry(file->f_path.dentry);
395 if (full_path == NULL) {
400 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
401 inode, file->f_flags, full_path);
403 if (tcon->ses->server->oplocks)
408 if (!tcon->broken_posix_open && tcon->unix_ext &&
409 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
410 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
411 /* can not refresh inode info since size could be stale */
412 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
413 cifs_sb->mnt_file_mode /* ignored */,
414 file->f_flags, &oplock, &fid.netfid, xid);
416 cFYI(1, "posix open succeeded");
417 posix_open_ok = true;
418 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
419 if (tcon->ses->serverNOS)
420 cERROR(1, "server %s of type %s returned"
421 " unexpected error on SMB posix open"
422 ", disabling posix open support."
423 " Check if server update available.",
424 tcon->ses->serverName,
425 tcon->ses->serverNOS);
426 tcon->broken_posix_open = true;
427 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
428 (rc != -EOPNOTSUPP)) /* path not found or net err */
431 * Else fallthrough to retry open the old way on network i/o
436 if (!posix_open_ok) {
437 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
438 file->f_flags, &oplock, &fid, xid);
443 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
445 if (tcon->ses->server->ops->close)
446 tcon->ses->server->ops->close(xid, tcon, &fid);
451 cifs_fscache_set_inode_cookie(inode, file);
453 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
455 * Time to set mode which we can not set earlier due to
456 * problems creating new read-only files.
458 struct cifs_unix_set_info_args args = {
459 .mode = inode->i_mode,
462 .ctime = NO_CHANGE_64,
463 .atime = NO_CHANGE_64,
464 .mtime = NO_CHANGE_64,
467 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
474 cifs_put_tlink(tlink);
479 * Try to reacquire byte range locks that were released when session
/*
 * Hook for re-acquiring byte-range locks on a reopened handle; it is
 * called at the end of cifs_reopen_file().  The only body content visible
 * in this listing is the to-do note below.
 * NOTE(review): the return statement is not visible here.
 */
482 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
486 /* BB list all locks open on this file and relock */
/*
 * Re-establish the server handle of cfile after it was invalidated.
 *
 * cfile:     open-file record whose handle should be reopened
 * can_flush: caller-supplied flag; NOTE(review): presumably it gates the
 *            flush-and-refresh step below - the test itself is not visible
 *
 * Everything up to clearing invalidHandle runs under cfile->fh_mutex.  If
 * the handle is not marked invalid the mutex is dropped again straight
 * away.  Otherwise the path is rebuilt from the dentry, and on servers
 * with POSIX path operations cifs_posix_open() is tried first with
 * O_CREAT, O_EXCL and O_TRUNC masked off (they already took effect on the
 * original open).  The legacy path calls server->ops->open() with
 * disposition FILE_OPEN and CREATE_NOT_DIR (plus backup intent when
 * backup_cred() holds).
 *
 * On failure the mutex is released, the result is logged and control jumps
 * to reopen_error_exit.  On success invalidHandle is cleared, the mutex is
 * released, dirty pages are written back with filemap_write_and_wait()
 * (errors recorded on the mapping), the inode info is refreshed, the new
 * fid/oplock are stored through set_fid() and cifs_relock_file() is called.
 * NOTE(review): the return type, xid handling, the oplock initialisation,
 * the success/failure tests, the exit label and the return statement are
 * not visible in this listing.
 */
492 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
497 struct cifs_sb_info *cifs_sb;
498 struct cifs_tcon *tcon;
499 struct TCP_Server_Info *server;
500 struct cifsInodeInfo *cinode;
502 char *full_path = NULL;
504 int disposition = FILE_OPEN;
505 int create_options = CREATE_NOT_DIR;
509 mutex_lock(&cfile->fh_mutex);
510 if (!cfile->invalidHandle) {
511 mutex_unlock(&cfile->fh_mutex);
517 inode = cfile->dentry->d_inode;
518 cifs_sb = CIFS_SB(inode->i_sb);
519 tcon = tlink_tcon(cfile->tlink);
520 server = tcon->ses->server;
523 * Can not grab rename sem here because various ops, including those
524 * that already have the rename sem can end up causing writepage to get
525 * called and if the server was down that means we end up here, and we
526 * can never tell if the caller already has the rename_sem.
528 full_path = build_path_from_dentry(cfile->dentry);
529 if (full_path == NULL) {
531 mutex_unlock(&cfile->fh_mutex);
536 cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
539 if (tcon->ses->server->oplocks)
544 if (tcon->unix_ext && cap_unix(tcon->ses) &&
545 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
546 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
548 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
549 * original open. Must mask them off for a reopen.
551 unsigned int oflags = cfile->f_flags &
552 ~(O_CREAT | O_EXCL | O_TRUNC);
554 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
555 cifs_sb->mnt_file_mode /* ignored */,
556 oflags, &oplock, &fid.netfid, xid);
558 cFYI(1, "posix reopen succeeded");
562 * fallthrough to retry open the old way on errors, especially
563 * in the reconnect path it is important to retry hard
567 desired_access = cifs_convert_flags(cfile->f_flags);
569 if (backup_cred(cifs_sb))
570 create_options |= CREATE_OPEN_BACKUP_INTENT;
573 * Can not refresh inode by passing in file_info buf to be returned by
574 * CIFSSMBOpen and then calling get_inode_info with returned buf since
575 * file might have write behind data that needs to be flushed and server
576 * version of file size can be stale. If we knew for sure that inode was
577 * not dirty locally we could do this.
579 rc = server->ops->open(xid, tcon, full_path, disposition,
580 desired_access, create_options, &fid, &oplock,
583 mutex_unlock(&cfile->fh_mutex);
584 cFYI(1, "cifs_reopen returned 0x%x", rc);
585 cFYI(1, "oplock: %d", oplock);
586 goto reopen_error_exit;
590 cfile->invalidHandle = false;
591 mutex_unlock(&cfile->fh_mutex);
592 cinode = CIFS_I(inode);
595 rc = filemap_write_and_wait(inode->i_mapping);
596 mapping_set_error(inode->i_mapping, rc);
599 rc = cifs_get_inode_info_unix(&inode, full_path,
602 rc = cifs_get_inode_info(&inode, full_path, NULL,
603 inode->i_sb, xid, NULL);
606 * Else we are writing out data to server already and could deadlock if
607 * we tried to flush data, and since we do not know if we have data that
608 * would invalidate the current end of file on the server we can not go
609 * to the server to get the new inode info.
612 server->ops->set_fid(cfile, &fid, oplock);
613 cifs_relock_file(cfile);
/*
 * Release handler for regular files: drops the reference held through
 * file->private_data with cifsFileInfo_put() and clears the pointer so it
 * cannot be used again.  A file without private data is left untouched.
 * NOTE(review): the return statement is not visible in this listing; the
 * trailing comment notes that the caller ignores the value anyway.
 */
621 int cifs_close(struct inode *inode, struct file *file)
623 if (file->private_data != NULL) {
624 cifsFileInfo_put(file->private_data);
625 file->private_data = NULL;
628 /* return code from the ->release op is always ignored */
/*
 * Release handler for directories.
 *
 * Under cifs_file_list_lock: if the directory search has not reached its
 * end and the handle is still valid, the handle is marked invalid first
 * and then - with the spinlock released - closed on the server through
 * ops->close_dir() when that callback exists (the result is only logged).
 * Afterwards any network buffer still attached to the search state is
 * released with the small- or regular-buffer helper depending on
 * srch_inf.smallBuf, the tlink reference is dropped and the private data
 * is freed.
 * NOTE(review): xid handling, the NULL check on cfile, the else branch in
 * front of the second spin_unlock(), the test on buf and the return
 * statement are not visible in this listing.
 */
632 int cifs_closedir(struct inode *inode, struct file *file)
636 struct cifsFileInfo *cfile = file->private_data;
637 struct cifs_tcon *tcon;
638 struct TCP_Server_Info *server;
641 cFYI(1, "Closedir inode = 0x%p", inode);
647 tcon = tlink_tcon(cfile->tlink);
648 server = tcon->ses->server;
650 cFYI(1, "Freeing private data in close dir");
651 spin_lock(&cifs_file_list_lock);
652 if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
653 cfile->invalidHandle = true;
654 spin_unlock(&cifs_file_list_lock);
655 if (server->ops->close_dir)
656 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
659 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
660 /* not much we can do if it fails anyway, ignore rc */
663 spin_unlock(&cifs_file_list_lock);
665 buf = cfile->srch_inf.ntwrk_buf_start;
667 cFYI(1, "closedir free smb buf in srch struct");
668 cfile->srch_inf.ntwrk_buf_start = NULL;
669 if (cfile->srch_inf.smallBuf)
670 cifs_small_buf_release(buf);
672 cifs_buf_release(buf);
675 cifs_put_tlink(cfile->tlink);
676 kfree(file->private_data);
677 file->private_data = NULL;
678 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate a byte-range lock record for the calling process.  The record
 * stores the given offset and length, uses current->tgid as the owner pid,
 * and starts with an empty blocked-waiters list (blist) and an initialised
 * wait queue (block_q).
 * NOTE(review): the allocation-failure check, the assignment of type and
 * the return statement are not visible in this listing.
 */
683 static struct cifsLockInfo *
684 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
686 struct cifsLockInfo *lock =
687 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
690 lock->offset = offset;
691 lock->length = length;
693 lock->pid = current->tgid;
694 INIT_LIST_HEAD(&lock->blist);
695 init_waitqueue_head(&lock->block_q);
/*
 * Wake every lock request that is queued behind lock.  Each waiter is
 * unlinked from lock->blist with list_del_init(), which leaves its own
 * blist node pointing at itself - the condition cifs_lock_add_if() sleeps
 * on - and its wait queue is then woken.
 */
700 cifs_del_lock_waiters(struct cifsLockInfo *lock)
702 struct cifsLockInfo *li, *tmp;
703 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
704 list_del_init(&li->blist);
705 wake_up(&li->block_q);
/*
 * Scan the locks recorded for one file handle (fdlocks) for an entry that
 * conflicts with the range [offset, offset + length) of the given type.
 *
 * Two tests are visible: a range test that is true when the stored lock
 * does not overlap the request, and a compatibility test that is true for
 * shared-type requests when either the stored lock belongs to the same
 * fid and the same tgid, or the stored lock has the same type.
 * NOTE(review): the statements following both tests (presumably continue),
 * the assignment to *conf_lock and the return statements are not visible
 * in this listing.
 */
710 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
711 __u64 length, __u8 type, struct cifsFileInfo *cfile,
712 struct cifsLockInfo **conf_lock)
714 struct cifsLockInfo *li;
715 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
716 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
718 list_for_each_entry(li, &fdlocks->locks, llist) {
719 if (offset + length <= li->offset ||
720 offset >= li->offset + li->length)
722 if ((type & server->vals->shared_lock_type) &&
723 ((server->ops->compare_fids(cfile, cur_cfile) &&
724 current->tgid == li->pid) || type == li->type))
/*
 * Inode-wide conflict search: walks every per-handle lock container linked
 * on cinode->llist and asks cifs_find_fid_lock_conflict() about each one.
 * NOTE(review): the remaining arguments of the inner call, the loop exit
 * condition and the return statement are not visible in this listing.
 * Callers visible in this file (cifs_lock_test(), cifs_lock_add_if()) hold
 * cinode->lock_mutex around the call.
 */
733 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
734 __u8 type, struct cifsLockInfo **conf_lock)
737 struct cifs_fid_locks *cur;
738 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
740 list_for_each_entry(cur, &cinode->llist, llist) {
741 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
751 * Check if there is another lock that prevents us from setting the lock
752 * (mandatory style). If such a lock exists, update the flock structure with
753 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
754 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
755 * send a request to the server or 1 otherwise.
/*
 * Local (cached) test for a conflicting mandatory-style lock; runs under
 * cinode->lock_mutex.
 *
 * When cifs_find_lock_conflict() reports a conflict, flock is filled from
 * the conflicting record: start, inclusive end (offset + length - 1), pid,
 * and F_RDLCK or F_WRLCK depending on whether that record carries the
 * shared lock type bit.  When there is no conflict and brlocks can be
 * cached, flock->fl_type becomes F_UNLCK.
 * NOTE(review): the return type, the statement executed when brlocks
 * cannot be cached and the return statement are not visible in this
 * listing.
 */
758 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
759 __u8 type, struct file_lock *flock)
762 struct cifsLockInfo *conf_lock;
763 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
764 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
767 mutex_lock(&cinode->lock_mutex);
769 exist = cifs_find_lock_conflict(cfile, offset, length, type,
772 flock->fl_start = conf_lock->offset;
773 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
774 flock->fl_pid = conf_lock->pid;
775 if (conf_lock->type & server->vals->shared_lock_type)
776 flock->fl_type = F_RDLCK;
778 flock->fl_type = F_WRLCK;
779 } else if (!cinode->can_cache_brlcks)
782 flock->fl_type = F_UNLCK;
784 mutex_unlock(&cinode->lock_mutex);
/*
 * Unconditionally append lock to the lock list of cfile, holding the
 * inode-wide lock_mutex while the list is modified.  No conflict check is
 * done here.
 */
789 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
791 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
792 mutex_lock(&cinode->lock_mutex);
793 list_add_tail(&lock->llist, &cfile->llist->locks);
794 mutex_unlock(&cinode->lock_mutex);
798 * Set the byte-range lock (mandatory style). Returns:
799 * 1) 0, if we set the lock and don't need to send a request to the server;
800 * 2) 1, if no locks prevent us but we need to send a request to the server;
801 * 3) -EACCES, if there is a lock that prevents us and wait is false.
/*
 * Add lock to the lock list of cfile if nothing conflicts with it.
 *
 * The check runs under cinode->lock_mutex.  With no conflict and brlock
 * caching enabled, the lock is appended to the list of the handle and the
 * mutex is released.  In the waiting path the request is queued on the
 * blist of the conflicting lock, the mutex is dropped, and the caller
 * sleeps interruptibly until its own blist node points back at itself,
 * which is the state list_del_init() leaves it in.  Afterwards the mutex
 * is re-taken and the node is unlinked again with list_del_init().
 * NOTE(review): the third parameter, the branch conditions selecting the
 * waiting path, the retry logic after the wait and the return statements
 * are not visible in this listing.
 */
804 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
807 struct cifsLockInfo *conf_lock;
808 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
814 mutex_lock(&cinode->lock_mutex);
816 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
817 lock->type, &conf_lock);
818 if (!exist && cinode->can_cache_brlcks) {
819 list_add_tail(&lock->llist, &cfile->llist->locks);
820 mutex_unlock(&cinode->lock_mutex);
829 list_add_tail(&lock->blist, &conf_lock->blist);
830 mutex_unlock(&cinode->lock_mutex);
831 rc = wait_event_interruptible(lock->block_q,
832 (lock->blist.prev == &lock->blist) &&
833 (lock->blist.next == &lock->blist));
836 mutex_lock(&cinode->lock_mutex);
837 list_del_init(&lock->blist);
840 mutex_unlock(&cinode->lock_mutex);
845 * Check if there is another lock that prevents us from setting the lock
846 * (posix style). If such a lock exists, update the flock structure with
847 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
848 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
849 * send a request to the server or 1 otherwise.
/*
 * Local test for a conflicting POSIX lock using the generic
 * posix_test_lock(), executed under cinode->lock_mutex.  The requested
 * type is saved first; if the generic test reports F_UNLCK (no conflict)
 * but brlocks cannot be cached, the saved type is restored.
 * NOTE(review): the return type, the value returned for non-FL_POSIX
 * requests and the other return statements are not visible in this
 * listing.
 */
852 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
855 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
856 unsigned char saved_type = flock->fl_type;
858 if ((flock->fl_flags & FL_POSIX) == 0)
861 mutex_lock(&cinode->lock_mutex);
862 posix_test_lock(file, flock);
864 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
865 flock->fl_type = saved_type;
869 mutex_unlock(&cinode->lock_mutex);
874 * Set the byte-range lock (posix style). Returns:
875 * 1) 0, if we set the lock and don't need to send a request to the server;
876 * 2) 1, if we need to send a request to the server;
877 * 3) <0, if an error occurs while setting the lock.
/*
 * Record a POSIX lock locally through the generic posix_lock_file() while
 * brlocks can still be cached.  The caching check and the lock call run
 * under cinode->lock_mutex; when caching is off the mutex is dropped
 * without calling posix_lock_file().  If the generic layer answers
 * FILE_LOCK_DEFERRED, the caller sleeps interruptibly until
 * flock->fl_next is cleared.  locks_delete_block() is called in the
 * deferred path.
 * NOTE(review): the return type, the handling of non-FL_POSIX requests,
 * the retry logic around the wait, the condition in front of
 * locks_delete_block() and the return statements are not visible in this
 * listing.
 */
880 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
882 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
885 if ((flock->fl_flags & FL_POSIX) == 0)
889 mutex_lock(&cinode->lock_mutex);
890 if (!cinode->can_cache_brlcks) {
891 mutex_unlock(&cinode->lock_mutex);
895 rc = posix_lock_file(file, flock, NULL);
896 mutex_unlock(&cinode->lock_mutex);
897 if (rc == FILE_LOCK_DEFERRED) {
898 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
901 locks_delete_block(flock);
/*
 * Send the locally cached mandatory byte-range locks of cfile to the
 * server and switch brlock caching off for the inode.
 *
 * Runs under cinode->lock_mutex; when caching is already off the mutex is
 * simply released.  The number of LOCKING_ANDX_RANGE entries per request
 * is derived from a snapshot of server->maxBuf minus the SMB header size.
 * Two passes are made over the lock list, one per entry of types[]; within
 * a pass the ranges are packed into buf and sent with cifs_lockv() each
 * time the buffer is full, with a further cifs_lockv() call for the
 * remainder.  Finally can_cache_brlcks is cleared.
 * NOTE(review): the return type, xid handling, the zero check on max_buf,
 * the allocation-failure test, the cur/num bookkeeping, the error
 * accumulation via stored_rc, the freeing of buf and the return statements
 * are not visible in this listing.
 */
907 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
910 int rc = 0, stored_rc;
911 struct cifsLockInfo *li, *tmp;
912 struct cifs_tcon *tcon;
913 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
914 unsigned int num, max_num, max_buf;
915 LOCKING_ANDX_RANGE *buf, *cur;
916 int types[] = {LOCKING_ANDX_LARGE_FILES,
917 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
921 tcon = tlink_tcon(cfile->tlink);
923 mutex_lock(&cinode->lock_mutex);
924 if (!cinode->can_cache_brlcks) {
925 mutex_unlock(&cinode->lock_mutex);
931 * Accessing maxBuf is racy with cifs_reconnect - need to store value
932 * and check it for zero before using.
934 max_buf = tcon->ses->server->maxBuf;
936 mutex_unlock(&cinode->lock_mutex);
941 max_num = (max_buf - sizeof(struct smb_hdr)) /
942 sizeof(LOCKING_ANDX_RANGE);
943 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
945 mutex_unlock(&cinode->lock_mutex);
950 for (i = 0; i < 2; i++) {
953 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
954 if (li->type != types[i])
956 cur->Pid = cpu_to_le16(li->pid);
957 cur->LengthLow = cpu_to_le32((u32)li->length);
958 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
959 cur->OffsetLow = cpu_to_le32((u32)li->offset);
960 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
961 if (++num == max_num) {
962 stored_rc = cifs_lockv(xid, tcon,
964 (__u8)li->type, 0, num,
975 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
976 (__u8)types[i], 0, num, buf);
982 cinode->can_cache_brlcks = false;
983 mutex_unlock(&cinode->lock_mutex);
/*
 * Iterate over the file_lock chain of an inode.  lockp is a pointer to a
 * pointer: it starts at &inode->i_flock and advances to the fl_next field
 * of the current entry, so *lockp is the current lock and the loop ends
 * when *lockp is NULL.
 */
990 /* copied from fs/locks.c with a name change */
991 #define cifs_for_each_lock(inode, lockp) \
992 for (lockp = &inode->i_flock; *lockp != NULL; \
993 lockp = &(*lockp)->fl_next)
/*
 * Temporary record used by cifs_push_posix_locks() to snapshot one POSIX
 * lock before it is sent to the server; records are chained through llist.
 * NOTE(review): only the list linkage is visible in this listing;
 * cifs_push_posix_locks() also accesses pid, netfid, length and offset
 * members.
 */
995 struct lock_to_push {
996 struct list_head llist;
/*
 * Send the locally cached POSIX byte-range locks of the inode to the
 * server and switch brlock caching off.
 *
 * Runs under cinode->lock_mutex; when caching is already off the mutex is
 * simply released.  The work is done in phases:
 *  1. walk the lock chain of the inode, testing each entry for FL_POSIX;
 *  2. allocate lock_to_push records onto the local list locks_to_send,
 *     one per loop iteration up to count;
 *  3. walk the chain again and copy pid, netfid, length and offset of
 *     every FL_POSIX lock into the next preallocated record, logging an
 *     error if the records run out;
 *  4. issue CIFSSMBPosixLock() for each record, unlinking it from the
 *     list;
 *  5. clear can_cache_brlcks and release the mutex.
 * The last loop unlinks every record left on the list.
 * NOTE(review): the return type, xid handling, the count increment, the
 * lock-type selection, allocation-failure handling, the kfree() calls, the
 * error label and the return statements are not visible in this listing.
 */
1005 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1007 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1008 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1009 struct file_lock *flock, **before;
1010 unsigned int count = 0, i = 0;
1011 int rc = 0, xid, type;
1012 struct list_head locks_to_send, *el;
1013 struct lock_to_push *lck, *tmp;
1018 mutex_lock(&cinode->lock_mutex);
1019 if (!cinode->can_cache_brlcks) {
1020 mutex_unlock(&cinode->lock_mutex);
1026 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1027 if ((*before)->fl_flags & FL_POSIX)
1032 INIT_LIST_HEAD(&locks_to_send);
1035 * Allocating count locks is enough because no FL_POSIX locks can be
1036 * added to the list while we are holding cinode->lock_mutex that
1037 * protects locking operations of this inode.
1039 for (; i < count; i++) {
1040 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1045 list_add_tail(&lck->llist, &locks_to_send);
1048 el = locks_to_send.next;
1050 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1052 if ((flock->fl_flags & FL_POSIX) == 0)
1054 if (el == &locks_to_send) {
1056 * The list ended. We don't have enough allocated
1057 * structures - something is really wrong.
1059 cERROR(1, "Can't push all brlocks!");
1062 length = 1 + flock->fl_end - flock->fl_start;
1063 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1067 lck = list_entry(el, struct lock_to_push, llist);
1068 lck->pid = flock->fl_pid;
1069 lck->netfid = cfile->fid.netfid;
1070 lck->length = length;
1072 lck->offset = flock->fl_start;
1077 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1080 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1081 lck->offset, lck->length, NULL,
1085 list_del(&lck->llist);
1090 cinode->can_cache_brlcks = false;
1091 mutex_unlock(&cinode->lock_mutex);
1096 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1097 list_del(&lck->llist);
/*
 * Push cached byte-range locks to the server using the appropriate style.
 * POSIX locks are used when the session has unix capabilities, the server
 * advertises CIFS_UNIX_FCNTL_CAP and the mount did not set
 * CIFS_MOUNT_NOPOSIXBRL; otherwise the server-specific push_mand_locks()
 * callback handles mandatory locks.  The result of the chosen helper is
 * returned unchanged.
 */
1104 cifs_push_locks(struct cifsFileInfo *cfile)
1106 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1107 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1109 if (cap_unix(tcon->ses) &&
1110 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1111 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1112 return cifs_push_posix_locks(cfile);
1114 return tcon->ses->server->ops->push_mand_locks(cfile);
/*
 * Decode a struct file_lock into the values the CIFS lock code works with.
 *
 * The fl_flags bits are inspected one by one (FL_POSIX, FL_FLOCK,
 * FL_SLEEP, FL_ACCESS, FL_LEASE); the visible actions are debug messages,
 * including one for any flag outside that set.  *type starts as the large
 * lock type of the server and then gains the exclusive bits for F_WRLCK
 * and F_EXLCK, the shared bits for F_RDLCK and F_SHLCK, or the unlock bits
 * for F_UNLCK; any other fl_type is only logged.
 * NOTE(review): the return type, the assignments to *lock, *unlock and
 * *wait_flag and several debug statements are not visible in this listing.
 */
1118 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1119 bool *wait_flag, struct TCP_Server_Info *server)
1121 if (flock->fl_flags & FL_POSIX)
1123 if (flock->fl_flags & FL_FLOCK)
1125 if (flock->fl_flags & FL_SLEEP) {
1126 cFYI(1, "Blocking lock");
1129 if (flock->fl_flags & FL_ACCESS)
1130 cFYI(1, "Process suspended by mandatory locking - "
1131 "not implemented yet");
1132 if (flock->fl_flags & FL_LEASE)
1133 cFYI(1, "Lease on file - not implemented yet");
1134 if (flock->fl_flags &
1135 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1136 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1138 *type = server->vals->large_lock_type;
1139 if (flock->fl_type == F_WRLCK) {
1140 cFYI(1, "F_WRLCK ");
1141 *type |= server->vals->exclusive_lock_type;
1143 } else if (flock->fl_type == F_UNLCK) {
1145 *type |= server->vals->unlock_lock_type;
1147 /* Check if unlock includes more than one lock range */
1148 } else if (flock->fl_type == F_RDLCK) {
1150 *type |= server->vals->shared_lock_type;
1152 } else if (flock->fl_type == F_EXLCK) {
1154 *type |= server->vals->exclusive_lock_type;
1156 } else if (flock->fl_type == F_SHLCK) {
1158 *type |= server->vals->shared_lock_type;
1161 cFYI(1, "Unknown type of lock");
/*
 * Lock-test implementation: report in flock whether the requested range
 * could be locked.  The range length is 1 + fl_end - fl_start.
 *
 * POSIX branch: cifs_posix_lock_test() is consulted first; the request is
 * then mapped to CIFS_RDLCK or CIFS_WRLCK and sent with CIFSSMBPosixLock(),
 * passing flock so the call can fill it in.
 *
 * Mandatory branch: cifs_lock_test() is consulted first.  The server is
 * then probed by requesting the lock with mand_lock(); the follow-up
 * mand_lock() call releases the range again and F_UNLCK is reported (a
 * failure to release is logged).  For a shared-type request F_WRLCK is
 * reported.  Otherwise the exclusive bits are cleared and the probe is
 * repeated with the shared type: the probe-and-release pair reports
 * F_RDLCK, the remaining path reports F_WRLCK.
 * NOTE(review): the return type, the posix_lck test, the tests on rc
 * between the steps and the return statements are not visible in this
 * listing, so which rc values select each outcome must be confirmed
 * against the full source.
 */
1165 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1166 bool wait_flag, bool posix_lck, unsigned int xid)
1169 __u64 length = 1 + flock->fl_end - flock->fl_start;
1170 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1171 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1172 struct TCP_Server_Info *server = tcon->ses->server;
1173 __u16 netfid = cfile->fid.netfid;
1176 int posix_lock_type;
1178 rc = cifs_posix_lock_test(file, flock);
1182 if (type & server->vals->shared_lock_type)
1183 posix_lock_type = CIFS_RDLCK;
1185 posix_lock_type = CIFS_WRLCK;
1186 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1187 flock->fl_start, length, flock,
1188 posix_lock_type, wait_flag);
1192 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1196 /* BB we could chain these into one lock request BB */
1197 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1200 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1202 flock->fl_type = F_UNLCK;
1204 cERROR(1, "Error unlocking previously locked "
1205 "range %d during test of lock", rc);
1209 if (type & server->vals->shared_lock_type) {
1210 flock->fl_type = F_WRLCK;
1214 type &= ~server->vals->exclusive_lock_type;
1216 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1217 type | server->vals->shared_lock_type,
1220 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1221 type | server->vals->shared_lock_type, 0, 1, false);
1222 flock->fl_type = F_RDLCK;
1224 cERROR(1, "Error unlocking previously locked "
1225 "range %d during test of lock", rc);
1227 flock->fl_type = F_WRLCK;
/*
 * Move every node of the list source onto dest.  list_move() inserts at
 * the head of dest, so the nodes end up in front of the entries dest
 * already held.  cifs_unlock_range() uses this to put locks back after a
 * failed unlock request.
 */
1233 cifs_move_llist(struct list_head *source, struct list_head *dest)
1235 struct list_head *li, *tmp;
1236 list_for_each_safe(li, tmp, source)
1237 list_move(li, dest);
/*
 * Dispose of every lock record on llist: wake the requests blocked behind
 * each record with cifs_del_lock_waiters(), then unlink the record.
 * NOTE(review): the statement that frees the record itself is not visible
 * in this listing - presumably kfree(li); confirm.
 */
1241 cifs_free_llist(struct list_head *llist)
1243 struct cifsLockInfo *li, *tmp;
1244 list_for_each_entry_safe(li, tmp, llist, llist) {
1245 cifs_del_lock_waiters(li);
1246 list_del(&li->llist);
/*
 * Release the mandatory-style locks of cfile that fall inside the range
 * described by flock.
 *
 * The request size limit is derived from a snapshot of server->maxBuf.
 * Under cinode->lock_mutex two passes are made over the lock list, one
 * per entry of types[].  Three tests are applied to each lock: whether it
 * lies completely inside the flock range, whether it is owned by the
 * current tgid, and whether it has the type of the current pass.
 *
 * While brlocks are cached the lock is simply removed locally and its
 * waiters are woken.  Otherwise its range is packed into buf and the
 * record is parked on tmp_llist; every time the buffer fills up (and once
 * more for the remainder) an unlock request is sent with cifs_lockv().  On
 * failure the parked records are moved back to the list of the file, on
 * success they are disposed of with cifs_free_llist().
 * NOTE(review): the return type, the third parameter, the zero check on
 * max_buf, the allocation-failure test, the continue statements after the
 * three tests, the cur/num bookkeeping, the stored_rc tests, the freeing
 * of buf and the return statement are not visible in this listing.
 */
1252 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1255 int rc = 0, stored_rc;
1256 int types[] = {LOCKING_ANDX_LARGE_FILES,
1257 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1259 unsigned int max_num, num, max_buf;
1260 LOCKING_ANDX_RANGE *buf, *cur;
1261 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1262 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1263 struct cifsLockInfo *li, *tmp;
1264 __u64 length = 1 + flock->fl_end - flock->fl_start;
1265 struct list_head tmp_llist;
1267 INIT_LIST_HEAD(&tmp_llist);
1270 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1271 * and check it for zero before using.
1273 max_buf = tcon->ses->server->maxBuf;
1277 max_num = (max_buf - sizeof(struct smb_hdr)) /
1278 sizeof(LOCKING_ANDX_RANGE);
1279 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1283 mutex_lock(&cinode->lock_mutex);
1284 for (i = 0; i < 2; i++) {
1287 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1288 if (flock->fl_start > li->offset ||
1289 (flock->fl_start + length) <
1290 (li->offset + li->length))
1292 if (current->tgid != li->pid)
1294 if (types[i] != li->type)
1296 if (cinode->can_cache_brlcks) {
1298 * We can cache brlock requests - simply remove
1299 * a lock from the file's list.
1301 list_del(&li->llist);
1302 cifs_del_lock_waiters(li);
1306 cur->Pid = cpu_to_le16(li->pid);
1307 cur->LengthLow = cpu_to_le32((u32)li->length);
1308 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1309 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1310 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1312 * We need to save a lock here to let us add it again to
1313 * the file's list if the unlock range request fails on
1316 list_move(&li->llist, &tmp_llist);
1317 if (++num == max_num) {
1318 stored_rc = cifs_lockv(xid, tcon,
1320 li->type, num, 0, buf);
1323 * We failed on the unlock range
1324 * request - add all locks from the tmp
1325 * list to the head of the file's list.
1327 cifs_move_llist(&tmp_llist,
1328 &cfile->llist->locks);
1332 * The unlock range request succeed -
1333 * free the tmp list.
1335 cifs_free_llist(&tmp_llist);
1342 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1343 types[i], num, 0, buf);
1345 cifs_move_llist(&tmp_llist,
1346 &cfile->llist->locks);
1349 cifs_free_llist(&tmp_llist);
1353 mutex_unlock(&cinode->lock_mutex);
/*
 * Set or clear a byte-range lock.  The range length is
 * 1 + fl_end - fl_start.
 *
 * POSIX branch: cifs_posix_lock_set() is tried first; the request is then
 * mapped to CIFS_RDLCK, CIFS_WRLCK or CIFS_UNLCK and sent with
 * CIFSSMBPosixLock().
 *
 * Mandatory branch, locking: a record is created with cifs_lock_init() and
 * offered to cifs_lock_add_if(); the lock is then requested from the
 * server with mand_lock() and recorded locally with cifs_lock_add().
 * Mandatory branch, unlocking: delegated to the mand_unlock_range()
 * callback of the server.
 *
 * For FL_POSIX requests the lock is also handed to the generic layer via
 * posix_lock_file_wait().
 * NOTE(review): the return type, the last parameter, the posix_lck/lock/
 * unlock tests, the rc checks between the steps, the freeing of the lock
 * record on failure and the return statement are not visible in this
 * listing.
 */
1359 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1360 bool wait_flag, bool posix_lck, int lock, int unlock,
1364 __u64 length = 1 + flock->fl_end - flock->fl_start;
1365 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1366 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1367 struct TCP_Server_Info *server = tcon->ses->server;
1370 int posix_lock_type;
1372 rc = cifs_posix_lock_set(file, flock);
1376 if (type & server->vals->shared_lock_type)
1377 posix_lock_type = CIFS_RDLCK;
1379 posix_lock_type = CIFS_WRLCK;
1382 posix_lock_type = CIFS_UNLCK;
1384 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1385 current->tgid, flock->fl_start, length,
1386 NULL, posix_lock_type, wait_flag);
1391 struct cifsLockInfo *lock;
1393 lock = cifs_lock_init(flock->fl_start, length, type);
1397 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1403 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1404 type, 1, 0, wait_flag);
1410 cifs_lock_add(cfile, lock);
1412 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1415 if (flock->fl_flags & FL_POSIX)
1416 posix_lock_file_wait(file, flock);
/*
 * cifs_lock - entry point for byte-range lock requests on a CIFS file.
 * @file:  open file the request applies to (private_data is the cifsFileInfo)
 * @cmd:   lock command; IS_GETLK(cmd) selects the query path
 * @flock: description of the range and lock type
 *
 * Decodes the request with cifs_read_flock(), then either answers a query
 * through cifs_getlk() or forwards a lock/unlock request to cifs_setlk().
 * rc is assigned from those helpers; presumably it is what gets returned.
 */
1420 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1423 int lock = 0, unlock = 0;
1424 bool wait_flag = false;
1425 bool posix_lck = false;
1426 struct cifs_sb_info *cifs_sb;
1427 struct cifs_tcon *tcon;
1428 struct cifsInodeInfo *cinode;
1429 struct cifsFileInfo *cfile;
1436 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1437 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1438 flock->fl_start, flock->fl_end);
1440 cfile = (struct cifsFileInfo *)file->private_data;
1441 tcon = tlink_tcon(cfile->tlink);
/* fills in type, lock, unlock and wait_flag from the file_lock */
1443 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1446 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1447 netfid = cfile->fid.netfid;
1448 cinode = CIFS_I(file->f_path.dentry->d_inode);
/*
 * Three conditions must all hold here: the session has unix capability,
 * the share advertises CIFS_UNIX_FCNTL_CAP, and the mount did not set
 * CIFS_MOUNT_NOPOSIXBRL. Presumably this is what enables posix_lck.
 */
1450 if (cap_unix(tcon->ses) &&
1451 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1452 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1455 * BB add code here to normalize offset and length to account for
1456 * negative length which we can not accept over the wire.
1458 if (IS_GETLK(cmd)) {
1459 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1464 if (!lock && !unlock) {
1466 * if no lock or unlock then nothing to do since we do not
1473 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1480 * update the file size (if needed) after a write. Should be called with
1481 * the inode->i_lock held
/*
 * Raise cifsi->server_eof to offset + bytes_written when a write ended
 * beyond the recorded value. The recorded value is never lowered here.
 */
1484 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1485 unsigned int bytes_written)
1487 loff_t end_of_write = offset + bytes_written;
1489 if (end_of_write > cifsi->server_eof)
1490 cifsi->server_eof = end_of_write;
/*
 * cifs_write - synchronously write a kernel buffer to the server.
 * @open_file:  handle to write through
 * @pid:        pid supplied by the caller (its use is not shown by the
 *              surviving lines of this function)
 * @write_data: source buffer
 * @write_size: number of bytes requested
 * @offset:     file position; advanced by the bytes actually written
 *
 * Data is sent in chunks of at most cifs_sb->wsize bytes through
 * server->ops->sync_write(). After each chunk server_eof is updated under
 * i_lock; once done, i_size is grown if *offset moved past it.
 * The visible return path yields total_written.
 */
1494 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1495 size_t write_size, loff_t *offset)
1498 unsigned int bytes_written = 0;
1499 unsigned int total_written;
1500 struct cifs_sb_info *cifs_sb;
1501 struct cifs_tcon *tcon;
1502 struct TCP_Server_Info *server;
1504 struct dentry *dentry = open_file->dentry;
1505 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1506 struct cifs_io_parms io_parms;
1508 cifs_sb = CIFS_SB(dentry->d_sb);
1510 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1511 *offset, dentry->d_name.name);
1513 tcon = tlink_tcon(open_file->tlink);
1514 server = tcon->ses->server;
/* a protocol without a sync_write operation is checked for up front */
1516 if (!server->ops->sync_write)
/* one iteration per chunk; bytes_written is what the last chunk achieved */
1521 for (total_written = 0; write_size > total_written;
1522 total_written += bytes_written) {
/* keep retrying the chunk for as long as the result is -EAGAIN */
1524 while (rc == -EAGAIN) {
1528 if (open_file->invalidHandle) {
1529 /* we could deadlock if we called
1530 filemap_fdatawait from here so tell
1531 reopen_file not to flush data to
1533 rc = cifs_reopen_file(open_file, false);
1538 len = min((size_t)cifs_sb->wsize,
1539 write_size - total_written);
1540 /* iov[0] is reserved for smb header */
1541 iov[1].iov_base = (char *)write_data + total_written;
1542 iov[1].iov_len = len;
1544 io_parms.tcon = tcon;
1545 io_parms.offset = *offset;
1546 io_parms.length = len;
1547 rc = server->ops->sync_write(xid, open_file, &io_parms,
1548 &bytes_written, iov, 1);
/* an error or a zero-length write is tested for after the retry loop */
1550 if (rc || (bytes_written == 0)) {
/* record the new server-side EOF under i_lock, then advance the position */
1558 spin_lock(&dentry->d_inode->i_lock);
1559 cifs_update_eof(cifsi, *offset, bytes_written);
1560 spin_unlock(&dentry->d_inode->i_lock);
1561 *offset += bytes_written;
1565 cifs_stats_bytes_written(tcon, total_written);
/* grow the cached i_size if the write extended the file */
1567 if (total_written > 0) {
1568 spin_lock(&dentry->d_inode->i_lock);
1569 if (*offset > dentry->d_inode->i_size)
1570 i_size_write(dentry->d_inode, *offset);
1571 spin_unlock(&dentry->d_inode->i_lock);
1573 mark_inode_dirty_sync(dentry->d_inode);
1575 return total_written;
/*
 * find_readable_file - look for an open handle on this inode usable for reads.
 *
 * Walks cifs_inode->openFileList under cifs_file_list_lock. The first entry
 * that was opened with FMODE_READ and whose handle is not invalid gets a
 * reference via cifsFileInfo_get_locked() before the lock is dropped
 * (presumably that entry is then returned - TODO confirm). The walk stops
 * at the first entry lacking FMODE_READ. fsuid_only restricts matches
 * to entries whose uid equals current_fsuid().
 */
1578 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1581 struct cifsFileInfo *open_file = NULL;
1582 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1584 /* only filter by fsuid on multiuser mounts */
1585 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1588 spin_lock(&cifs_file_list_lock);
1589 /* we could simply get the first_list_entry since write-only entries
1590 are always at the end of the list but since the first entry might
1591 have a close pending, we go through the whole list */
1592 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1593 if (fsuid_only && open_file->uid != current_fsuid())
1595 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1596 if (!open_file->invalidHandle) {
1597 /* found a good file */
1598 /* lock it so it will not be closed on us */
1599 cifsFileInfo_get_locked(open_file);
1600 spin_unlock(&cifs_file_list_lock);
1602 } /* else might as well continue, and look for
1603 another, or simply have the caller reopen it
1604 again rather than trying to fix this handle */
1605 } else /* write only file */
1606 break; /* write only files are last so must be done */
/* nothing suitable found: the list lock is released here */
1608 spin_unlock(&cifs_file_list_lock);
/*
 * find_writable_file - look for an open handle on this inode usable for writes.
 *
 * Walks cifs_inode->openFileList under cifs_file_list_lock. A first pass
 * only considers handles whose pid equals current->tgid; if that fails the
 * search is repeated with any_available set. A writable entry with a valid
 * handle is referenced with cifsFileInfo_get_locked(). An invalid writable
 * entry is remembered in inv_file and later passed to cifs_reopen_file().
 * The number of re-scans is bounded by MAX_REOPEN_ATT.
 */
1612 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1615 struct cifsFileInfo *open_file, *inv_file = NULL;
1616 struct cifs_sb_info *cifs_sb;
1617 bool any_available = false;
1619 unsigned int refind = 0;
1621 /* Having a null inode here (because mapping->host was set to zero by
1622 the VFS or MM) should not happen but we had reports of on oops (due to
1623 it being zero) during stress testcases so we need to check for it */
1625 if (cifs_inode == NULL) {
1626 cERROR(1, "Null inode passed to cifs_writeable_file");
1631 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1633 /* only filter by fsuid on multiuser mounts */
1634 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1637 spin_lock(&cifs_file_list_lock);
/* bail out, dropping the list lock, once too many re-scans were attempted */
1639 if (refind > MAX_REOPEN_ATT) {
1640 spin_unlock(&cifs_file_list_lock);
1643 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
/* until any_available is set, only handles of the current tgid qualify */
1644 if (!any_available && open_file->pid != current->tgid)
1646 if (fsuid_only && open_file->uid != current_fsuid())
1648 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1649 if (!open_file->invalidHandle) {
1650 /* found a good writable file */
1651 cifsFileInfo_get_locked(open_file);
1652 spin_unlock(&cifs_file_list_lock);
/* writable but invalid: remember it as a candidate for a reopen */
1656 inv_file = open_file;
1660 /* couldn't find useable FH with same pid, try any available */
1661 if (!any_available) {
1662 any_available = true;
1663 goto refind_writable;
1667 any_available = false;
/* pin the invalid handle so it survives dropping the list lock */
1668 cifsFileInfo_get_locked(inv_file);
1671 spin_unlock(&cifs_file_list_lock);
1674 rc = cifs_reopen_file(inv_file, false);
/*
 * Presumably the reopen-failure path (TODO confirm): the handle is moved to
 * the tail of the list, its reference dropped, and the search restarted.
 */
1678 spin_lock(&cifs_file_list_lock);
1679 list_move_tail(&inv_file->flist,
1680 &cifs_inode->openFileList);
1681 spin_unlock(&cifs_file_list_lock);
1682 cifsFileInfo_put(inv_file);
1683 spin_lock(&cifs_file_list_lock);
1685 goto refind_writable;
/*
 * cifs_partialpagewrite - write part of a page cache page to the server.
 * @page: page to write from
 * @from: first byte within the page
 * @to:   end of the region within the page; to - from bytes are written
 *
 * Maps the page with kmap(), trims the region so the file is not extended
 * past i_size, finds a writable handle with find_writable_file() and sends
 * the data with cifs_write(). The handle reference is dropped afterwards
 * and the inode's atime/mtime are set to the current time.
 */
1692 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1694 struct address_space *mapping = page->mapping;
1695 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1698 int bytes_written = 0;
1699 struct inode *inode;
1700 struct cifsFileInfo *open_file;
1702 if (!mapping || !mapping->host)
1705 inode = page->mapping->host;
/* from here on, offset is the file position of the first byte to write */
1707 offset += (loff_t)from;
1708 write_data = kmap(page);
/* sanity check on the requested region */
1711 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1716 /* racing with truncate? */
1717 if (offset > mapping->host->i_size) {
1719 return 0; /* don't care */
/*
 * NOTE(review): offset already includes 'from' while 'to' is page-relative.
 * The caller in cifs_writepage_locked() passes from == 0, where both
 * agree - confirm before adding callers with a non-zero 'from'.
 */
1722 /* check to make sure that we are not extending the file */
1723 if (mapping->host->i_size - offset < (loff_t)to)
1724 to = (unsigned)(mapping->host->i_size - offset);
1726 open_file = find_writable_file(CIFS_I(mapping->host), false);
/* the write is issued with the pid stored in the handle that was found */
1728 bytes_written = cifs_write(open_file, open_file->pid,
1729 write_data, to - from, &offset);
1730 cifsFileInfo_put(open_file);
1731 /* Does mm or vfs already set times? */
1732 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1733 if ((bytes_written > 0) && (offset))
1735 else if (bytes_written < 0)
1738 cFYI(1, "No writeable filehandles for inode");
/*
 * cifs_writepages - write back dirty pages of a mapping in batches.
 * @mapping: address space whose dirty pages are written
 * @wbc:     writeback control (range, sync mode, nr_to_write)
 *
 * Falls back to generic_writepages() when wsize is smaller than a page.
 * Otherwise each pass of the main loop allocates a cifs_writedata, gathers
 * dirty pages with find_get_pages_tag(), keeps a run of consecutive pages
 * that could be locked and marked for writeback, and submits the run with
 * server->ops->async_writev(). Pages of a failed send are redirtied or
 * flagged with SetPageError() and the error is recorded on the mapping.
 */
1746 static int cifs_writepages(struct address_space *mapping,
1747 struct writeback_control *wbc)
1749 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1750 bool done = false, scanned = false, range_whole = false;
1752 struct cifs_writedata *wdata;
1753 struct TCP_Server_Info *server;
/* file size is sampled once and used for every batch below */
1756 loff_t isize = i_size_read(mapping->host);
1759 * If wsize is smaller than the page cache size, default to writing
1760 * one page at a time via cifs_writepage
1762 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1763 return generic_writepages(mapping, wbc);
/* choose the starting page index: cyclic resume point or explicit range */
1765 if (wbc->range_cyclic) {
1766 index = mapping->writeback_index; /* Start from prev offset */
1769 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1770 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1771 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1776 while (!done && index <= end) {
1777 unsigned int i, nr_pages, found_pages;
1778 pgoff_t next = 0, tofind;
1779 struct page **pages;
/* the batch size is derived from wsize expressed in pages */
1781 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1784 wdata = cifs_writedata_alloc((unsigned int)tofind,
1785 cifs_writev_complete);
1792 * find_get_pages_tag seems to return a max of 256 on each
1793 * iteration, so we must call it several times in order to
1794 * fill the array or the wsize is effectively limited to
1795 * 256 * PAGE_CACHE_SIZE.
1798 pages = wdata->pages;
1800 nr_pages = find_get_pages_tag(mapping, &index,
1801 PAGECACHE_TAG_DIRTY,
1803 found_pages += nr_pages;
1806 } while (nr_pages && tofind && index <= end);
/* no dirty pages found: release the writedata */
1808 if (found_pages == 0) {
1809 kref_put(&wdata->refcount, cifs_writedata_release);
/* vet each page that was found; 'next' tracks the expected following index */
1814 for (i = 0; i < found_pages; i++) {
1815 page = wdata->pages[i];
1817 * At this point we hold neither mapping->tree_lock nor
1818 * lock on the page itself: the page may be truncated or
1819 * invalidated (changing page->mapping to NULL), or even
1820 * swizzled back from swapper_space to tmpfs file
1826 else if (!trylock_page(page))
1829 if (unlikely(page->mapping != mapping)) {
1834 if (!wbc->range_cyclic && page->index > end) {
1840 if (next && (page->index != next)) {
1841 /* Not next consecutive page */
/* only a data-integrity sync waits for writeback already in flight */
1846 if (wbc->sync_mode != WB_SYNC_NONE)
1847 wait_on_page_writeback(page);
1849 if (PageWriteback(page) ||
1850 !clear_page_dirty_for_io(page)) {
1856 * This actually clears the dirty bit in the radix tree.
1857 * See cifs_writepage() for more commentary.
1859 set_page_writeback(page);
/* a page starting at or beyond the sampled file size has writeback ended */
1861 if (page_offset(page) >= isize) {
1864 end_page_writeback(page);
1868 wdata->pages[i] = page;
1869 next = page->index + 1;
1873 /* reset index to refind any pages skipped */
1875 index = wdata->pages[0]->index + 1;
1877 /* put any pages we aren't going to use */
1878 for (i = nr_pages; i < found_pages; i++) {
1879 page_cache_release(wdata->pages[i]);
1880 wdata->pages[i] = NULL;
1883 /* nothing to write? */
1884 if (nr_pages == 0) {
1885 kref_put(&wdata->refcount, cifs_writedata_release);
/* describe the batch: nr_pages pages starting at the first page's offset */
1889 wdata->sync_mode = wbc->sync_mode;
1890 wdata->nr_pages = nr_pages;
1891 wdata->offset = page_offset(wdata->pages[0]);
1892 wdata->pagesz = PAGE_CACHE_SIZE;
1894 min(isize - page_offset(wdata->pages[nr_pages - 1]),
1895 (loff_t)PAGE_CACHE_SIZE);
1896 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
/* a fresh writable handle is looked up before each send attempt */
1900 if (wdata->cfile != NULL)
1901 cifsFileInfo_put(wdata->cfile);
1902 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1904 if (!wdata->cfile) {
1905 cERROR(1, "No writable handles for inode");
1909 wdata->pid = wdata->cfile->pid;
1910 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1911 rc = server->ops->async_writev(wdata);
1912 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
/* the pages of the batch are unlocked once the send attempts are over */
1914 for (i = 0; i < nr_pages; ++i)
1915 unlock_page(wdata->pages[i]);
1917 /* send failure -- clean up the mess */
1919 for (i = 0; i < nr_pages; ++i) {
1921 redirty_page_for_writepage(wbc,
1924 SetPageError(wdata->pages[i]);
1925 end_page_writeback(wdata->pages[i]);
1926 page_cache_release(wdata->pages[i]);
1929 mapping_set_error(mapping, rc);
1931 kref_put(&wdata->refcount, cifs_writedata_release);
/* account for the batch against the caller's page budget */
1933 wbc->nr_to_write -= nr_pages;
1934 if (wbc->nr_to_write <= 0)
1940 if (!scanned && !done) {
1942 * We hit the last page and there is more work to be done: wrap
1943 * back to the start of the file
/* remember where to resume for the next cyclic writeback pass */
1950 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1951 mapping->writeback_index = index;
/*
 * cifs_writepage_locked - write one whole page to the server.
 *
 * Takes an extra page reference, marks the page as under writeback, and
 * writes bytes 0..PAGE_CACHE_SIZE with cifs_partialpagewrite(). An -EAGAIN
 * result outside WB_SYNC_ALL redirties the page (under WB_SYNC_ALL the
 * write is presumably retried - TODO confirm). Writeback is ended and the
 * extra reference dropped at the end.
 */
1957 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1963 /* BB add check for wbc flags */
1964 page_cache_get(page);
1965 if (!PageUptodate(page))
1966 cFYI(1, "ppw - page not up to date");
1969 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1971 * A writepage() implementation always needs to do either this,
1972 * or re-dirty the page with "redirty_page_for_writepage()" in
1973 * the case of a failure.
1975 * Just unlocking the page will cause the radix tree tag-bits
1976 * to fail to update with the state of the page correctly.
1978 set_page_writeback(page);
1980 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1981 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1983 else if (rc == -EAGAIN)
1984 redirty_page_for_writepage(wbc, page);
1988 SetPageUptodate(page);
1989 end_page_writeback(page);
1990 page_cache_release(page);
/* Single-page writeback entry point: delegates to cifs_writepage_locked(). */
1995 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1997 int rc = cifs_writepage_locked(page, wbc);
/*
 * cifs_write_end - finish a buffered write into a page cache page.
 *
 * If the page ends up not uptodate, the copied bytes are written straight
 * to the server with cifs_write() using the file's own handle. Otherwise
 * the page is (per the visible set_page_dirty() call) marked dirty for
 * later writeback. i_size is grown under i_lock when pos moved past it,
 * and the page reference is released at the end.
 */
2002 static int cifs_write_end(struct file *file, struct address_space *mapping,
2003 loff_t pos, unsigned len, unsigned copied,
2004 struct page *page, void *fsdata)
2007 struct inode *inode = mapping->host;
2008 struct cifsFileInfo *cfile = file->private_data;
2009 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* pid selection depends on the CIFS_MOUNT_RWPIDFORWARD mount flag */
2012 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2015 pid = current->tgid;
2017 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2020 if (PageChecked(page)) {
2022 SetPageUptodate(page);
2023 ClearPageChecked(page);
/* a write that covered the whole page makes it uptodate by itself */
2024 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2025 SetPageUptodate(page);
2027 if (!PageUptodate(page)) {
/* byte offset of pos within its page */
2029 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2033 /* this is probably better than directly calling
2034 partialpage_write since in this function the file handle is
2035 known which we might as well leverage */
2036 /* BB check if anything else missing out of ppw
2037 such as updating last write time */
2038 page_data = kmap(page);
2039 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2040 /* if (rc < 0) should we set writebehind rc? */
2047 set_page_dirty(page);
2051 spin_lock(&inode->i_lock);
2052 if (pos > inode->i_size)
2053 i_size_write(inode, pos);
2054 spin_unlock(&inode->i_lock);
2058 page_cache_release(page);
/*
 * cifs_strict_fsync - fsync that also invalidates the cached mapping.
 *
 * Writes out and waits on the dirty range [start, end], then, under
 * i_mutex, invalidates the mapping when clientCanCacheRead is not set
 * (a failure there is logged and ignored), and finally asks the server to
 * flush the handle via server->ops->flush() unless the mount has
 * CIFS_MOUNT_NOSSYNC set.
 */
2063 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2068 struct cifs_tcon *tcon;
2069 struct TCP_Server_Info *server;
2070 struct cifsFileInfo *smbfile = file->private_data;
2071 struct inode *inode = file->f_path.dentry->d_inode;
2072 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2074 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2077 mutex_lock(&inode->i_mutex);
2081 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2082 file->f_path.dentry->d_name.name, datasync);
2084 if (!CIFS_I(inode)->clientCanCacheRead) {
2085 rc = cifs_invalidate_mapping(inode);
2087 cFYI(1, "rc: %d during invalidate phase", rc);
2088 rc = 0; /* don't care about it in fsync */
2092 tcon = tlink_tcon(smbfile->tlink);
/* the flush operation is optional per protocol, hence the pointer test */
2093 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2094 server = tcon->ses->server;
2095 if (server->ops->flush)
2096 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2102 mutex_unlock(&inode->i_mutex);
/*
 * cifs_fsync - write back a file range and flush it on the server.
 *
 * Writes out and waits on the dirty range [start, end], then, under
 * i_mutex, calls server->ops->flush() for the handle unless the mount has
 * CIFS_MOUNT_NOSSYNC set. Unlike cifs_strict_fsync() no mapping
 * invalidation is visible here.
 */
2106 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2110 struct cifs_tcon *tcon;
2111 struct TCP_Server_Info *server;
2112 struct cifsFileInfo *smbfile = file->private_data;
2113 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2114 struct inode *inode = file->f_mapping->host;
2116 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2119 mutex_lock(&inode->i_mutex);
2123 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2124 file->f_path.dentry->d_name.name, datasync);
2126 tcon = tlink_tcon(smbfile->tlink);
/* the flush operation is optional per protocol, hence the pointer test */
2127 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2128 server = tcon->ses->server;
2129 if (server->ops->flush)
2130 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2136 mutex_unlock(&inode->i_mutex);
2141 * As file closes, flush all cached write data for this inode checking
2142 * for write behind errors.
/*
 * cifs_flush - flush handler; @id is not used by the visible lines.
 * Only files opened with FMODE_WRITE trigger filemap_write_and_wait();
 * its result is stored in rc and logged.
 */
2144 int cifs_flush(struct file *file, fl_owner_t id)
2146 struct inode *inode = file->f_path.dentry->d_inode;
2149 if (file->f_mode & FMODE_WRITE)
2150 rc = filemap_write_and_wait(inode->i_mapping);
2152 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * Fill pages[0..num_pages) with pages from
 * alloc_page(GFP_KERNEL|__GFP_HIGHMEM). The second loop belongs to the
 * failure path described by the existing comment (presumably it releases
 * what was allocated before the failure - TODO confirm).
 */
2158 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2163 for (i = 0; i < num_pages; i++) {
2164 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2167 * save number of pages we have already allocated and
2168 * return with ENOMEM error
2177 for (i = 0; i < num_pages; i++)
/*
 * Compute how many pages one write chunk needs: the chunk length is the
 * smaller of len and wsize, rounded up to whole pages of PAGE_SIZE bytes.
 * cur_len is an output parameter (presumably receives the chunk length).
 */
2184 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2189 clen = min_t(const size_t, len, wsize);
2190 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * Work handler run when an uncached async write finishes.
 *
 * Updates server_eof and, if needed, i_size under i_lock, then signals
 * wdata->done. The data pages are released unless the result is -EAGAIN
 * (they are kept in that case, presumably for the resend). Finally drops a
 * reference on the writedata.
 */
2199 cifs_uncached_writev_complete(struct work_struct *work)
2202 struct cifs_writedata *wdata = container_of(work,
2203 struct cifs_writedata, work);
2204 struct inode *inode = wdata->cfile->dentry->d_inode;
2205 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2207 spin_lock(&inode->i_lock);
2208 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2209 if (cifsi->server_eof > inode->i_size)
2210 i_size_write(inode, cifsi->server_eof);
2211 spin_unlock(&inode->i_lock);
/* wake whoever waits on this request */
2213 complete(&wdata->done);
2215 if (wdata->result != -EAGAIN) {
2216 for (i = 0; i < wdata->nr_pages; i++)
2217 put_page(wdata->pages[i]);
2220 kref_put(&wdata->refcount, cifs_writedata_release);
2223 /* attempt to send write to server, retry on any -EAGAIN errors */
/*
 * Submit wdata with server->ops->async_writev(), looping for as long as
 * the result is -EAGAIN. An invalidated file handle is reopened with
 * cifs_reopen_file() before each submission.
 */
2225 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2228 struct TCP_Server_Info *server;
2230 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2233 if (wdata->cfile->invalidHandle) {
2234 rc = cifs_reopen_file(wdata->cfile, false);
2238 rc = server->ops->async_writev(wdata);
2239 } while (rc == -EAGAIN);
/*
 * cifs_iovec_write - uncached write of a user iovec through async requests.
 * @file:    open file
 * @iov:     user buffers to write
 * @nr_segs: number of entries in @iov
 * @poffset: file position; advanced by the number of bytes written
 *
 * Phase 1 splits the data into chunks sized by get_numpages(), copies each
 * chunk from user space into freshly allocated pages and submits it with
 * cifs_uncached_retry_writev(); submitted requests are queued on
 * wdata_list. Phase 2 waits for the replies in list order, adds up the
 * bytes written and resends requests that ended with -EAGAIN.
 * Returns total_written when it is non-zero, otherwise rc.
 */
2245 cifs_iovec_write(struct file *file, const struct iovec *iov,
2246 unsigned long nr_segs, loff_t *poffset)
2248 unsigned long nr_pages, i;
2249 size_t copied, len, cur_len;
2250 ssize_t total_written = 0;
2253 struct cifsFileInfo *open_file;
2254 struct cifs_tcon *tcon;
2255 struct cifs_sb_info *cifs_sb;
2256 struct cifs_writedata *wdata, *tmp;
2257 struct list_head wdata_list;
2261 len = iov_length(iov, nr_segs);
/* generic_write_checks() may adjust the position and length */
2265 rc = generic_write_checks(file, poffset, &len, 0);
2269 INIT_LIST_HEAD(&wdata_list);
2270 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2271 open_file = file->private_data;
2272 tcon = tlink_tcon(open_file->tlink);
/* this path needs the protocol's async write operation */
2274 if (!tcon->ses->server->ops->async_writev)
/* with RWPIDFORWARD the opener's pid is used instead of the caller's tgid */
2279 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2280 pid = open_file->pid;
2282 pid = current->tgid;
2284 iov_iter_init(&it, iov, nr_segs, len, 0);
2288 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2289 wdata = cifs_writedata_alloc(nr_pages,
2290 cifs_uncached_writev_complete);
2296 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
/* copy the chunk from user space, at most one page at a time */
2303 for (i = 0; i < nr_pages; i++) {
2304 copied = min_t(const size_t, cur_len, PAGE_SIZE);
2305 copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2308 iov_iter_advance(&it, copied);
/* presumably cur_len becomes the number of bytes actually copied */
2310 cur_len = save_len - cur_len;
2312 wdata->sync_mode = WB_SYNC_ALL;
2313 wdata->nr_pages = nr_pages;
2314 wdata->offset = (__u64)offset;
2315 wdata->cfile = cifsFileInfo_get(open_file);
2317 wdata->bytes = cur_len;
2318 wdata->pagesz = PAGE_SIZE;
/* the last page holds whatever does not fill the preceding full pages */
2319 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2320 rc = cifs_uncached_retry_writev(wdata);
2322 kref_put(&wdata->refcount, cifs_writedata_release);
2326 list_add_tail(&wdata->list, &wdata_list);
2332 * If at least one write was successfully sent, then discard any rc
2333 * value from the later writes. If the other write succeeds, then
2334 * we'll end up returning whatever was written. If it fails, then
2335 * we'll get a new rc value from that.
2337 if (!list_empty(&wdata_list))
2341 * Wait for and collect replies for any successful sends in order of
2342 * increasing offset. Once an error is hit or we get a fatal signal
2343 * while waiting, then return without waiting for any more replies.
2346 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2348 /* FIXME: freezable too? */
2349 rc = wait_for_completion_killable(&wdata->done);
2352 else if (wdata->result)
2355 total_written += wdata->bytes;
2357 /* resend call if it's a retryable error */
2358 if (rc == -EAGAIN) {
2359 rc = cifs_uncached_retry_writev(wdata);
/* request handled: unlink it and drop the list's reference */
2363 list_del_init(&wdata->list);
2364 kref_put(&wdata->refcount, cifs_writedata_release);
2367 if (total_written > 0)
2368 *poffset += total_written;
2370 cifs_stats_bytes_written(tcon, total_written);
2371 return total_written ? total_written : (ssize_t)rc;
/*
 * cifs_user_writev - uncached write entry point taking a kiocb.
 * Delegates to cifs_iovec_write() and marks the inode's cached mapping
 * invalid (presumably only when something was written - TODO confirm).
 */
2374 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2375 unsigned long nr_segs, loff_t pos)
2378 struct inode *inode;
2380 inode = iocb->ki_filp->f_path.dentry->d_inode;
2383 * BB - optimize the way when signing is disabled. We can drop this
2384 * extra memory-to-memory copying and use iovec buffers for constructing
2388 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2390 CIFS_I(inode)->invalid_mapping = true;
/*
 * cifs_strict_writev - write entry point for strict cache mode.
 * When clientCanCacheAll is set the generic page-cache write path is used;
 * otherwise the data goes to the server through cifs_user_writev().
 */
2397 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2398 unsigned long nr_segs, loff_t pos)
2400 struct inode *inode;
2402 inode = iocb->ki_filp->f_path.dentry->d_inode;
2404 if (CIFS_I(inode)->clientCanCacheAll)
2405 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2408 * In strict cache mode we need to write the data to the server exactly
2409 * from the pos to pos+len-1 rather than flush all affected pages
2410 * because it may cause a error with mandatory locks on these pages but
2411 * not on the region from pos to ppos+len-1.
2414 return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * Allocate a zeroed cifs_readdata with room for nr_pages page pointers
 * right behind the structure, and initialise its refcount, list head,
 * completion and work item (whose handler is @complete).
 */
2417 static struct cifs_readdata *
2418 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2420 struct cifs_readdata *rdata;
2422 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2424 if (rdata != NULL) {
2425 kref_init(&rdata->refcount);
2426 INIT_LIST_HEAD(&rdata->list);
2427 init_completion(&rdata->done);
2428 INIT_WORK(&rdata->work, complete);
/*
 * kref release callback for a cifs_readdata: recovers the structure from
 * its embedded refcount and drops the file handle reference it holds.
 */
2435 cifs_readdata_release(struct kref *refcount)
2437 struct cifs_readdata *rdata = container_of(refcount,
2438 struct cifs_readdata, refcount);
2441 cifsFileInfo_put(rdata->cfile);
/*
 * Allocate nr_pages pages with alloc_page(GFP_KERNEL|__GFP_HIGHMEM) and
 * store them in rdata->pages[]. The second loop releases the pages and
 * clears the slots (presumably only on allocation failure - TODO confirm).
 */
2447 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2453 for (i = 0; i < nr_pages; i++) {
2454 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2459 rdata->pages[i] = page;
2463 for (i = 0; i < nr_pages; i++) {
2464 put_page(rdata->pages[i]);
2465 rdata->pages[i] = NULL;
/*
 * kref release callback for uncached reads: puts every page referenced by
 * rdata->pages[], clears the slots, then chains to cifs_readdata_release().
 */
2472 cifs_uncached_readdata_release(struct kref *refcount)
2474 struct cifs_readdata *rdata = container_of(refcount,
2475 struct cifs_readdata, refcount);
2478 for (i = 0; i < rdata->nr_pages; i++) {
2479 put_page(rdata->pages[i]);
2480 rdata->pages[i] = NULL;
2482 cifs_readdata_release(refcount);
/*
 * Submit rdata with server->ops->async_readv(), looping for as long as the
 * result is -EAGAIN. An invalidated file handle is reopened with
 * cifs_reopen_file() before each submission.
 */
2486 cifs_retry_async_readv(struct cifs_readdata *rdata)
2489 struct TCP_Server_Info *server;
2491 server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2494 if (rdata->cfile->invalidHandle) {
2495 rc = cifs_reopen_file(rdata->cfile, true);
2499 rc = server->ops->async_readv(rdata);
2500 } while (rc == -EAGAIN);
2506 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2507 * @rdata: the readdata response with list of pages holding data
2508 * @iov: vector in which we should copy the data
2509 * @nr_segs: number of segments in vector
2510 * @offset: offset into file of the first iovec
2511 * @copied: used to return the amount of data copied to the iov
2513 * This function copies data from a list of pages in a readdata response into
2514 * an array of iovecs. It will first calculate where the data should go
2515 * based on the info in the readdata and then copy the data into that spot.
/*
 * Implementation notes: the destination position inside the iovec is the
 * distance between this request's file offset and the offset of the whole
 * read. Pages are copied one by one with memcpy_toiovecend(), each copy
 * limited both by the data left in the response and by the space left in
 * the iovec.
 */
2518 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2519 unsigned long nr_segs, loff_t offset, ssize_t *copied)
/* where this response starts, relative to the start of the iovec */
2523 size_t pos = rdata->offset - offset;
2524 ssize_t remaining = rdata->bytes;
2525 unsigned char *pdata;
2528 /* set up iov_iter and advance to the correct offset */
2529 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2530 iov_iter_advance(&ii, pos);
2533 for (i = 0; i < rdata->nr_pages; i++) {
2535 struct page *page = rdata->pages[i];
2537 /* copy a whole page or whatever's left */
2538 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2540 /* ...but limit it to whatever space is left in the iov */
2541 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2543 /* go while there's data to be copied and no errors */
2546 rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2552 iov_iter_advance(&ii, copy);
/*
 * Work handler run when an uncached async read finishes: signals
 * rdata->done and drops a reference on the readdata.
 */
2561 cifs_uncached_readv_complete(struct work_struct *work)
2563 struct cifs_readdata *rdata = container_of(work,
2564 struct cifs_readdata, work);
2566 complete(&rdata->done);
2567 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Receive up to @len bytes of read response data into rdata->pages[].
 *
 * Each page is mapped and filled from the socket with
 * cifs_readv_from_socket(): full pages while at least PAGE_SIZE bytes
 * remain, then one partial page whose tail is zeroed and whose length is
 * recorded in rdata->tailsz. Slots for pages that receive no data are
 * cleared. Returns the number of bytes read when positive, otherwise the
 * last result.
 */
2571 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2572 struct cifs_readdata *rdata, unsigned int len)
2574 int total_read = 0, result = 0;
2576 unsigned int nr_pages = rdata->nr_pages;
/* default: the last page is full unless a partial page is seen below */
2579 rdata->tailsz = PAGE_SIZE;
2580 for (i = 0; i < nr_pages; i++) {
2581 struct page *page = rdata->pages[i];
2583 if (len >= PAGE_SIZE) {
2584 /* enough data to fill the page */
2585 iov.iov_base = kmap(page);
2586 iov.iov_len = PAGE_SIZE;
2587 cFYI(1, "%u: iov_base=%p iov_len=%zu",
2588 i, iov.iov_base, iov.iov_len);
2590 } else if (len > 0) {
2591 /* enough for partial page, fill and zero the rest */
2592 iov.iov_base = kmap(page);
2594 cFYI(1, "%u: iov_base=%p iov_len=%zu",
2595 i, iov.iov_base, iov.iov_len);
2596 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2597 rdata->tailsz = len;
2600 /* no need to hold page hostage */
2601 rdata->pages[i] = NULL;
2607 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2612 total_read += result;
2615 return total_read > 0 ? total_read : result;
/*
 * cifs_iovec_read - uncached read into a user iovec through async requests.
 * @file:    open file
 * @iov:     user buffers to fill
 * @nr_segs: number of entries in @iov
 * @poffset: file position; advanced by the number of bytes read
 *
 * Phase 1 splits the request into chunks of at most rsize bytes, allocates
 * pages for each chunk and submits it with cifs_retry_async_readv();
 * submitted requests are queued on rdata_list. Phase 2 waits for the
 * replies in list order, copies the data out with cifs_readdata_to_iov()
 * and resends requests that ended with -EAGAIN.
 * Returns total_read when it is non-zero, otherwise rc.
 */
2619 cifs_iovec_read(struct file *file, const struct iovec *iov,
2620 unsigned long nr_segs, loff_t *poffset)
2623 size_t len, cur_len;
2624 ssize_t total_read = 0;
2625 loff_t offset = *poffset;
2626 unsigned int npages;
2627 struct cifs_sb_info *cifs_sb;
2628 struct cifs_tcon *tcon;
2629 struct cifsFileInfo *open_file;
2630 struct cifs_readdata *rdata, *tmp;
2631 struct list_head rdata_list;
2637 len = iov_length(iov, nr_segs);
2641 INIT_LIST_HEAD(&rdata_list);
2642 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2643 open_file = file->private_data;
2644 tcon = tlink_tcon(open_file->tlink);
/* this path needs the protocol's async read operation */
2646 if (!tcon->ses->server->ops->async_readv)
/* with RWPIDFORWARD the opener's pid is used instead of the caller's tgid */
2649 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2650 pid = open_file->pid;
2652 pid = current->tgid;
/* only logged; the read is not refused by the visible code */
2654 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2655 cFYI(1, "attempting read on write only file instance");
/* size of the next chunk and the number of pages it needs */
2658 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2659 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2661 /* allocate a readdata struct */
2662 rdata = cifs_readdata_alloc(npages,
2663 cifs_uncached_readv_complete);
2669 rc = cifs_read_allocate_pages(rdata, npages);
2673 rdata->cfile = cifsFileInfo_get(open_file);
2674 rdata->nr_pages = npages;
2675 rdata->offset = offset;
2676 rdata->bytes = cur_len;
2678 rdata->pagesz = PAGE_SIZE;
2679 rdata->read_into_pages = cifs_uncached_read_into_pages;
2681 rc = cifs_retry_async_readv(rdata);
2684 kref_put(&rdata->refcount,
2685 cifs_uncached_readdata_release);
2689 list_add_tail(&rdata->list, &rdata_list);
2694 /* if at least one read request send succeeded, then reset rc */
2695 if (!list_empty(&rdata_list))
2698 /* the loop below should proceed in the order of increasing offsets */
2700 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2704 /* FIXME: freezable sleep too? */
2705 rc = wait_for_completion_killable(&rdata->done);
2708 else if (rdata->result)
2711 rc = cifs_readdata_to_iov(rdata, iov,
2714 total_read += copied;
2717 /* resend call if it's a retryable error */
2718 if (rc == -EAGAIN) {
2719 rc = cifs_retry_async_readv(rdata);
/* request handled: unlink it and drop the list's reference */
2723 list_del_init(&rdata->list);
2724 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2727 cifs_stats_bytes_read(tcon, total_read);
2728 *poffset += total_read;
2730 /* mask nodata case */
2734 return total_read ? total_read : rc;
/*
 * cifs_user_readv - uncached read entry point taking a kiocb.
 * Delegates to cifs_iovec_read() on the kiocb's file, starting at pos.
 */
2737 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2738 unsigned long nr_segs, loff_t pos)
2742 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * cifs_strict_readv - read entry point for strict cache mode.
 * When clientCanCacheRead is set the generic page-cache read path is used;
 * otherwise the data is fetched from the server via cifs_user_readv().
 */
2749 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2750 unsigned long nr_segs, loff_t pos)
2752 struct inode *inode;
2754 inode = iocb->ki_filp->f_path.dentry->d_inode;
2756 if (CIFS_I(inode)->clientCanCacheRead)
2757 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2760 * In strict cache mode we need to read from the server all the time
2761 * if we don't have level II oplock because the server can delay mtime
2762 * change - so we can't make a decision about inode invalidating.
2763 * And we can also fail with pagereading if there are mandatory locks
2764 * on pages affected by this read but not on the region from pos to
2768 return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * cifs_read - synchronously read from the server into a kernel buffer.
 * @file:      open file (private_data must hold the cifsFileInfo)
 * @read_data: destination buffer
 * @read_size: number of bytes requested
 * @offset:    file position; advanced as data arrives
 *
 * Reads in chunks of at most min(rsize, CIFSMaxBufSize) bytes through
 * server->ops->sync_read(), retrying a chunk while the result is -EAGAIN
 * and reopening an invalidated handle first.
 */
2772 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2775 unsigned int bytes_read = 0;
2776 unsigned int total_read;
2777 unsigned int current_read_size;
2779 struct cifs_sb_info *cifs_sb;
2780 struct cifs_tcon *tcon;
2781 struct TCP_Server_Info *server;
2784 struct cifsFileInfo *open_file;
2785 struct cifs_io_parms io_parms;
2786 int buf_type = CIFS_NO_BUFFER;
2790 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2792 /* FIXME: set up handlers for larger reads and/or convert to async */
2793 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2795 if (file->private_data == NULL) {
2800 open_file = file->private_data;
2801 tcon = tlink_tcon(open_file->tlink);
2802 server = tcon->ses->server;
/* a protocol without a sync_read operation is checked for up front */
2804 if (!server->ops->sync_read) {
/* with RWPIDFORWARD the opener's pid is used instead of the caller's tgid */
2809 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2810 pid = open_file->pid;
2812 pid = current->tgid;
/* only logged; the read is not refused by the visible code */
2814 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2815 cFYI(1, "attempting read on write only file instance");
/* one iteration per chunk; cur_offset walks through the caller's buffer */
2817 for (total_read = 0, cur_offset = read_data; read_size > total_read;
2818 total_read += bytes_read, cur_offset += bytes_read) {
2819 current_read_size = min_t(uint, read_size - total_read, rsize);
2821 * For windows me and 9x we do not want to request more than it
2822 * negotiated since it will refuse the read then.
2824 if ((tcon->ses) && !(tcon->ses->capabilities &
2825 tcon->ses->server->vals->cap_large_files)) {
2826 current_read_size = min_t(uint, current_read_size,
2830 while (rc == -EAGAIN) {
2831 if (open_file->invalidHandle) {
2832 rc = cifs_reopen_file(open_file, true);
2837 io_parms.tcon = tcon;
2838 io_parms.offset = *offset;
2839 io_parms.length = current_read_size;
2840 rc = server->ops->sync_read(xid, open_file, &io_parms,
2841 &bytes_read, &cur_offset,
/* an error or a zero-length read is tested for after the retry loop */
2844 if (rc || (bytes_read == 0)) {
2852 cifs_stats_bytes_read(tcon, total_read);
2853 *offset += bytes_read;
2861 * If the page is mmap'ed into a process' page tables, then we need to make
2862 * sure that it doesn't change while being written back.
/*
 * cifs_page_mkwrite - the .page_mkwrite handler installed through
 * cifs_file_vm_ops. Takes the faulting page from vmf->page and reports
 * VM_FAULT_LOCKED.
 * NOTE(review): the statement between the two visible lines (presumably
 * the one that locks the page) is missing from this excerpt - confirm.
 */
2865 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2867 struct page *page = vmf->page;
2870 return VM_FAULT_LOCKED;
/*
 * VM operations assigned to vma->vm_ops by cifs_file_strict_mmap() and
 * cifs_file_mmap().
 */
2873 static struct vm_operations_struct cifs_file_vm_ops = {
/* page faults go to the generic filemap_fault handler */
2874 .fault = filemap_fault,
2875 .page_mkwrite = cifs_page_mkwrite,
/*
 * cifs_file_strict_mmap - mmap entry point that invalidates the inode's
 * mapping first when CIFS_I(inode)->clientCanCacheRead is not set, then
 * calls generic_file_mmap() and installs cifs_file_vm_ops on the vma.
 * NOTE(review): the checks on rc and the return statement are not visible
 * in this excerpt.
 */
2878 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2881 struct inode *inode = file->f_path.dentry->d_inode;
2885 if (!CIFS_I(inode)->clientCanCacheRead) {
2886 rc = cifs_invalidate_mapping(inode);
2891 rc = generic_file_mmap(file, vma);
2893 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_file_mmap - mmap entry point that revalidates the file with
 * cifs_revalidate_file() (logging the error code on failure), then calls
 * generic_file_mmap() and installs cifs_file_vm_ops on the vma.
 * NOTE(review): the checks on rc and the return statement are not visible
 * in this excerpt.
 */
2898 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2903 rc = cifs_revalidate_file(file);
2905 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2909 rc = generic_file_mmap(file, vma);
2911 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_readv_complete - work function handed to cifs_readdata_alloc() by
 * cifs_readpages(); finishes every page of a read request.
 *
 * The cifs_readdata is recovered from its embedded work_struct. Each page
 * is added to the file LRU; when rdata->result is 0 it is also flushed,
 * marked uptodate and offered to fscache. The page reference is then
 * dropped and its slot in rdata->pages cleared. Finally one reference on
 * the cifs_readdata itself is released.
 */
2917 cifs_readv_complete(struct work_struct *work)
2920 struct cifs_readdata *rdata = container_of(work,
2921 struct cifs_readdata, work);
2923 for (i = 0; i < rdata->nr_pages; i++) {
2924 struct page *page = rdata->pages[i];
2926 lru_cache_add_file(page);
/* only a successful read makes the page contents valid */
2928 if (rdata->result == 0) {
2929 flush_dcache_page(page);
2930 SetPageUptodate(page);
/* copy successfully read pages into the local cache as well */
2935 if (rdata->result == 0)
2936 cifs_readpage_to_fscache(rdata->mapping->host, page);
2938 page_cache_release(page);
2939 rdata->pages[i] = NULL;
2941 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * cifs_readpages_read_into_pages - receive up to len bytes of read data
 * from the server socket into the pages held in rdata->pages.
 *
 * While len lasts, each page is kmap()ed and described by one iov: a full
 * PAGE_CACHE_SIZE when at least that much remains, otherwise the partial
 * remainder, with the rest of the page zeroed and rdata->tailsz set to the
 * partial length. The data is received with cifs_readv_from_socket() using
 * that one iov.
 *
 * Once len is used up, a page whose index lies beyond the index of the
 * server's (probable) EOF is zero-filled and marked uptodate; any other
 * leftover page is simply put on the LRU. Both kinds are released and
 * removed from rdata->pages.
 *
 * Returns total_read when it is positive, otherwise result (the most
 * recent cifs_readv_from_socket() return value, or its initial 0).
 *
 * NOTE(review): loop-control statements and the checks on result are not
 * visible in this excerpt.
 */
2945 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
2946 struct cifs_readdata *rdata, unsigned int len)
2948 int total_read = 0, result = 0;
2952 unsigned int nr_pages = rdata->nr_pages;
2955 /* determine the eof that the server (probably) has */
2956 eof = CIFS_I(rdata->mapping->host)->server_eof;
/* index of the page holding the last byte; 0 for an empty file */
2957 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2958 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
/* default: the last page is full; overwritten below for a partial page */
2960 rdata->tailsz = PAGE_CACHE_SIZE;
2961 for (i = 0; i < nr_pages; i++) {
2962 struct page *page = rdata->pages[i];
2964 if (len >= PAGE_CACHE_SIZE) {
2965 /* enough data to fill the page */
2966 iov.iov_base = kmap(page);
2967 iov.iov_len = PAGE_CACHE_SIZE;
2968 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2969 i, page->index, iov.iov_base, iov.iov_len);
2970 len -= PAGE_CACHE_SIZE;
2971 } else if (len > 0) {
2972 /* enough for partial page, fill and zero the rest */
2973 iov.iov_base = kmap(page);
2975 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2976 i, page->index, iov.iov_base, iov.iov_len);
2977 memset(iov.iov_base + len,
2978 '\0', PAGE_CACHE_SIZE - len);
2979 rdata->tailsz = len;
2981 } else if (page->index > eof_index) {
2983 * The VFS will not try to do readahead past the
2984 * i_size, but it's possible that we have outstanding
2985 * writes with gaps in the middle and the i_size hasn't
2986 * caught up yet. Populate those with zeroed out pages
2987 * to prevent the VFS from repeatedly attempting to
2988 * fill them until the writes are flushed.
2990 zero_user(page, 0, PAGE_CACHE_SIZE);
2991 lru_cache_add_file(page);
2992 flush_dcache_page(page);
2993 SetPageUptodate(page);
2995 page_cache_release(page);
2996 rdata->pages[i] = NULL;
3000 /* no need to hold page hostage */
3001 lru_cache_add_file(page);
3003 page_cache_release(page);
3004 rdata->pages[i] = NULL;
3009 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3014 total_read += result;
3017 return total_read > 0 ? total_read : result;
/*
 * cifs_readpages - the .readpages address-space operation: issue
 * asynchronous reads for the pages queued on page_list.
 *
 * It bails out when rsize is smaller than one page and asks fscache for
 * the pages first. Then, until page_list is empty, it takes the page at
 * the tail of the list, locks it and inserts it into the page cache, and
 * moves following pages onto a private list for as long as their indexes
 * are consecutive, the batch stays within rsize and each page can be
 * inserted into the page cache. The batch is described by a cifs_readdata
 * (file-handle reference, mapping, starting offset, byte count, page
 * array) and submitted with cifs_retry_async_readv().
 *
 * On what appear to be the failure paths (cifs_readdata allocation,
 * submission) the batched pages are put on the LRU and released.
 *
 * NOTE(review): return statements, several error checks and loop exits
 * are not visible in this excerpt - confirm the return contract against
 * the full file.
 */
3020 static int cifs_readpages(struct file *file, struct address_space *mapping,
3021 struct list_head *page_list, unsigned num_pages)
3024 struct list_head tmplist;
3025 struct cifsFileInfo *open_file = file->private_data;
3026 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3027 unsigned int rsize = cifs_sb->rsize;
3031 * Give up immediately if rsize is too small to read an entire page.
3032 * The VFS will fall back to readpage. We should never reach this
3033 * point however since we set ra_pages to 0 when the rsize is smaller
3034 * than a cache page.
3036 if (unlikely(rsize < PAGE_CACHE_SIZE))
3040 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3041 * immediately if the cookie is negative
3043 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
/*
 * pid selection: the pid stored in the open file when the mount has
 * CIFS_MOUNT_RWPIDFORWARD, the caller's tgid otherwise.
 * NOTE(review): the "else" between the two assignments is not visible here.
 */
3048 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3049 pid = open_file->pid;
3051 pid = current->tgid;
/* private list that collects the pages of one read request */
3054 INIT_LIST_HEAD(&tmplist);
3056 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3057 mapping, num_pages);
3060 * Start with the page at end of list and move it to private
3061 * list. Do the same with any following pages until we hit
3062 * the rsize limit, hit an index discontinuity, or run out of
3063 * pages. Issue the async read and then start the loop again
3064 * until the list is empty.
3066 * Note that list order is important. The page_list is in
3067 * the order of declining indexes. When we put the pages in
3068 * the rdata->pages, then we want them in increasing order.
3070 while (!list_empty(page_list)) {
/* every batch starts out holding exactly one page */
3072 unsigned int bytes = PAGE_CACHE_SIZE;
/*
 * NOTE(review): expected_index is unsigned int but is compared with
 * page->index below; if page->index is a wider type the comparison
 * would truncate for very large files - confirm.
 */
3073 unsigned int expected_index;
3074 unsigned int nr_pages = 1;
3076 struct page *page, *tpage;
3077 struct cifs_readdata *rdata;
3079 page = list_entry(page_list->prev, struct page, lru);
3082 * Lock the page and put it in the cache. Since no one else
3083 * should have access to this page, we're safe to simply set
3084 * PG_locked without checking it first.
3086 __set_page_locked(page);
3087 rc = add_to_page_cache_locked(page, mapping,
3088 page->index, GFP_KERNEL);
3090 /* give up if we can't stick it in the cache */
3092 __clear_page_locked(page);
3096 /* move first page to the tmplist */
3097 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3098 list_move_tail(&page->lru, &tmplist);
3100 /* now try and add more pages onto the request */
3101 expected_index = page->index + 1;
3102 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3103 /* discontinuity ? */
3104 if (page->index != expected_index)
3107 /* would this page push the read over the rsize? */
3108 if (bytes + PAGE_CACHE_SIZE > rsize)
3111 __set_page_locked(page);
3112 if (add_to_page_cache_locked(page, mapping,
3113 page->index, GFP_KERNEL)) {
3114 __clear_page_locked(page);
3117 list_move_tail(&page->lru, &tmplist);
3118 bytes += PAGE_CACHE_SIZE;
3123 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3125 /* best to give up if we're out of mem */
3126 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3127 list_del(&page->lru);
3128 lru_cache_add_file(page);
3130 page_cache_release(page);
3136 rdata->cfile = cifsFileInfo_get(open_file);
3137 rdata->mapping = mapping;
3138 rdata->offset = offset;
3139 rdata->bytes = bytes;
3141 rdata->pagesz = PAGE_CACHE_SIZE;
3142 rdata->read_into_pages = cifs_readpages_read_into_pages;
3144 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3145 list_del(&page->lru);
3146 rdata->pages[rdata->nr_pages++] = page;
3149 rc = cifs_retry_async_readv(rdata);
3151 for (i = 0; i < rdata->nr_pages; i++) {
3152 page = rdata->pages[i];
3153 lru_cache_add_file(page);
3155 page_cache_release(page);
3157 kref_put(&rdata->refcount, cifs_readdata_release);
3161 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * cifs_readpage_worker - fill one page-cache page, trying fscache first and
 * falling back to a synchronous cifs_read() of PAGE_CACHE_SIZE bytes at
 * *poffset.
 *
 * After the read the inode's atime is refreshed, a short read has the
 * remainder of the page zeroed, and the page is flushed, marked uptodate
 * and offered to fscache. The extra page reference taken around the read
 * is dropped at the end.
 *
 * NOTE(review): the checks on rc, the kunmap and the return statement are
 * not visible in this excerpt.
 */
3167 static int cifs_readpage_worker(struct file *file, struct page *page,
3173 /* Is the page cached? */
3174 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
/* hold a reference on the page for the duration of the read */
3178 page_cache_get(page);
3179 read_data = kmap(page);
3180 /* for reads over a certain size could initiate async read ahead */
3182 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
/* rc is logged as the byte count here */
3187 cFYI(1, "Bytes read %d", rc);
3189 file->f_path.dentry->d_inode->i_atime =
3190 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* short read: clear the part of the page the server did not fill */
3192 if (PAGE_CACHE_SIZE > rc)
3193 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3195 flush_dcache_page(page);
3196 SetPageUptodate(page);
3198 /* send this page to the cache */
3199 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3205 page_cache_release(page);
/*
 * cifs_readpage - the .readpage address-space operation: converts the
 * page index into a byte offset and delegates to cifs_readpage_worker().
 * A file without private_data is handled separately (branch body not
 * visible in this excerpt).
 */
3211 static int cifs_readpage(struct file *file, struct page *page)
3213 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3219 if (file->private_data == NULL) {
/* debug output only: the casts to int truncate offsets that do not fit */
3225 cFYI(1, "readpage %p at offset %d 0x%x",
3226 page, (int)offset, (int)offset);
3228 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - scan the inode's openFileList, under
 * cifs_file_list_lock, for an open handle whose flags include FMODE_WRITE.
 * The lock is dropped both when such a handle is found and after a full
 * scan.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably non-zero means a writable handle exists - confirm.
 */
3236 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3238 struct cifsFileInfo *open_file;
3240 spin_lock(&cifs_file_list_lock);
3241 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3242 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3243 spin_unlock(&cifs_file_list_lock);
3247 spin_unlock(&cifs_file_list_lock);
3251 /* We do not want to update the file size from server for inodes
3252 open for write - to avoid races with writepage extending
3253 the file - in the future we could consider allowing
3254 refreshing the inode only on increases in the file size
3255 but this is tricky to do without racing with writebehind
3256 page caching in the current Linux kernel design */
/*
 * is_size_safe_to_change - decide whether the cached inode size may be
 * replaced by end_of_file.
 *
 * When the inode has a writable open handle, the decision looks at two
 * things: whether the mount uses CIFS_MOUNT_DIRECT_IO, and whether the
 * current i_size is smaller than end_of_file.
 * NOTE(review): the return statements are not visible in this excerpt -
 * confirm which outcomes yield true.
 */
3257 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3262 if (is_inode_writable(cifsInode)) {
3263 /* This inode is open for write at least once */
3264 struct cifs_sb_info *cifs_sb;
3266 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3267 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3268 /* since no page cache to corrupt on directio
3269 we can change size safely */
3273 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - the .write_begin address-space operation: obtain the
 * page-cache page covering pos through grab_cache_page_write_begin() and
 * decide whether its current contents must be read from the server before
 * len bytes are written into it.
 *
 * The visible checks, in order: whether the page is already uptodate;
 * whether the write covers a whole page; whether, with
 * clientCanCacheRead set, the page lies at or beyond i_size or the write
 * starts at the page boundary and reaches i_size (the page is then zeroed
 * around the write and flagged PageChecked); and finally, for files not
 * opened write-only, the page is read with cifs_readpage_worker().
 *
 * NOTE(review): the return statement, the jumps out of the early checks
 * and the assignment to *pagep are not visible in this excerpt.
 */
3281 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3282 loff_t pos, unsigned len, unsigned flags,
3283 struct page **pagep, void **fsdata)
3285 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3286 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
/*
 * NOTE(review): page_start is loff_t but is derived with PAGE_MASK,
 * unlike the PAGE_CACHE_* constants used on the lines above; if
 * PAGE_MASK is narrower than loff_t the high bits of pos would be
 * dropped - confirm on 32-bit configurations.
 */
3287 loff_t page_start = pos & PAGE_MASK;
3292 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3294 page = grab_cache_page_write_begin(mapping, index, flags);
3300 if (PageUptodate(page))
3304 * If we write a full page it will be up to date, no need to read from
3305 * the server. If the write is short, we'll end up doing a sync write
3308 if (len == PAGE_CACHE_SIZE)
3312 * optimize away the read when we have an oplock, and we're not
3313 * expecting to use any of the data we'd be reading in. That
3314 * is, when the page lies beyond the EOF, or straddles the EOF
3315 * and the write will cover all of the existing data.
3317 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3318 i_size = i_size_read(mapping->host);
3319 if (page_start >= i_size ||
3320 (offset == 0 && (pos + len) >= i_size)) {
3321 zero_user_segments(page, 0, offset,
3325 * PageChecked means that the parts of the page
3326 * to which we're not writing are considered up
3327 * to date. Once the data is copied to the
3328 * page, it can be set uptodate.
3330 SetPageChecked(page);
3335 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3337 * might as well read a page, it is fast enough. If we get
3338 * an error, we don't need to return it. cifs_write_end will
3339 * do a sync write instead since PG_uptodate isn't set.
3341 cifs_readpage_worker(file, page, &page_start);
3343 /* we could try using another file handle if there is one -
3344 but how would we lock it to prevent close of that handle
3345 racing with this read? In any case
3346 this will be written out by write_end so is fine */
/*
 * cifs_release_page - the .releasepage address-space operation. Pages with
 * PagePrivate set are handled by a branch whose body is not visible in this
 * excerpt; for the rest the decision is delegated to
 * cifs_fscache_release_page().
 */
3353 static int cifs_release_page(struct page *page, gfp_t gfp)
3355 if (PagePrivate(page))
3358 return cifs_fscache_release_page(page, gfp);
/*
 * cifs_invalidate_page - the .invalidatepage address-space operation:
 * invalidates the page in fscache for the inode that owns it.
 * NOTE(review): any condition on offset guarding the call is not visible
 * in this excerpt - confirm.
 */
3361 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3363 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3366 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_launder_page - the .launder_page address-space operation: write a
 * dirty page back synchronously and invalidate it in fscache.
 *
 * The writeback_control covers exactly this page's byte range
 * (page_offset(page) through the last byte of the page) in WB_SYNC_ALL
 * mode. cifs_writepage_locked() is only called when
 * clear_page_dirty_for_io() reports that the page was dirty.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
3369 static int cifs_launder_page(struct page *page)
3372 loff_t range_start = page_offset(page);
3373 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3374 struct writeback_control wbc = {
3375 .sync_mode = WB_SYNC_ALL,
3377 .range_start = range_start,
3378 .range_end = range_end,
3381 cFYI(1, "Launder page: %p", page);
3383 if (clear_page_dirty_for_io(page))
3384 rc = cifs_writepage_locked(page, &wbc);
3386 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * cifs_oplock_break - work function that handles an oplock break for the
 * cifsFileInfo embedding the work item.
 *
 * For a regular file it breaks local leases (with O_RDONLY when
 * clientCanCacheRead is set, with O_WRONLY on the other visible branch),
 * starts writeback of the mapping and, when read caching is no longer
 * allowed, also waits for the writeback, records its result on the mapping
 * and invalidates the cached pages. It then pushes locks with
 * cifs_push_locks() and, unless the break was cancelled, sends the oplock
 * response through the server's ops table.
 *
 * NOTE(review): cinode is computed with CIFS_I(inode) before inode is
 * tested for NULL below - confirm that this is safe.
 */
3390 void cifs_oplock_break(struct work_struct *work)
3392 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3394 struct inode *inode = cfile->dentry->d_inode;
3395 struct cifsInodeInfo *cinode = CIFS_I(inode);
3396 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3399 if (inode && S_ISREG(inode->i_mode)) {
3400 if (cinode->clientCanCacheRead)
3401 break_lease(inode, O_RDONLY);
3403 break_lease(inode, O_WRONLY);
/* start writeback; this rc is replaced below when the wait is performed */
3404 rc = filemap_fdatawrite(inode->i_mapping);
3405 if (cinode->clientCanCacheRead == 0) {
3406 rc = filemap_fdatawait(inode->i_mapping);
3407 mapping_set_error(inode->i_mapping, rc);
3408 invalidate_remote_inode(inode);
3410 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3413 rc = cifs_push_locks(cfile);
3415 cERROR(1, "Push locks rc = %d", rc);
3418 * releasing stale oplock after recent reconnect of smb session using
3419 * a now incorrect file handle is not a data integrity issue but do
3420 * not bother sending an oplock release if session to server still is
3421 * disconnected since oplock already released by the server
3423 if (!cfile->oplock_break_cancelled) {
3424 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3426 cFYI(1, "Oplock release rc = %d", rc);
/*
 * Address-space operations table that includes the multi-page read path
 * (.readpages) next to the single-page .readpage.
 */
3430 const struct address_space_operations cifs_addr_ops = {
3431 .readpage = cifs_readpage,
3432 .readpages = cifs_readpages,
3433 .writepage = cifs_writepage,
3434 .writepages = cifs_writepages,
3435 .write_begin = cifs_write_begin,
3436 .write_end = cifs_write_end,
/* dirty tracking uses the generic no-buffer-heads helper */
3437 .set_page_dirty = __set_page_dirty_nobuffers,
3438 .releasepage = cifs_release_page,
3439 .invalidatepage = cifs_invalidate_page,
3440 .launder_page = cifs_launder_page,
3444 * cifs_readpages requires the server to support a buffer large enough to
3445 * contain the header plus one complete page of data. Otherwise, we need
3446 * to leave cifs_readpages out of the address space operations.
/*
 * Address-space operations table without a .readpages entry; every other
 * entry matches cifs_addr_ops.
 */
3448 const struct address_space_operations cifs_addr_ops_smallbuf = {
3449 .readpage = cifs_readpage,
3450 .writepage = cifs_writepage,
3451 .writepages = cifs_writepages,
3452 .write_begin = cifs_write_begin,
3453 .write_end = cifs_write_end,
3454 .set_page_dirty = __set_page_dirty_nobuffers,
3455 .releasepage = cifs_release_page,
3456 .invalidatepage = cifs_invalidate_page,
3457 .launder_page = cifs_launder_page,