CIFS: Fix write after setting a read lock for read oplock files
fs/cifs/file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46 static inline int cifs_convert_flags(unsigned int flags)
47 {
48         if ((flags & O_ACCMODE) == O_RDONLY)
49                 return GENERIC_READ;
50         else if ((flags & O_ACCMODE) == O_WRONLY)
51                 return GENERIC_WRITE;
52         else if ((flags & O_ACCMODE) == O_RDWR) {
53         /* GENERIC_ALL is too much permission to request; it can
54            cause an unnecessary access-denied error on create */
55                 /* return GENERIC_ALL; */
56                 return (GENERIC_READ | GENERIC_WRITE);
57         }
58
59         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
61                 FILE_READ_DATA);
62 }
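/*
 * Editor's note: for the three valid O_ACCMODE values the mapping above is
 * O_RDONLY -> GENERIC_READ, O_WRONLY -> GENERIC_WRITE and O_RDWR ->
 * (GENERIC_READ | GENERIC_WRITE). The final return is only reached for a
 * malformed access mode and falls back to the individual FILE_* rights
 * rather than the broad GENERIC_* bits.
 */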
63
64 static u32 cifs_posix_convert_flags(unsigned int flags)
65 {
66         u32 posix_flags = 0;
67
68         if ((flags & O_ACCMODE) == O_RDONLY)
69                 posix_flags = SMB_O_RDONLY;
70         else if ((flags & O_ACCMODE) == O_WRONLY)
71                 posix_flags = SMB_O_WRONLY;
72         else if ((flags & O_ACCMODE) == O_RDWR)
73                 posix_flags = SMB_O_RDWR;
74
75         if (flags & O_CREAT)
76                 posix_flags |= SMB_O_CREAT;
77         if (flags & O_EXCL)
78                 posix_flags |= SMB_O_EXCL;
79         if (flags & O_TRUNC)
80                 posix_flags |= SMB_O_TRUNC;
81         /* be safe and imply O_SYNC for O_DSYNC */
82         if (flags & O_DSYNC)
83                 posix_flags |= SMB_O_SYNC;
84         if (flags & O_DIRECTORY)
85                 posix_flags |= SMB_O_DIRECTORY;
86         if (flags & O_NOFOLLOW)
87                 posix_flags |= SMB_O_NOFOLLOW;
88         if (flags & O_DIRECT)
89                 posix_flags |= SMB_O_DIRECT;
90
91         return posix_flags;
92 }
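/*
 * Editor's note, a sample translation: open(path, O_WRONLY | O_CREAT |
 * O_TRUNC) becomes SMB_O_WRONLY | SMB_O_CREAT | SMB_O_TRUNC on the wire,
 * with O_DSYNC widened to the stronger SMB_O_SYNC per the "be safe"
 * comment above.
 */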
93
94 static inline int cifs_get_disposition(unsigned int flags)
95 {
96         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97                 return FILE_CREATE;
98         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99                 return FILE_OVERWRITE_IF;
100         else if ((flags & O_CREAT) == O_CREAT)
101                 return FILE_OPEN_IF;
102         else if ((flags & O_TRUNC) == O_TRUNC)
103                 return FILE_OVERWRITE;
104         else
105                 return FILE_OPEN;
106 }
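/*
 * Editor's note: the dispositions above are checked from most to least
 * specific, e.g. O_CREAT | O_EXCL yields FILE_CREATE (fail if the file
 * already exists) while plain O_CREAT yields FILE_OPEN_IF (open or
 * create); see the mapping table in cifs_nt_open() below.
 */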
107
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109                         struct super_block *sb, int mode, unsigned int f_flags,
110                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
111 {
112         int rc;
113         FILE_UNIX_BASIC_INFO *presp_data;
114         __u32 posix_flags = 0;
115         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116         struct cifs_fattr fattr;
117         struct tcon_link *tlink;
118         struct cifs_tcon *tcon;
119
120         cFYI(1, "posix open %s", full_path);
121
122         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123         if (presp_data == NULL)
124                 return -ENOMEM;
125
126         tlink = cifs_sb_tlink(cifs_sb);
127         if (IS_ERR(tlink)) {
128                 rc = PTR_ERR(tlink);
129                 goto posix_open_ret;
130         }
131
132         tcon = tlink_tcon(tlink);
133         mode &= ~current_umask();
134
135         posix_flags = cifs_posix_convert_flags(f_flags);
136         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137                              poplock, full_path, cifs_sb->local_nls,
138                              cifs_sb->mnt_cifs_flags &
139                                         CIFS_MOUNT_MAP_SPECIAL_CHR);
140         cifs_put_tlink(tlink);
141
142         if (rc)
143                 goto posix_open_ret;
144
145         if (presp_data->Type == cpu_to_le32(-1))
146                 goto posix_open_ret; /* open ok, caller does qpathinfo */
147
148         if (!pinode)
149                 goto posix_open_ret; /* caller does not need info */
150
151         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
152
153         /* get new inode and set it up */
154         if (*pinode == NULL) {
155                 cifs_fill_uniqueid(sb, &fattr);
156                 *pinode = cifs_iget(sb, &fattr);
157                 if (!*pinode) {
158                         rc = -ENOMEM;
159                         goto posix_open_ret;
160                 }
161         } else {
162                 cifs_fattr_to_inode(*pinode, &fattr);
163         }
164
165 posix_open_ret:
166         kfree(presp_data);
167         return rc;
168 }
169
170 static int
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
173              struct cifs_fid *fid, unsigned int xid)
174 {
175         int rc;
176         int desired_access;
177         int disposition;
178         int create_options = CREATE_NOT_DIR;
179         FILE_ALL_INFO *buf;
180         struct TCP_Server_Info *server = tcon->ses->server;
181
182         if (!server->ops->open)
183                 return -ENOSYS;
184
185         desired_access = cifs_convert_flags(f_flags);
186
187 /*********************************************************************
188  *  open flag mapping table:
189  *
190  *      POSIX Flag            CIFS Disposition
191  *      ----------            ----------------
192  *      O_CREAT               FILE_OPEN_IF
193  *      O_CREAT | O_EXCL      FILE_CREATE
194  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
195  *      O_TRUNC               FILE_OVERWRITE
196  *      none of the above     FILE_OPEN
197  *
198  *      Note that there is no direct match for the disposition
199  *      FILE_SUPERSEDE (ie create whether or not the file exists).
200  *      O_CREAT | O_TRUNC is similar but truncates the existing file
201  *      rather than creating a new one as FILE_SUPERSEDE does (which
202  *      uses the attributes / metadata passed in on the open call).
203  *
204  *      O_SYNC is a reasonable match to the CIFS writethrough flag
205  *      and the read/write flags match reasonably.  O_LARGEFILE is
206  *      irrelevant because largefile support is always used by this
207  *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
208  *      O_NOFOLLOW and O_NONBLOCK need further investigation.
209  *********************************************************************/
210
211         disposition = cifs_get_disposition(f_flags);
212
213         /* BB pass O_SYNC flag through on file attributes .. BB */
214
215         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
216         if (!buf)
217                 return -ENOMEM;
218
219         if (backup_cred(cifs_sb))
220                 create_options |= CREATE_OPEN_BACKUP_INTENT;
221
222         rc = server->ops->open(xid, tcon, full_path, disposition,
223                                desired_access, create_options, fid, oplock, buf,
224                                cifs_sb);
225
226         if (rc)
227                 goto out;
228
229         if (tcon->unix_ext)
230                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
231                                               xid);
232         else
233                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
234                                          xid, &fid->netfid);
235
236 out:
237         kfree(buf);
238         return rc;
239 }
240
241 struct cifsFileInfo *
242 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
243                   struct tcon_link *tlink, __u32 oplock)
244 {
245         struct dentry *dentry = file->f_path.dentry;
246         struct inode *inode = dentry->d_inode;
247         struct cifsInodeInfo *cinode = CIFS_I(inode);
248         struct cifsFileInfo *cfile;
249         struct cifs_fid_locks *fdlocks;
250         struct cifs_tcon *tcon = tlink_tcon(tlink);
251
252         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
253         if (cfile == NULL)
254                 return cfile;
255
256         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
257         if (!fdlocks) {
258                 kfree(cfile);
259                 return NULL;
260         }
261
262         INIT_LIST_HEAD(&fdlocks->locks);
263         fdlocks->cfile = cfile;
264         cfile->llist = fdlocks;
265         down_write(&cinode->lock_sem);
266         list_add(&fdlocks->llist, &cinode->llist);
267         up_write(&cinode->lock_sem);
268
269         cfile->count = 1;
270         cfile->pid = current->tgid;
271         cfile->uid = current_fsuid();
272         cfile->dentry = dget(dentry);
273         cfile->f_flags = file->f_flags;
274         cfile->invalidHandle = false;
275         cfile->tlink = cifs_get_tlink(tlink);
276         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
277         mutex_init(&cfile->fh_mutex);
278
279         spin_lock(&cifs_file_list_lock);
280         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE)
281                 oplock = fid->pending_open->oplock;
282         list_del(&fid->pending_open->olist);
283
284         tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
285
286         list_add(&cfile->tlist, &tcon->openFileList);
287         /* if a readable file instance, put it first in the list */
288         if (file->f_mode & FMODE_READ)
289                 list_add(&cfile->flist, &cinode->openFileList);
290         else
291                 list_add_tail(&cfile->flist, &cinode->openFileList);
292         spin_unlock(&cifs_file_list_lock);
293
294         file->private_data = cfile;
295         return cfile;
296 }
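/*
 * Editor's note: the pending_open entry is consumed above so that an
 * oplock delivered by a break while the open was in flight is adopted
 * rather than lost; readable instances go to the head of
 * cinode->openFileList, presumably so searches for a readable handle can
 * stop at the first entry.
 */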
297
298 struct cifsFileInfo *
299 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
300 {
301         spin_lock(&cifs_file_list_lock);
302         cifsFileInfo_get_locked(cifs_file);
303         spin_unlock(&cifs_file_list_lock);
304         return cifs_file;
305 }
306
307 /*
308  * Release a reference on the file private data. This may involve closing
309  * the filehandle out on the server. Must be called without holding
310  * cifs_file_list_lock.
311  */
312 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
313 {
314         struct inode *inode = cifs_file->dentry->d_inode;
315         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
316         struct TCP_Server_Info *server = tcon->ses->server;
317         struct cifsInodeInfo *cifsi = CIFS_I(inode);
318         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
319         struct cifsLockInfo *li, *tmp;
320         struct cifs_fid fid;
321         struct cifs_pending_open open;
322
323         spin_lock(&cifs_file_list_lock);
324         if (--cifs_file->count > 0) {
325                 spin_unlock(&cifs_file_list_lock);
326                 return;
327         }
328
329         if (server->ops->get_lease_key)
330                 server->ops->get_lease_key(inode, &fid);
331
332         /* store open in pending opens to make sure we don't miss lease break */
333         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
334
335         /* remove it from the lists */
336         list_del(&cifs_file->flist);
337         list_del(&cifs_file->tlist);
338
339         if (list_empty(&cifsi->openFileList)) {
340                 cFYI(1, "closing last open instance for inode %p",
341                         cifs_file->dentry->d_inode);
342                 /*
343                  * In strict cache mode we need to invalidate the mapping on the
344                  * last close because it may cause an error when we open this
345                  * file again and get at least a level II oplock.
346                  */
347                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
348                         CIFS_I(inode)->invalid_mapping = true;
349                 cifs_set_oplock_level(cifsi, 0);
350         }
351         spin_unlock(&cifs_file_list_lock);
352
353         cancel_work_sync(&cifs_file->oplock_break);
354
355         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
356                 struct TCP_Server_Info *server = tcon->ses->server;
357                 unsigned int xid;
358
359                 xid = get_xid();
360                 if (server->ops->close)
361                         server->ops->close(xid, tcon, &cifs_file->fid);
362                 _free_xid(xid);
363         }
364
365         cifs_del_pending_open(&open);
366
367         /*
368          * Delete any outstanding lock records. We'll lose them when the file
369          * is closed anyway.
370          */
371         down_write(&cifsi->lock_sem);
372         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
373                 list_del(&li->llist);
374                 cifs_del_lock_waiters(li);
375                 kfree(li);
376         }
377         list_del(&cifs_file->llist->llist);
378         kfree(cifs_file->llist);
379         up_write(&cifsi->lock_sem);
380
381         cifs_put_tlink(cifs_file->tlink);
382         dput(cifs_file->dentry);
383         kfree(cifs_file);
384 }
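/*
 * Editor's note on the teardown ordering above: the refcount drop and
 * list removal happen under cifs_file_list_lock, the server-side close is
 * issued outside of it, and the pending_open entry guards the window in
 * between so a lease break arriving mid-close is not missed.
 */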
385
386 int cifs_open(struct inode *inode, struct file *file)
388 {
389         int rc = -EACCES;
390         unsigned int xid;
391         __u32 oplock;
392         struct cifs_sb_info *cifs_sb;
393         struct TCP_Server_Info *server;
394         struct cifs_tcon *tcon;
395         struct tcon_link *tlink;
396         struct cifsFileInfo *cfile = NULL;
397         char *full_path = NULL;
398         bool posix_open_ok = false;
399         struct cifs_fid fid;
400         struct cifs_pending_open open;
401
402         xid = get_xid();
403
404         cifs_sb = CIFS_SB(inode->i_sb);
405         tlink = cifs_sb_tlink(cifs_sb);
406         if (IS_ERR(tlink)) {
407                 free_xid(xid);
408                 return PTR_ERR(tlink);
409         }
410         tcon = tlink_tcon(tlink);
411         server = tcon->ses->server;
412
413         full_path = build_path_from_dentry(file->f_path.dentry);
414         if (full_path == NULL) {
415                 rc = -ENOMEM;
416                 goto out;
417         }
418
419         cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
420                  inode, file->f_flags, full_path);
421
422         if (server->oplocks)
423                 oplock = REQ_OPLOCK;
424         else
425                 oplock = 0;
426
427         if (!tcon->broken_posix_open && tcon->unix_ext &&
428             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
429                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
430                 /* can not refresh inode info since size could be stale */
431                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
432                                 cifs_sb->mnt_file_mode /* ignored */,
433                                 file->f_flags, &oplock, &fid.netfid, xid);
434                 if (rc == 0) {
435                         cFYI(1, "posix open succeeded");
436                         posix_open_ok = true;
437                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
438                         if (tcon->ses->serverNOS)
439                                 cERROR(1, "server %s of type %s returned"
440                                            " unexpected error on SMB posix open"
441                                            ", disabling posix open support."
442                                            " Check if server update available.",
443                                            tcon->ses->serverName,
444                                            tcon->ses->serverNOS);
445                         tcon->broken_posix_open = true;
446                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
447                          (rc != -EOPNOTSUPP)) /* path not found or net err */
448                         goto out;
449                 /*
450                  * Else fallthrough to retry open the old way on network i/o
451                  * or DFS errors.
452                  */
453         }
454
455         if (server->ops->get_lease_key)
456                 server->ops->get_lease_key(inode, &fid);
457
458         cifs_add_pending_open(&fid, tlink, &open);
459
460         if (!posix_open_ok) {
461                 if (server->ops->get_lease_key)
462                         server->ops->get_lease_key(inode, &fid);
463
464                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
465                                   file->f_flags, &oplock, &fid, xid);
466                 if (rc) {
467                         cifs_del_pending_open(&open);
468                         goto out;
469                 }
470         }
471
472         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
473         if (cfile == NULL) {
474                 if (server->ops->close)
475                         server->ops->close(xid, tcon, &fid);
476                 cifs_del_pending_open(&open);
477                 rc = -ENOMEM;
478                 goto out;
479         }
480
481         cifs_fscache_set_inode_cookie(inode, file);
482
483         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
484                 /*
485                  * Time to set the mode, which we could not set earlier due
486                  * to problems creating new read-only files.
487                  */
488                 struct cifs_unix_set_info_args args = {
489                         .mode   = inode->i_mode,
490                         .uid    = NO_CHANGE_64,
491                         .gid    = NO_CHANGE_64,
492                         .ctime  = NO_CHANGE_64,
493                         .atime  = NO_CHANGE_64,
494                         .mtime  = NO_CHANGE_64,
495                         .device = 0,
496                 };
497                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
498                                        cfile->pid);
499         }
500
501 out:
502         kfree(full_path);
503         free_xid(xid);
504         cifs_put_tlink(tlink);
505         return rc;
506 }
507
508 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
509
510 /*
511  * Try to reacquire byte range locks that were released when session
512  * to server was lost.
513  */
514 static int
515 cifs_relock_file(struct cifsFileInfo *cfile)
516 {
517         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
518         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
519         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
520         int rc = 0;
521
522         /* we are going to update can_cache_brlcks here - need write access */
523         down_write(&cinode->lock_sem);
524         if (cinode->can_cache_brlcks) {
525                 /* can cache locks - no need to push them */
526                 up_write(&cinode->lock_sem);
527                 return rc;
528         }
529
530         if (cap_unix(tcon->ses) &&
531             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
532             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
533                 rc = cifs_push_posix_locks(cfile);
534         else
535                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
536
537         up_write(&cinode->lock_sem);
538         return rc;
539 }
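/*
 * Editor's note: unlike cifs_push_locks() below, this path does not
 * actually update can_cache_brlcks; taking lock_sem for writing here
 * appears intended to keep the relock atomic with respect to concurrent
 * lock and unlock requests on the inode.
 */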
540
541 static int
542 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
543 {
544         int rc = -EACCES;
545         unsigned int xid;
546         __u32 oplock;
547         struct cifs_sb_info *cifs_sb;
548         struct cifs_tcon *tcon;
549         struct TCP_Server_Info *server;
550         struct cifsInodeInfo *cinode;
551         struct inode *inode;
552         char *full_path = NULL;
553         int desired_access;
554         int disposition = FILE_OPEN;
555         int create_options = CREATE_NOT_DIR;
556         struct cifs_fid fid;
557
558         xid = get_xid();
559         mutex_lock(&cfile->fh_mutex);
560         if (!cfile->invalidHandle) {
561                 mutex_unlock(&cfile->fh_mutex);
562                 rc = 0;
563                 free_xid(xid);
564                 return rc;
565         }
566
567         inode = cfile->dentry->d_inode;
568         cifs_sb = CIFS_SB(inode->i_sb);
569         tcon = tlink_tcon(cfile->tlink);
570         server = tcon->ses->server;
571
572         /*
573          * Cannot grab the rename sem here: various ops, including those that
574          * already hold it, can end up causing writepage to be called, and if
575          * the server was down that means we end up here, and we can never
576          * tell whether the caller already holds the rename_sem.
577          */
578         full_path = build_path_from_dentry(cfile->dentry);
579         if (full_path == NULL) {
580                 rc = -ENOMEM;
581                 mutex_unlock(&cfile->fh_mutex);
582                 free_xid(xid);
583                 return rc;
584         }
585
586         cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
587              full_path);
588
589         if (tcon->ses->server->oplocks)
590                 oplock = REQ_OPLOCK;
591         else
592                 oplock = 0;
593
594         if (tcon->unix_ext && cap_unix(tcon->ses) &&
595             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
596                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
597                 /*
598                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
599                  * original open. Must mask them off for a reopen.
600                  */
601                 unsigned int oflags = cfile->f_flags &
602                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
603
604                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
605                                      cifs_sb->mnt_file_mode /* ignored */,
606                                      oflags, &oplock, &fid.netfid, xid);
607                 if (rc == 0) {
608                         cFYI(1, "posix reopen succeeded");
609                         goto reopen_success;
610                 }
611                 /*
612                  * Fall through to retry the open the old way on errors;
613                  * especially in the reconnect path it is important to retry hard.
614                  */
615         }
616
617         desired_access = cifs_convert_flags(cfile->f_flags);
618
619         if (backup_cred(cifs_sb))
620                 create_options |= CREATE_OPEN_BACKUP_INTENT;
621
622         if (server->ops->get_lease_key)
623                 server->ops->get_lease_key(inode, &fid);
624
625         /*
626          * Cannot refresh the inode by passing a file_info buf to CIFSSMBOpen
627          * and then calling get_inode_info with the returned buf, since the
628          * file might have write-behind data that needs flushing and the
629          * server's version of the file size can be stale. If we knew for
630          * sure the inode was not dirty locally we could do this.
631          */
632         rc = server->ops->open(xid, tcon, full_path, disposition,
633                                desired_access, create_options, &fid, &oplock,
634                                NULL, cifs_sb);
635         if (rc) {
636                 mutex_unlock(&cfile->fh_mutex);
637                 cFYI(1, "cifs_reopen returned 0x%x", rc);
638                 cFYI(1, "oplock: %d", oplock);
639                 goto reopen_error_exit;
640         }
641
642 reopen_success:
643         cfile->invalidHandle = false;
644         mutex_unlock(&cfile->fh_mutex);
645         cinode = CIFS_I(inode);
646
647         if (can_flush) {
648                 rc = filemap_write_and_wait(inode->i_mapping);
649                 mapping_set_error(inode->i_mapping, rc);
650
651                 if (tcon->unix_ext)
652                         rc = cifs_get_inode_info_unix(&inode, full_path,
653                                                       inode->i_sb, xid);
654                 else
655                         rc = cifs_get_inode_info(&inode, full_path, NULL,
656                                                  inode->i_sb, xid, NULL);
657         }
658         /*
659          * Else we are already writing data out to the server and could
660          * deadlock if we tried to flush it; and since we do not know whether
661          * we have data that would invalidate the current end of file on the
662          * server, we cannot go to the server to get the new inode info.
663          */
664
665         server->ops->set_fid(cfile, &fid, oplock);
666         cifs_relock_file(cfile);
667
668 reopen_error_exit:
669         kfree(full_path);
670         free_xid(xid);
671         return rc;
672 }
673
674 int cifs_close(struct inode *inode, struct file *file)
675 {
676         if (file->private_data != NULL) {
677                 cifsFileInfo_put(file->private_data);
678                 file->private_data = NULL;
679         }
680
681         /* return code from the ->release op is always ignored */
682         return 0;
683 }
684
685 int cifs_closedir(struct inode *inode, struct file *file)
686 {
687         int rc = 0;
688         unsigned int xid;
689         struct cifsFileInfo *cfile = file->private_data;
690         struct cifs_tcon *tcon;
691         struct TCP_Server_Info *server;
692         char *buf;
693
694         cFYI(1, "Closedir inode = 0x%p", inode);
695
696         if (cfile == NULL)
697                 return rc;
698
699         xid = get_xid();
700         tcon = tlink_tcon(cfile->tlink);
701         server = tcon->ses->server;
702
703         cFYI(1, "Freeing private data in close dir");
704         spin_lock(&cifs_file_list_lock);
705         if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
706                 cfile->invalidHandle = true;
707                 spin_unlock(&cifs_file_list_lock);
708                 if (server->ops->close_dir)
709                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
710                 else
711                         rc = -ENOSYS;
712                 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
713                 /* not much we can do if it fails anyway, ignore rc */
714                 rc = 0;
715         } else
716                 spin_unlock(&cifs_file_list_lock);
717
718         buf = cfile->srch_inf.ntwrk_buf_start;
719         if (buf) {
720                 cFYI(1, "closedir free smb buf in srch struct");
721                 cfile->srch_inf.ntwrk_buf_start = NULL;
722                 if (cfile->srch_inf.smallBuf)
723                         cifs_small_buf_release(buf);
724                 else
725                         cifs_buf_release(buf);
726         }
727
728         cifs_put_tlink(cfile->tlink);
729         kfree(file->private_data);
730         file->private_data = NULL;
731         /* BB can we lock the filestruct while this is going on? */
732         free_xid(xid);
733         return rc;
734 }
735
736 static struct cifsLockInfo *
737 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
738 {
739         struct cifsLockInfo *lock =
740                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
741         if (!lock)
742                 return lock;
743         lock->offset = offset;
744         lock->length = length;
745         lock->type = type;
746         lock->pid = current->tgid;
747         INIT_LIST_HEAD(&lock->blist);
748         init_waitqueue_head(&lock->block_q);
749         return lock;
750 }
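/*
 * Editor's note: the owner recorded in lock->pid is current->tgid, so the
 * conflict checks below treat all threads of a process as a single lock
 * owner, matching POSIX record-lock ownership semantics.
 */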
751
752 void
753 cifs_del_lock_waiters(struct cifsLockInfo *lock)
754 {
755         struct cifsLockInfo *li, *tmp;
756         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
757                 list_del_init(&li->blist);
758                 wake_up(&li->block_q);
759         }
760 }
761
762 #define CIFS_LOCK_OP    0
763 #define CIFS_READ_OP    1
764 #define CIFS_WRITE_OP   2
765
766 /* @rw_check : CIFS_LOCK_OP, CIFS_READ_OP or CIFS_WRITE_OP (defined above) */
767 static bool
768 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
769                             __u64 length, __u8 type, struct cifsFileInfo *cfile,
770                             struct cifsLockInfo **conf_lock, int rw_check)
771 {
772         struct cifsLockInfo *li;
773         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
774         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
775
776         list_for_each_entry(li, &fdlocks->locks, llist) {
777                 if (offset + length <= li->offset ||
778                     offset >= li->offset + li->length)
779                         continue;
780                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
781                     server->ops->compare_fids(cfile, cur_cfile)) {
782                         /* shared lock prevents write op through the same fid */
783                         if (!(li->type & server->vals->shared_lock_type) ||
784                             rw_check != CIFS_WRITE_OP)
785                                 continue;
786                 }
787                 if ((type & server->vals->shared_lock_type) &&
788                     ((server->ops->compare_fids(cfile, cur_cfile) &&
789                      current->tgid == li->pid) || type == li->type))
790                         continue;
791                 if (conf_lock)
792                         *conf_lock = li;
793                 return true;
794         }
795         return false;
796 }
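/*
 * Editor's note: this is the heart of the fix named in the commit title.
 * For read/write checks (rw_check != CIFS_LOCK_OP) a lock held by the
 * same owner through the same fid is normally not a conflict, except
 * that a shared (read) lock still blocks a write through that fid, so a
 * write issued after setting a read lock is correctly refused.
 */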
797
798 bool
799 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
800                         __u8 type, struct cifsLockInfo **conf_lock,
801                         int rw_check)
802 {
803         bool rc = false;
804         struct cifs_fid_locks *cur;
805         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
806
807         list_for_each_entry(cur, &cinode->llist, llist) {
808                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
809                                                  cfile, conf_lock, rw_check);
810                 if (rc)
811                         break;
812         }
813
814         return rc;
815 }
816
817 /*
818  * Check if there is another lock that prevents us from setting the lock
819  * (mandatory style). If such a lock exists, update the flock structure with
820  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
821  * brlocks or leave it the same if we can't. Returns 0 if we don't need to
822  * request to the server or 1 otherwise.
823  */
824 static int
825 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
826                __u8 type, struct file_lock *flock)
827 {
828         int rc = 0;
829         struct cifsLockInfo *conf_lock;
830         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
831         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
832         bool exist;
833
834         down_read(&cinode->lock_sem);
835
836         exist = cifs_find_lock_conflict(cfile, offset, length, type,
837                                         &conf_lock, CIFS_LOCK_OP);
838         if (exist) {
839                 flock->fl_start = conf_lock->offset;
840                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
841                 flock->fl_pid = conf_lock->pid;
842                 if (conf_lock->type & server->vals->shared_lock_type)
843                         flock->fl_type = F_RDLCK;
844                 else
845                         flock->fl_type = F_WRLCK;
846         } else if (!cinode->can_cache_brlcks)
847                 rc = 1;
848         else
849                 flock->fl_type = F_UNLCK;
850
851         up_read(&cinode->lock_sem);
852         return rc;
853 }
854
855 static void
856 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
857 {
858         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
859         down_write(&cinode->lock_sem);
860         list_add_tail(&lock->llist, &cfile->llist->locks);
861         up_write(&cinode->lock_sem);
862 }
863
864 /*
865  * Set the byte-range lock (mandatory style). Returns:
866  * 1) 0, if we set the lock and don't need to request to the server;
867  * 2) 1, if no locks prevent us but we need to request to the server;
868  * 3) -EACCES, if there is a lock that prevents us and wait is false.
869  */
870 static int
871 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
872                  bool wait)
873 {
874         struct cifsLockInfo *conf_lock;
875         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
876         bool exist;
877         int rc = 0;
878
879 try_again:
880         exist = false;
881         down_write(&cinode->lock_sem);
882
883         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
884                                         lock->type, &conf_lock, CIFS_LOCK_OP);
885         if (!exist && cinode->can_cache_brlcks) {
886                 list_add_tail(&lock->llist, &cfile->llist->locks);
887                 up_write(&cinode->lock_sem);
888                 return rc;
889         }
890
891         if (!exist)
892                 rc = 1;
893         else if (!wait)
894                 rc = -EACCES;
895         else {
896                 list_add_tail(&lock->blist, &conf_lock->blist);
897                 up_write(&cinode->lock_sem);
898                 rc = wait_event_interruptible(lock->block_q,
899                                         (lock->blist.prev == &lock->blist) &&
900                                         (lock->blist.next == &lock->blist));
901                 if (!rc)
902                         goto try_again;
903                 down_write(&cinode->lock_sem);
904                 list_del_init(&lock->blist);
905         }
906
907         up_write(&cinode->lock_sem);
908         return rc;
909 }
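/*
 * Editor's note: the wait_event_interruptible() condition above is an
 * open-coded list_empty(&lock->blist); the waiter queues itself on the
 * conflicting lock's blist and is unlinked and woken by
 * cifs_del_lock_waiters() when that lock goes away, after which the
 * whole conflict check is retried from scratch.
 */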
910
911 /*
912  * Check if there is another lock that prevents us from setting the lock
913  * (posix style). If such a lock exists, update the flock structure with
914  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
915  * brlocks or leave it the same if we can't. Returns 0 if we don't need to
916  * request to the server or 1 otherwise.
917  */
918 static int
919 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
920 {
921         int rc = 0;
922         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
923         unsigned char saved_type = flock->fl_type;
924
925         if ((flock->fl_flags & FL_POSIX) == 0)
926                 return 1;
927
928         down_read(&cinode->lock_sem);
929         posix_test_lock(file, flock);
930
931         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
932                 flock->fl_type = saved_type;
933                 rc = 1;
934         }
935
936         up_read(&cinode->lock_sem);
937         return rc;
938 }
939
940 /*
941  * Set the byte-range lock (posix style). Returns:
942  * 1) 0, if we set the lock and don't need to request to the server;
943  * 2) 1, if we need to request to the server;
944  * 3) <0, if an error occurs while setting the lock.
945  */
946 static int
947 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
948 {
949         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
950         int rc = 1;
951
952         if ((flock->fl_flags & FL_POSIX) == 0)
953                 return rc;
954
955 try_again:
956         down_write(&cinode->lock_sem);
957         if (!cinode->can_cache_brlcks) {
958                 up_write(&cinode->lock_sem);
959                 return rc;
960         }
961
962         rc = posix_lock_file(file, flock, NULL);
963         up_write(&cinode->lock_sem);
964         if (rc == FILE_LOCK_DEFERRED) {
965                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
966                 if (!rc)
967                         goto try_again;
968                 locks_delete_block(flock);
969         }
970         return rc;
971 }
972
973 int
974 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
975 {
976         unsigned int xid;
977         int rc = 0, stored_rc;
978         struct cifsLockInfo *li, *tmp;
979         struct cifs_tcon *tcon;
980         unsigned int num, max_num, max_buf;
981         LOCKING_ANDX_RANGE *buf, *cur;
982         int types[] = {LOCKING_ANDX_LARGE_FILES,
983                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
984         int i;
985
986         xid = get_xid();
987         tcon = tlink_tcon(cfile->tlink);
988
989         /*
990          * Accessing maxBuf is racy with cifs_reconnect - need to store value
991          * and check it for zero before using.
992          */
993         max_buf = tcon->ses->server->maxBuf;
994         if (!max_buf) {
995                 free_xid(xid);
996                 return -EINVAL;
997         }
998
999         max_num = (max_buf - sizeof(struct smb_hdr)) /
1000                                                 sizeof(LOCKING_ANDX_RANGE);
1001         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1002         if (!buf) {
1003                 free_xid(xid);
1004                 return -ENOMEM;
1005         }
1006
1007         for (i = 0; i < 2; i++) {
1008                 cur = buf;
1009                 num = 0;
1010                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1011                         if (li->type != types[i])
1012                                 continue;
1013                         cur->Pid = cpu_to_le16(li->pid);
1014                         cur->LengthLow = cpu_to_le32((u32)li->length);
1015                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1016                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1017                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1018                         if (++num == max_num) {
1019                                 stored_rc = cifs_lockv(xid, tcon,
1020                                                        cfile->fid.netfid,
1021                                                        (__u8)li->type, 0, num,
1022                                                        buf);
1023                                 if (stored_rc)
1024                                         rc = stored_rc;
1025                                 cur = buf;
1026                                 num = 0;
1027                         } else
1028                                 cur++;
1029                 }
1030
1031                 if (num) {
1032                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1033                                                (__u8)types[i], 0, num, buf);
1034                         if (stored_rc)
1035                                 rc = stored_rc;
1036                 }
1037         }
1038
1039         kfree(buf);
1040         free_xid(xid);
1041         return rc;
1042 }
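/*
 * Editor's note: locks are pushed in batches of up to max_num ranges per
 * LOCKING_ANDX request, sized to fit within the server's maxBuf, and two
 * passes are made because each request carries a single lock type
 * (exclusive vs. shared) for all of the ranges it contains.
 */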
1043
1044 /* copied from fs/locks.c with a name change */
1045 #define cifs_for_each_lock(inode, lockp) \
1046         for (lockp = &inode->i_flock; *lockp != NULL; \
1047              lockp = &(*lockp)->fl_next)
1048
1049 struct lock_to_push {
1050         struct list_head llist;
1051         __u64 offset;
1052         __u64 length;
1053         __u32 pid;
1054         __u16 netfid;
1055         __u8 type;
1056 };
1057
1058 static int
1059 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1060 {
1061         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1062         struct file_lock *flock, **before;
1063         unsigned int count = 0, i = 0;
1064         int rc = 0, xid, type;
1065         struct list_head locks_to_send, *el;
1066         struct lock_to_push *lck, *tmp;
1067         __u64 length;
1068
1069         xid = get_xid();
1070
1071         lock_flocks();
1072         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1073                 if ((*before)->fl_flags & FL_POSIX)
1074                         count++;
1075         }
1076         unlock_flocks();
1077
1078         INIT_LIST_HEAD(&locks_to_send);
1079
1080         /*
1081          * Allocating count locks is enough because no FL_POSIX locks can be
1082          * added to the list while we are holding cinode->lock_sem, which
1083          * protects the locking operations of this inode.
1084          */
1085         for (; i < count; i++) {
1086                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1087                 if (!lck) {
1088                         rc = -ENOMEM;
1089                         goto err_out;
1090                 }
1091                 list_add_tail(&lck->llist, &locks_to_send);
1092         }
1093
1094         el = locks_to_send.next;
1095         lock_flocks();
1096         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1097                 flock = *before;
1098                 if ((flock->fl_flags & FL_POSIX) == 0)
1099                         continue;
1100                 if (el == &locks_to_send) {
1101                         /*
1102                          * The list ended. We don't have enough allocated
1103                          * structures - something is really wrong.
1104                          */
1105                         cERROR(1, "Can't push all brlocks!");
1106                         break;
1107                 }
1108                 length = 1 + flock->fl_end - flock->fl_start;
1109                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1110                         type = CIFS_RDLCK;
1111                 else
1112                         type = CIFS_WRLCK;
1113                 lck = list_entry(el, struct lock_to_push, llist);
1114                 lck->pid = flock->fl_pid;
1115                 lck->netfid = cfile->fid.netfid;
1116                 lck->length = length;
1117                 lck->type = type;
1118                 lck->offset = flock->fl_start;
1119                 el = el->next;
1120         }
1121         unlock_flocks();
1122
1123         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1124                 int stored_rc;
1125
1126                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1127                                              lck->offset, lck->length, NULL,
1128                                              lck->type, 0);
1129                 if (stored_rc)
1130                         rc = stored_rc;
1131                 list_del(&lck->llist);
1132                 kfree(lck);
1133         }
1134
1135 out:
1136         free_xid(xid);
1137         return rc;
1138 err_out:
1139         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1140                 list_del(&lck->llist);
1141                 kfree(lck);
1142         }
1143         goto out;
1144 }
1145
1146 static int
1147 cifs_push_locks(struct cifsFileInfo *cfile)
1148 {
1149         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1150         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1151         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1152         int rc = 0;
1153
1154         /* we are going to update can_cache_brlcks here - need write access */
1155         down_write(&cinode->lock_sem);
1156         if (!cinode->can_cache_brlcks) {
1157                 up_write(&cinode->lock_sem);
1158                 return rc;
1159         }
1160
1161         if (cap_unix(tcon->ses) &&
1162             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1163             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1164                 rc = cifs_push_posix_locks(cfile);
1165         else
1166                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1167
1168         cinode->can_cache_brlcks = false;
1169         up_write(&cinode->lock_sem);
1170         return rc;
1171 }
1172
1173 static void
1174 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1175                 bool *wait_flag, struct TCP_Server_Info *server)
1176 {
1177         if (flock->fl_flags & FL_POSIX)
1178                 cFYI(1, "Posix");
1179         if (flock->fl_flags & FL_FLOCK)
1180                 cFYI(1, "Flock");
1181         if (flock->fl_flags & FL_SLEEP) {
1182                 cFYI(1, "Blocking lock");
1183                 *wait_flag = true;
1184         }
1185         if (flock->fl_flags & FL_ACCESS)
1186                 cFYI(1, "Process suspended by mandatory locking - "
1187                         "not implemented yet");
1188         if (flock->fl_flags & FL_LEASE)
1189                 cFYI(1, "Lease on file - not implemented yet");
1190         if (flock->fl_flags &
1191             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1192                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1193                 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1194
1195         *type = server->vals->large_lock_type;
1196         if (flock->fl_type == F_WRLCK) {
1197                 cFYI(1, "F_WRLCK ");
1198                 *type |= server->vals->exclusive_lock_type;
1199                 *lock = 1;
1200         } else if (flock->fl_type == F_UNLCK) {
1201                 cFYI(1, "F_UNLCK");
1202                 *type |= server->vals->unlock_lock_type;
1203                 *unlock = 1;
1204                 /* Check if unlock includes more than one lock range */
1205         } else if (flock->fl_type == F_RDLCK) {
1206                 cFYI(1, "F_RDLCK");
1207                 *type |= server->vals->shared_lock_type;
1208                 *lock = 1;
1209         } else if (flock->fl_type == F_EXLCK) {
1210                 cFYI(1, "F_EXLCK");
1211                 *type |= server->vals->exclusive_lock_type;
1212                 *lock = 1;
1213         } else if (flock->fl_type == F_SHLCK) {
1214                 cFYI(1, "F_SHLCK");
1215                 *type |= server->vals->shared_lock_type;
1216                 *lock = 1;
1217         } else
1218                 cFYI(1, "Unknown type of lock");
1219 }
1220
1221 static int
1222 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1223            bool wait_flag, bool posix_lck, unsigned int xid)
1224 {
1225         int rc = 0;
1226         __u64 length = 1 + flock->fl_end - flock->fl_start;
1227         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1228         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1229         struct TCP_Server_Info *server = tcon->ses->server;
1230         __u16 netfid = cfile->fid.netfid;
1231
1232         if (posix_lck) {
1233                 int posix_lock_type;
1234
1235                 rc = cifs_posix_lock_test(file, flock);
1236                 if (!rc)
1237                         return rc;
1238
1239                 if (type & server->vals->shared_lock_type)
1240                         posix_lock_type = CIFS_RDLCK;
1241                 else
1242                         posix_lock_type = CIFS_WRLCK;
1243                 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1244                                       flock->fl_start, length, flock,
1245                                       posix_lock_type, wait_flag);
1246                 return rc;
1247         }
1248
1249         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1250         if (!rc)
1251                 return rc;
1252
1253         /* BB we could chain these into one lock request BB */
1254         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1255                                     1, 0, false);
1256         if (rc == 0) {
1257                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1258                                             type, 0, 1, false);
1259                 flock->fl_type = F_UNLCK;
1260                 if (rc != 0)
1261                         cERROR(1, "Error unlocking previously locked "
1262                                   "range %d during test of lock", rc);
1263                 return 0;
1264         }
1265
1266         if (type & server->vals->shared_lock_type) {
1267                 flock->fl_type = F_WRLCK;
1268                 return 0;
1269         }
1270
1271         type &= ~server->vals->exclusive_lock_type;
1272
1273         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1274                                     type | server->vals->shared_lock_type,
1275                                     1, 0, false);
1276         if (rc == 0) {
1277                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1278                         type | server->vals->shared_lock_type, 0, 1, false);
1279                 flock->fl_type = F_RDLCK;
1280                 if (rc != 0)
1281                         cERROR(1, "Error unlocking previously locked "
1282                                   "range %d during test of lock", rc);
1283         } else
1284                 flock->fl_type = F_WRLCK;
1285
1286         return 0;
1287 }
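/*
 * Editor's note: there is no cheap test-only operation for mandatory
 * locks here, so cifs_getlk() probes by actually acquiring the range and
 * immediately unlocking it; if the exclusive probe fails, it retries
 * with a shared lock to report whether F_RDLCK would succeed.
 */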
1288
1289 void
1290 cifs_move_llist(struct list_head *source, struct list_head *dest)
1291 {
1292         struct list_head *li, *tmp;
1293         list_for_each_safe(li, tmp, source)
1294                 list_move(li, dest);
1295 }
1296
1297 void
1298 cifs_free_llist(struct list_head *llist)
1299 {
1300         struct cifsLockInfo *li, *tmp;
1301         list_for_each_entry_safe(li, tmp, llist, llist) {
1302                 cifs_del_lock_waiters(li);
1303                 list_del(&li->llist);
1304                 kfree(li);
1305         }
1306 }
1307
1308 int
1309 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1310                   unsigned int xid)
1311 {
1312         int rc = 0, stored_rc;
1313         int types[] = {LOCKING_ANDX_LARGE_FILES,
1314                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1315         unsigned int i;
1316         unsigned int max_num, num, max_buf;
1317         LOCKING_ANDX_RANGE *buf, *cur;
1318         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1319         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1320         struct cifsLockInfo *li, *tmp;
1321         __u64 length = 1 + flock->fl_end - flock->fl_start;
1322         struct list_head tmp_llist;
1323
1324         INIT_LIST_HEAD(&tmp_llist);
1325
1326         /*
1327          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1328          * and check it for zero before using.
1329          */
1330         max_buf = tcon->ses->server->maxBuf;
1331         if (!max_buf)
1332                 return -EINVAL;
1333
1334         max_num = (max_buf - sizeof(struct smb_hdr)) /
1335                                                 sizeof(LOCKING_ANDX_RANGE);
1336         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1337         if (!buf)
1338                 return -ENOMEM;
1339
1340         down_write(&cinode->lock_sem);
1341         for (i = 0; i < 2; i++) {
1342                 cur = buf;
1343                 num = 0;
1344                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1345                         if (flock->fl_start > li->offset ||
1346                             (flock->fl_start + length) <
1347                             (li->offset + li->length))
1348                                 continue;
1349                         if (current->tgid != li->pid)
1350                                 continue;
1351                         if (types[i] != li->type)
1352                                 continue;
1353                         if (cinode->can_cache_brlcks) {
1354                                 /*
1355                                  * We can cache brlock requests - simply remove
1356                                  * a lock from the file's list.
1357                                  */
1358                                 list_del(&li->llist);
1359                                 cifs_del_lock_waiters(li);
1360                                 kfree(li);
1361                                 continue;
1362                         }
1363                         cur->Pid = cpu_to_le16(li->pid);
1364                         cur->LengthLow = cpu_to_le32((u32)li->length);
1365                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1366                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1367                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1368                         /*
1369                          * Save the lock here so that we can add it back to the
1370                          * file's list if the unlock range request fails on the
1371                          * server.
1372                          */
1373                         list_move(&li->llist, &tmp_llist);
1374                         if (++num == max_num) {
1375                                 stored_rc = cifs_lockv(xid, tcon,
1376                                                        cfile->fid.netfid,
1377                                                        li->type, num, 0, buf);
1378                                 if (stored_rc) {
1379                                         /*
1380                                          * We failed on the unlock range
1381                                          * request - add all locks from the tmp
1382                                          * list to the head of the file's list.
1383                                          */
1384                                         cifs_move_llist(&tmp_llist,
1385                                                         &cfile->llist->locks);
1386                                         rc = stored_rc;
1387                                 } else
1388                                         /*
1389                                          * The unlock range request succeeded -
1390                                          * free the tmp list.
1391                                          */
1392                                         cifs_free_llist(&tmp_llist);
1393                                 cur = buf;
1394                                 num = 0;
1395                         } else
1396                                 cur++;
1397                 }
1398                 if (num) {
1399                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1400                                                types[i], num, 0, buf);
1401                         if (stored_rc) {
1402                                 cifs_move_llist(&tmp_llist,
1403                                                 &cfile->llist->locks);
1404                                 rc = stored_rc;
1405                         } else
1406                                 cifs_free_llist(&tmp_llist);
1407                 }
1408         }
1409
1410         up_write(&cinode->lock_sem);
1411         kfree(buf);
1412         return rc;
1413 }
1414
1415 static int
1416 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1417            bool wait_flag, bool posix_lck, int lock, int unlock,
1418            unsigned int xid)
1419 {
1420         int rc = 0;
1421         __u64 length = 1 + flock->fl_end - flock->fl_start;
1422         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1423         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1424         struct TCP_Server_Info *server = tcon->ses->server;
1425
1426         if (posix_lck) {
1427                 int posix_lock_type;
1428
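                     /*
                      * cifs_posix_lock_set (defined earlier in this file)
                      * returns zero when the lock was cached locally, a
                      * negative value on error, and a positive value when
                      * the lock still needs to be sent to the server -
                      * hence the check below.
                      */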
1429                 rc = cifs_posix_lock_set(file, flock);
1430                 if (rc <= 0)
1431                         return rc;
1432
1433                 if (type & server->vals->shared_lock_type)
1434                         posix_lock_type = CIFS_RDLCK;
1435                 else
1436                         posix_lock_type = CIFS_WRLCK;
1437
1438                 if (unlock == 1)
1439                         posix_lock_type = CIFS_UNLCK;
1440
1441                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1442                                       current->tgid, flock->fl_start, length,
1443                                       NULL, posix_lock_type, wait_flag);
1444                 goto out;
1445         }
1446
1447         if (lock) {
1448                 struct cifsLockInfo *lock;
1449
1450                 lock = cifs_lock_init(flock->fl_start, length, type);
1451                 if (!lock)
1452                         return -ENOMEM;
1453
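                     /*
                      * Per its contract earlier in this file, cifs_lock_add_if
                      * returns 0 if it set the lock locally (no server request
                      * needed), a positive value if the lock must still be
                      * requested from the server, and a negative value on a
                      * conflict or error.
                      */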
1454                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1455                 if (rc < 0) {
1456                         kfree(lock);
1457                         return rc;
1458                 }
1459                 if (!rc)
1460                         goto out;
1461
1462                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1463                                             type, 1, 0, wait_flag);
1464                 if (rc) {
1465                         kfree(lock);
1466                         return rc;
1467                 }
1468
1469                 cifs_lock_add(cfile, lock);
1470         } else if (unlock)
1471                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1472
1473 out:
1474         if (flock->fl_flags & FL_POSIX)
1475                 posix_lock_file_wait(file, flock);
1476         return rc;
1477 }
1478
1479 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1480 {
1481         int rc, xid;
1482         int lock = 0, unlock = 0;
1483         bool wait_flag = false;
1484         bool posix_lck = false;
1485         struct cifs_sb_info *cifs_sb;
1486         struct cifs_tcon *tcon;
1487         struct cifsInodeInfo *cinode;
1488         struct cifsFileInfo *cfile;
1489         __u16 netfid;
1490         __u32 type;
1491
1492         rc = -EACCES;
1493         xid = get_xid();
1494
1495         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1496                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1497                 flock->fl_start, flock->fl_end);
1498
1499         cfile = (struct cifsFileInfo *)file->private_data;
1500         tcon = tlink_tcon(cfile->tlink);
1501
1502         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1503                         tcon->ses->server);
1504
1505         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1506         netfid = cfile->fid.netfid;
1507         cinode = CIFS_I(file->f_path.dentry->d_inode);
1508
1509         if (cap_unix(tcon->ses) &&
1510             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1511             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1512                 posix_lck = true;
1513         /*
1514          * BB add code here to normalize offset and length to account for
1515          * negative length, which we cannot accept over the wire.
1516          */
1517         if (IS_GETLK(cmd)) {
1518                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1519                 free_xid(xid);
1520                 return rc;
1521         }
1522
1523         if (!lock && !unlock) {
1524                 /*
1525                  * If this is neither a lock nor an unlock request, we do
1526                  * not know what it is - nothing to do here.
1527                  */
1528                 free_xid(xid);
1529                 return -EOPNOTSUPP;
1530         }
1531
1532         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1533                         xid);
1534         free_xid(xid);
1535         return rc;
1536 }
1537
1538 /*
1539  * update the file size (if needed) after a write. Should be called with
1540  * the inode->i_lock held
1541  */
1542 void
1543 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1544                       unsigned int bytes_written)
1545 {
1546         loff_t end_of_write = offset + bytes_written;
1547
1548         if (end_of_write > cifsi->server_eof)
1549                 cifsi->server_eof = end_of_write;
1550 }
1551
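     /*
      * A minimal usage sketch - callers update the EOF under i_lock, as
      * cifs_write() does below:
      *
      *      spin_lock(&dentry->d_inode->i_lock);
      *      cifs_update_eof(cifsi, *offset, bytes_written);
      *      spin_unlock(&dentry->d_inode->i_lock);
      */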
1552 static ssize_t
1553 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1554            size_t write_size, loff_t *offset)
1555 {
1556         int rc = 0;
1557         unsigned int bytes_written = 0;
1558         unsigned int total_written;
1559         struct cifs_sb_info *cifs_sb;
1560         struct cifs_tcon *tcon;
1561         struct TCP_Server_Info *server;
1562         unsigned int xid;
1563         struct dentry *dentry = open_file->dentry;
1564         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1565         struct cifs_io_parms io_parms;
1566
1567         cifs_sb = CIFS_SB(dentry->d_sb);
1568
1569         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1570              *offset, dentry->d_name.name);
1571
1572         tcon = tlink_tcon(open_file->tlink);
1573         server = tcon->ses->server;
1574
1575         if (!server->ops->sync_write)
1576                 return -ENOSYS;
1577
1578         xid = get_xid();
1579
1580         for (total_written = 0; write_size > total_written;
1581              total_written += bytes_written) {
1582                 rc = -EAGAIN;
1583                 while (rc == -EAGAIN) {
1584                         struct kvec iov[2];
1585                         unsigned int len;
1586
1587                         if (open_file->invalidHandle) {
1588                         /* we could deadlock if we called
1589                            filemap_fdatawait from here, so tell
1590                            cifs_reopen_file not to flush data to
1591                            the server now */
1592                                 rc = cifs_reopen_file(open_file, false);
1593                                 if (rc != 0)
1594                                         break;
1595                         }
1596
1597                         len = min((size_t)cifs_sb->wsize,
1598                                   write_size - total_written);
1599                         /* iov[0] is reserved for smb header */
1600                         iov[1].iov_base = (char *)write_data + total_written;
1601                         iov[1].iov_len = len;
1602                         io_parms.pid = pid;
1603                         io_parms.tcon = tcon;
1604                         io_parms.offset = *offset;
1605                         io_parms.length = len;
1606                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1607                                                      &bytes_written, iov, 1);
1608                 }
1609                 if (rc || (bytes_written == 0)) {
1610                         if (total_written)
1611                                 break;
1612                         else {
1613                                 free_xid(xid);
1614                                 return rc;
1615                         }
1616                 } else {
1617                         spin_lock(&dentry->d_inode->i_lock);
1618                         cifs_update_eof(cifsi, *offset, bytes_written);
1619                         spin_unlock(&dentry->d_inode->i_lock);
1620                         *offset += bytes_written;
1621                 }
1622         }
1623
1624         cifs_stats_bytes_written(tcon, total_written);
1625
1626         if (total_written > 0) {
1627                 spin_lock(&dentry->d_inode->i_lock);
1628                 if (*offset > dentry->d_inode->i_size)
1629                         i_size_write(dentry->d_inode, *offset);
1630                 spin_unlock(&dentry->d_inode->i_lock);
1631         }
1632         mark_inode_dirty_sync(dentry->d_inode);
1633         free_xid(xid);
1634         return total_written;
1635 }
1636
1637 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1638                                         bool fsuid_only)
1639 {
1640         struct cifsFileInfo *open_file = NULL;
1641         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1642
1643         /* only filter by fsuid on multiuser mounts */
1644         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1645                 fsuid_only = false;
1646
1647         spin_lock(&cifs_file_list_lock);
1648         /* we could simply take the first list entry, since write-only
1649            entries are always at the end of the list, but the first entry
1650            might have a close pending, so go through the whole list */
1651         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1652                 if (fsuid_only && open_file->uid != current_fsuid())
1653                         continue;
1654                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1655                         if (!open_file->invalidHandle) {
1656                                 /* found a good file */
1657                                 /* lock it so it will not be closed on us */
1658                                 cifsFileInfo_get_locked(open_file);
1659                                 spin_unlock(&cifs_file_list_lock);
1660                                 return open_file;
1661                         } /* else might as well continue, and look for
1662                              another, or simply have the caller reopen it
1663                              again rather than trying to fix this handle */
1664                 } else /* write only file */
1665                         break; /* write only files are last so must be done */
1666         }
1667         spin_unlock(&cifs_file_list_lock);
1668         return NULL;
1669 }
1670
1671 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1672                                         bool fsuid_only)
1673 {
1674         struct cifsFileInfo *open_file, *inv_file = NULL;
1675         struct cifs_sb_info *cifs_sb;
1676         bool any_available = false;
1677         int rc;
1678         unsigned int refind = 0;
1679
1680         /* Having a null inode here (because mapping->host was set to zero by
1681         the VFS or MM) should not happen, but we had reports of an oops (due to
1682         it being zero) during stress test cases, so we need to check for it */
1683
1684         if (cifs_inode == NULL) {
1685                 cERROR(1, "Null inode passed to find_writable_file");
1686                 dump_stack();
1687                 return NULL;
1688         }
1689
1690         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1691
1692         /* only filter by fsuid on multiuser mounts */
1693         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1694                 fsuid_only = false;
1695
1696         spin_lock(&cifs_file_list_lock);
1697 refind_writable:
1698         if (refind > MAX_REOPEN_ATT) {
1699                 spin_unlock(&cifs_file_list_lock);
1700                 return NULL;
1701         }
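             /*
              * The first pass prefers handles opened by this tgid; if none
              * works, we retry with any_available set and accept any writable
              * handle. Invalid handles are remembered in inv_file and
              * reopened as a last resort.
              */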
1702         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1703                 if (!any_available && open_file->pid != current->tgid)
1704                         continue;
1705                 if (fsuid_only && open_file->uid != current_fsuid())
1706                         continue;
1707                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1708                         if (!open_file->invalidHandle) {
1709                                 /* found a good writable file */
1710                                 cifsFileInfo_get_locked(open_file);
1711                                 spin_unlock(&cifs_file_list_lock);
1712                                 return open_file;
1713                         } else {
1714                                 if (!inv_file)
1715                                         inv_file = open_file;
1716                         }
1717                 }
1718         }
1719         /* couldn't find a usable FH with the same pid, try any available */
1720         if (!any_available) {
1721                 any_available = true;
1722                 goto refind_writable;
1723         }
1724
1725         if (inv_file) {
1726                 any_available = false;
1727                 cifsFileInfo_get_locked(inv_file);
1728         }
1729
1730         spin_unlock(&cifs_file_list_lock);
1731
1732         if (inv_file) {
1733                 rc = cifs_reopen_file(inv_file, false);
1734                 if (!rc)
1735                         return inv_file;
1736                 else {
1737                         spin_lock(&cifs_file_list_lock);
1738                         list_move_tail(&inv_file->flist,
1739                                         &cifs_inode->openFileList);
1740                         spin_unlock(&cifs_file_list_lock);
1741                         cifsFileInfo_put(inv_file);
1742                         spin_lock(&cifs_file_list_lock);
1743                         ++refind;
1744                         goto refind_writable;
1745                 }
1746         }
1747
1748         return NULL;
1749 }
1750
1751 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1752 {
1753         struct address_space *mapping = page->mapping;
1754         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1755         char *write_data;
1756         int rc = -EFAULT;
1757         int bytes_written = 0;
1758         struct inode *inode;
1759         struct cifsFileInfo *open_file;
1760
1761         if (!mapping || !mapping->host)
1762                 return -EFAULT;
1763
1764         inode = page->mapping->host;
1765
1766         offset += (loff_t)from;
1767         write_data = kmap(page);
1768         write_data += from;
1769
1770         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1771                 kunmap(page);
1772                 return -EIO;
1773         }
1774
1775         /* racing with truncate? */
1776         if (offset > mapping->host->i_size) {
1777                 kunmap(page);
1778                 return 0; /* don't care */
1779         }
1780
1781         /* check to make sure that we are not extending the file */
1782         if (mapping->host->i_size - offset < (loff_t)to)
1783                 to = (unsigned)(mapping->host->i_size - offset);
1784
1785         open_file = find_writable_file(CIFS_I(mapping->host), false);
1786         if (open_file) {
1787                 bytes_written = cifs_write(open_file, open_file->pid,
1788                                            write_data, to - from, &offset);
1789                 cifsFileInfo_put(open_file);
1790                 /* Does mm or vfs already set times? */
1791                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1792                 if ((bytes_written > 0) && (offset))
1793                         rc = 0;
1794                 else if (bytes_written < 0)
1795                         rc = bytes_written;
1796         } else {
1797                 cFYI(1, "No writeable filehandles for inode");
1798                 rc = -EIO;
1799         }
1800
1801         kunmap(page);
1802         return rc;
1803 }
1804
1805 static int cifs_writepages(struct address_space *mapping,
1806                            struct writeback_control *wbc)
1807 {
1808         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1809         bool done = false, scanned = false, range_whole = false;
1810         pgoff_t end, index;
1811         struct cifs_writedata *wdata;
1812         struct TCP_Server_Info *server;
1813         struct page *page;
1814         int rc = 0;
1815
1816         /*
1817          * If wsize is smaller than the page cache size, default to writing
1818          * one page at a time via cifs_writepage
1819          */
1820         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1821                 return generic_writepages(mapping, wbc);
1822
1823         if (wbc->range_cyclic) {
1824                 index = mapping->writeback_index; /* Start from prev offset */
1825                 end = -1;
1826         } else {
1827                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1828                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1829                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1830                         range_whole = true;
1831                 scanned = true;
1832         }
1833 retry:
1834         while (!done && index <= end) {
1835                 unsigned int i, nr_pages, found_pages;
1836                 pgoff_t next = 0, tofind;
1837                 struct page **pages;
1838
1839                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1840                                 end - index) + 1;
1841
1842                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1843                                              cifs_writev_complete);
1844                 if (!wdata) {
1845                         rc = -ENOMEM;
1846                         break;
1847                 }
1848
1849                 /*
1850                  * find_get_pages_tag seems to return a max of 256 on each
1851                  * iteration, so we must call it several times in order to
1852                  * fill the array; otherwise the wsize is effectively
1853                  * limited to 256 * PAGE_CACHE_SIZE.
1854                  */
1855                 found_pages = 0;
1856                 pages = wdata->pages;
1857                 do {
1858                         nr_pages = find_get_pages_tag(mapping, &index,
1859                                                         PAGECACHE_TAG_DIRTY,
1860                                                         tofind, pages);
1861                         found_pages += nr_pages;
1862                         tofind -= nr_pages;
1863                         pages += nr_pages;
1864                 } while (nr_pages && tofind && index <= end);
1865
1866                 if (found_pages == 0) {
1867                         kref_put(&wdata->refcount, cifs_writedata_release);
1868                         break;
1869                 }
1870
1871                 nr_pages = 0;
1872                 for (i = 0; i < found_pages; i++) {
1873                         page = wdata->pages[i];
1874                         /*
1875                          * At this point we hold neither mapping->tree_lock nor
1876                          * lock on the page itself: the page may be truncated or
1877                          * invalidated (changing page->mapping to NULL), or even
1878                          * swizzled back from swapper_space to tmpfs file
1879                          * mapping
1880                          */
1881
1882                         if (nr_pages == 0)
1883                                 lock_page(page);
1884                         else if (!trylock_page(page))
1885                                 break;
1886
1887                         if (unlikely(page->mapping != mapping)) {
1888                                 unlock_page(page);
1889                                 break;
1890                         }
1891
1892                         if (!wbc->range_cyclic && page->index > end) {
1893                                 done = true;
1894                                 unlock_page(page);
1895                                 break;
1896                         }
1897
1898                         if (next && (page->index != next)) {
1899                                 /* Not next consecutive page */
1900                                 unlock_page(page);
1901                                 break;
1902                         }
1903
1904                         if (wbc->sync_mode != WB_SYNC_NONE)
1905                                 wait_on_page_writeback(page);
1906
1907                         if (PageWriteback(page) ||
1908                                         !clear_page_dirty_for_io(page)) {
1909                                 unlock_page(page);
1910                                 break;
1911                         }
1912
1913                         /*
1914                          * This actually clears the dirty bit in the radix tree.
1915                          * See cifs_writepage() for more commentary.
1916                          */
1917                         set_page_writeback(page);
1918
1919                         if (page_offset(page) >= i_size_read(mapping->host)) {
1920                                 done = true;
1921                                 unlock_page(page);
1922                                 end_page_writeback(page);
1923                                 break;
1924                         }
1925
1926                         wdata->pages[i] = page;
1927                         next = page->index + 1;
1928                         ++nr_pages;
1929                 }
1930
1931                 /* reset index to refind any pages skipped */
1932                 if (nr_pages == 0)
1933                         index = wdata->pages[0]->index + 1;
1934
1935                 /* put any pages we aren't going to use */
1936                 for (i = nr_pages; i < found_pages; i++) {
1937                         page_cache_release(wdata->pages[i]);
1938                         wdata->pages[i] = NULL;
1939                 }
1940
1941                 /* nothing to write? */
1942                 if (nr_pages == 0) {
1943                         kref_put(&wdata->refcount, cifs_writedata_release);
1944                         continue;
1945                 }
1946
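                     /*
                      * A rough example of the sizing below (assuming a
                      * 4096-byte PAGE_CACHE_SIZE): with nr_pages == 3 and
                      * only 1000 bytes of the last page below i_size,
                      * tailsz is 1000 and bytes is 2 * 4096 + 1000 = 9192.
                      */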
1947                 wdata->sync_mode = wbc->sync_mode;
1948                 wdata->nr_pages = nr_pages;
1949                 wdata->offset = page_offset(wdata->pages[0]);
1950                 wdata->pagesz = PAGE_CACHE_SIZE;
1951                 wdata->tailsz =
1952                         min(i_size_read(mapping->host) -
1953                             page_offset(wdata->pages[nr_pages - 1]),
1954                             (loff_t)PAGE_CACHE_SIZE);
1955                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
1956                                         wdata->tailsz;
1957
1958                 do {
1959                         if (wdata->cfile != NULL)
1960                                 cifsFileInfo_put(wdata->cfile);
1961                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1962                                                           false);
1963                         if (!wdata->cfile) {
1964                                 cERROR(1, "No writable handles for inode");
1965                                 rc = -EBADF;
1966                                 break;
1967                         }
1968                         wdata->pid = wdata->cfile->pid;
1969                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1970                         rc = server->ops->async_writev(wdata);
1971                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1972
1973                 for (i = 0; i < nr_pages; ++i)
1974                         unlock_page(wdata->pages[i]);
1975
1976                 /* send failure -- clean up the mess */
1977                 if (rc != 0) {
1978                         for (i = 0; i < nr_pages; ++i) {
1979                                 if (rc == -EAGAIN)
1980                                         redirty_page_for_writepage(wbc,
1981                                                            wdata->pages[i]);
1982                                 else
1983                                         SetPageError(wdata->pages[i]);
1984                                 end_page_writeback(wdata->pages[i]);
1985                                 page_cache_release(wdata->pages[i]);
1986                         }
1987                         if (rc != -EAGAIN)
1988                                 mapping_set_error(mapping, rc);
1989                 }
1990                 kref_put(&wdata->refcount, cifs_writedata_release);
1991
1992                 wbc->nr_to_write -= nr_pages;
1993                 if (wbc->nr_to_write <= 0)
1994                         done = true;
1995
1996                 index = next;
1997         }
1998
1999         if (!scanned && !done) {
2000                 /*
2001                  * We hit the last page and there is more work to be done: wrap
2002                  * back to the start of the file
2003                  */
2004                 scanned = true;
2005                 index = 0;
2006                 goto retry;
2007         }
2008
2009         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2010                 mapping->writeback_index = index;
2011
2012         return rc;
2013 }
2014
2015 static int
2016 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2017 {
2018         int rc;
2019         unsigned int xid;
2020
2021         xid = get_xid();
2022 /* BB add check for wbc flags */
2023         page_cache_get(page);
2024         if (!PageUptodate(page))
2025                 cFYI(1, "ppw - page not up to date");
2026
2027         /*
2028          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2029          *
2030          * A writepage() implementation always needs to do either this,
2031          * or re-dirty the page with "redirty_page_for_writepage()" in
2032          * the case of a failure.
2033          *
2034          * Just unlocking the page will cause the radix tree tag-bits
2035          * to fail to update with the state of the page correctly.
2036          */
2037         set_page_writeback(page);
2038 retry_write:
2039         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2040         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2041                 goto retry_write;
2042         else if (rc == -EAGAIN)
2043                 redirty_page_for_writepage(wbc, page);
2044         else if (rc != 0)
2045                 SetPageError(page);
2046         else
2047                 SetPageUptodate(page);
2048         end_page_writeback(page);
2049         page_cache_release(page);
2050         free_xid(xid);
2051         return rc;
2052 }
2053
2054 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2055 {
2056         int rc = cifs_writepage_locked(page, wbc);
2057         unlock_page(page);
2058         return rc;
2059 }
2060
2061 static int cifs_write_end(struct file *file, struct address_space *mapping,
2062                         loff_t pos, unsigned len, unsigned copied,
2063                         struct page *page, void *fsdata)
2064 {
2065         int rc;
2066         struct inode *inode = mapping->host;
2067         struct cifsFileInfo *cfile = file->private_data;
2068         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2069         __u32 pid;
2070
2071         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2072                 pid = cfile->pid;
2073         else
2074                 pid = current->tgid;
2075
2076         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2077                  page, pos, copied);
2078
2079         if (PageChecked(page)) {
2080                 if (copied == len)
2081                         SetPageUptodate(page);
2082                 ClearPageChecked(page);
2083         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2084                 SetPageUptodate(page);
2085
2086         if (!PageUptodate(page)) {
2087                 char *page_data;
2088                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2089                 unsigned int xid;
2090
2091                 xid = get_xid();
2092                 /* this is probably better than directly calling
2093                    cifs_partialpagewrite, since here the file handle is
2094                    already known and we might as well leverage it */
2095                 /* BB check if anything else missing out of ppw
2096                    such as updating last write time */
2097                 page_data = kmap(page);
2098                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2099                 /* if (rc < 0) should we set writebehind rc? */
2100                 kunmap(page);
2101
2102                 free_xid(xid);
2103         } else {
2104                 rc = copied;
2105                 pos += copied;
2106                 set_page_dirty(page);
2107         }
2108
2109         if (rc > 0) {
2110                 spin_lock(&inode->i_lock);
2111                 if (pos > inode->i_size)
2112                         i_size_write(inode, pos);
2113                 spin_unlock(&inode->i_lock);
2114         }
2115
2116         unlock_page(page);
2117         page_cache_release(page);
2118
2119         return rc;
2120 }
2121
2122 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2123                       int datasync)
2124 {
2125         unsigned int xid;
2126         int rc = 0;
2127         struct cifs_tcon *tcon;
2128         struct TCP_Server_Info *server;
2129         struct cifsFileInfo *smbfile = file->private_data;
2130         struct inode *inode = file->f_path.dentry->d_inode;
2131         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2132
2133         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2134         if (rc)
2135                 return rc;
2136         mutex_lock(&inode->i_mutex);
2137
2138         xid = get_xid();
2139
2140         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2141                 file->f_path.dentry->d_name.name, datasync);
2142
2143         if (!CIFS_I(inode)->clientCanCacheRead) {
2144                 rc = cifs_invalidate_mapping(inode);
2145                 if (rc) {
2146                         cFYI(1, "rc: %d during invalidate phase", rc);
2147                         rc = 0; /* don't care about it in fsync */
2148                 }
2149         }
2150
2151         tcon = tlink_tcon(smbfile->tlink);
2152         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2153                 server = tcon->ses->server;
2154                 if (server->ops->flush)
2155                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2156                 else
2157                         rc = -ENOSYS;
2158         }
2159
2160         free_xid(xid);
2161         mutex_unlock(&inode->i_mutex);
2162         return rc;
2163 }
2164
2165 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2166 {
2167         unsigned int xid;
2168         int rc = 0;
2169         struct cifs_tcon *tcon;
2170         struct TCP_Server_Info *server;
2171         struct cifsFileInfo *smbfile = file->private_data;
2172         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2173         struct inode *inode = file->f_mapping->host;
2174
2175         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2176         if (rc)
2177                 return rc;
2178         mutex_lock(&inode->i_mutex);
2179
2180         xid = get_xid();
2181
2182         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2183                 file->f_path.dentry->d_name.name, datasync);
2184
2185         tcon = tlink_tcon(smbfile->tlink);
2186         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2187                 server = tcon->ses->server;
2188                 if (server->ops->flush)
2189                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2190                 else
2191                         rc = -ENOSYS;
2192         }
2193
2194         free_xid(xid);
2195         mutex_unlock(&inode->i_mutex);
2196         return rc;
2197 }
2198
2199 /*
2200  * As the file closes, flush all cached write data for this inode, checking
2201  * for write-behind errors.
2202  */
2203 int cifs_flush(struct file *file, fl_owner_t id)
2204 {
2205         struct inode *inode = file->f_path.dentry->d_inode;
2206         int rc = 0;
2207
2208         if (file->f_mode & FMODE_WRITE)
2209                 rc = filemap_write_and_wait(inode->i_mapping);
2210
2211         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2212
2213         return rc;
2214 }
2215
2216 static int
2217 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2218 {
2219         int rc = 0;
2220         unsigned long i;
2221
2222         for (i = 0; i < num_pages; i++) {
2223                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2224                 if (!pages[i]) {
2225                         /*
2226                          * save number of pages we have already allocated and
2227                          * return with ENOMEM error
2228                          */
2229                         num_pages = i;
2230                         rc = -ENOMEM;
2231                         break;
2232                 }
2233         }
2234
2235         if (rc) {
2236                 for (i = 0; i < num_pages; i++)
2237                         put_page(pages[i]);
2238         }
2239         return rc;
2240 }
2241
2242 static inline
2243 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2244 {
2245         size_t num_pages;
2246         size_t clen;
2247
2248         clen = min_t(const size_t, len, wsize);
2249         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2250
2251         if (cur_len)
2252                 *cur_len = clen;
2253
2254         return num_pages;
2255 }
2256
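     /*
      * For example, with a wsize of 65536, a len of 200000 and 4096-byte
      * pages, get_numpages() sets *cur_len to 65536 and returns
      * DIV_ROUND_UP(65536, 4096) = 16.
      */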
2257 static void
2258 cifs_uncached_writev_complete(struct work_struct *work)
2259 {
2260         int i;
2261         struct cifs_writedata *wdata = container_of(work,
2262                                         struct cifs_writedata, work);
2263         struct inode *inode = wdata->cfile->dentry->d_inode;
2264         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2265
2266         spin_lock(&inode->i_lock);
2267         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2268         if (cifsi->server_eof > inode->i_size)
2269                 i_size_write(inode, cifsi->server_eof);
2270         spin_unlock(&inode->i_lock);
2271
2272         complete(&wdata->done);
2273
2274         if (wdata->result != -EAGAIN) {
2275                 for (i = 0; i < wdata->nr_pages; i++)
2276                         put_page(wdata->pages[i]);
2277         }
2278
2279         kref_put(&wdata->refcount, cifs_writedata_release);
2280 }
2281
2282 /* attempt to send write to server, retry on any -EAGAIN errors */
2283 static int
2284 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2285 {
2286         int rc;
2287         struct TCP_Server_Info *server;
2288
2289         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2290
2291         do {
2292                 if (wdata->cfile->invalidHandle) {
2293                         rc = cifs_reopen_file(wdata->cfile, false);
2294                         if (rc != 0)
2295                                 continue;
2296                 }
2297                 rc = server->ops->async_writev(wdata);
2298         } while (rc == -EAGAIN);
2299
2300         return rc;
2301 }
2302
2303 static ssize_t
2304 cifs_iovec_write(struct file *file, const struct iovec *iov,
2305                  unsigned long nr_segs, loff_t *poffset)
2306 {
2307         unsigned long nr_pages, i;
2308         size_t copied, len, cur_len;
2309         ssize_t total_written = 0;
2310         loff_t offset;
2311         struct iov_iter it;
2312         struct cifsFileInfo *open_file;
2313         struct cifs_tcon *tcon;
2314         struct cifs_sb_info *cifs_sb;
2315         struct cifs_writedata *wdata, *tmp;
2316         struct list_head wdata_list;
2317         int rc;
2318         pid_t pid;
2319
2320         len = iov_length(iov, nr_segs);
2321         if (!len)
2322                 return 0;
2323
2324         rc = generic_write_checks(file, poffset, &len, 0);
2325         if (rc)
2326                 return rc;
2327
2328         INIT_LIST_HEAD(&wdata_list);
2329         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2330         open_file = file->private_data;
2331         tcon = tlink_tcon(open_file->tlink);
2332
2333         if (!tcon->ses->server->ops->async_writev)
2334                 return -ENOSYS;
2335
2336         offset = *poffset;
2337
2338         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2339                 pid = open_file->pid;
2340         else
2341                 pid = current->tgid;
2342
2343         iov_iter_init(&it, iov, nr_segs, len, 0);
2344         do {
2345                 size_t save_len;
2346
2347                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2348                 wdata = cifs_writedata_alloc(nr_pages,
2349                                              cifs_uncached_writev_complete);
2350                 if (!wdata) {
2351                         rc = -ENOMEM;
2352                         break;
2353                 }
2354
2355                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2356                 if (rc) {
2357                         kfree(wdata);
2358                         break;
2359                 }
2360
2361                 save_len = cur_len;
2362                 for (i = 0; i < nr_pages; i++) {
2363                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2364                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2365                                                          0, copied);
2366                         cur_len -= copied;
2367                         iov_iter_advance(&it, copied);
2368                 }
2369                 cur_len = save_len - cur_len;
2370
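                     /*
                      * For example, copying 9192 bytes into 3 pages leaves a
                      * tailsz of 9192 - 2 * 4096 = 1000 bytes in the last page.
                      */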
2371                 wdata->sync_mode = WB_SYNC_ALL;
2372                 wdata->nr_pages = nr_pages;
2373                 wdata->offset = (__u64)offset;
2374                 wdata->cfile = cifsFileInfo_get(open_file);
2375                 wdata->pid = pid;
2376                 wdata->bytes = cur_len;
2377                 wdata->pagesz = PAGE_SIZE;
2378                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2379                 rc = cifs_uncached_retry_writev(wdata);
2380                 if (rc) {
2381                         kref_put(&wdata->refcount, cifs_writedata_release);
2382                         break;
2383                 }
2384
2385                 list_add_tail(&wdata->list, &wdata_list);
2386                 offset += cur_len;
2387                 len -= cur_len;
2388         } while (len > 0);
2389
2390         /*
2391          * If at least one write was successfully sent, then discard any rc
2392          * value from the later failed sends. If the remaining writes succeed,
2393          * we'll end up returning whatever was written. If one fails, we'll
2394          * get a new rc value from that.
2395          */
2396         if (!list_empty(&wdata_list))
2397                 rc = 0;
2398
2399         /*
2400          * Wait for and collect replies for any successful sends in order of
2401          * increasing offset. Once an error is hit or we get a fatal signal
2402          * while waiting, then return without waiting for any more replies.
2403          */
2404 restart_loop:
2405         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2406                 if (!rc) {
2407                         /* FIXME: freezable too? */
2408                         rc = wait_for_completion_killable(&wdata->done);
2409                         if (rc)
2410                                 rc = -EINTR;
2411                         else if (wdata->result)
2412                                 rc = wdata->result;
2413                         else
2414                                 total_written += wdata->bytes;
2415
2416                         /* resend call if it's a retryable error */
2417                         if (rc == -EAGAIN) {
2418                                 rc = cifs_uncached_retry_writev(wdata);
2419                                 goto restart_loop;
2420                         }
2421                 }
2422                 list_del_init(&wdata->list);
2423                 kref_put(&wdata->refcount, cifs_writedata_release);
2424         }
2425
2426         if (total_written > 0)
2427                 *poffset += total_written;
2428
2429         cifs_stats_bytes_written(tcon, total_written);
2430         return total_written ? total_written : (ssize_t)rc;
2431 }
2432
2433 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2434                                 unsigned long nr_segs, loff_t pos)
2435 {
2436         ssize_t written;
2437         struct inode *inode;
2438
2439         inode = iocb->ki_filp->f_path.dentry->d_inode;
2440
2441         /*
2442          * BB - optimize for the case when signing is disabled: we could drop
2443          * this extra memory-to-memory copying and use the iovec buffers
2444          * directly when constructing the write request.
2445          */
2446
2447         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2448         if (written > 0) {
2449                 CIFS_I(inode)->invalid_mapping = true;
2450                 iocb->ki_pos = pos;
2451         }
2452
2453         return written;
2454 }
2455
2456 static ssize_t
2457 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2458             unsigned long nr_segs, loff_t pos)
2459 {
2460         struct file *file = iocb->ki_filp;
2461         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2462         struct inode *inode = file->f_mapping->host;
2463         struct cifsInodeInfo *cinode = CIFS_I(inode);
2464         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2465         ssize_t rc = -EACCES;
2466
2467         BUG_ON(iocb->ki_pos != pos);
2468
2469         sb_start_write(inode->i_sb);
2470
2471         /*
2472          * We need to hold the sem to make sure nobody modifies the lock
2473          * list with a brlock that prevents writing.
2474          */
2475         down_read(&cinode->lock_sem);
2476         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2477                                      server->vals->exclusive_lock_type, NULL,
2478                                      CIFS_WRITE_OP)) {
2479                 mutex_lock(&inode->i_mutex);
2480                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2481                                                &iocb->ki_pos);
2482                 mutex_unlock(&inode->i_mutex);
2483         }
2484
2485         if (rc > 0 || rc == -EIOCBQUEUED) {
2486                 ssize_t err;
2487
2488                 err = generic_write_sync(file, pos, rc);
2489                 if (err < 0 && rc > 0)
2490                         rc = err;
2491         }
2492
2493         up_read(&cinode->lock_sem);
2494         sb_end_write(inode->i_sb);
2495         return rc;
2496 }
2497
2498 ssize_t
2499 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2500                    unsigned long nr_segs, loff_t pos)
2501 {
2502         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2503         struct cifsInodeInfo *cinode = CIFS_I(inode);
2504         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2505         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2506                                                 iocb->ki_filp->private_data;
2507         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2508         ssize_t written;
2509
2510         if (cinode->clientCanCacheAll) {
2511                 if (cap_unix(tcon->ses) &&
2512                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2513                     && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2514                         return generic_file_aio_write(iocb, iov, nr_segs, pos);
2515                 return cifs_writev(iocb, iov, nr_segs, pos);
2516         }
2517         /*
2518          * For non-oplocked files in strict cache mode we need to write the data
2519          * to the server exactly from pos to pos+len-1 rather than flush all
2520          * affected pages, because that may cause an error with mandatory locks
2521          * on those pages but not on the region from pos to pos+len-1.
2522          */
2523         written = cifs_user_writev(iocb, iov, nr_segs, pos);
2524         if (written > 0 && cinode->clientCanCacheRead) {
2525                 /*
2526                  * A Windows 7 server can delay breaking a level2 oplock when a
2527                  * write request comes in - break it on the client to prevent
2528                  * reading stale data.
2529                  */
2530                 cifs_invalidate_mapping(inode);
2531                 cFYI(1, "Set no oplock for inode=%p after a write operation",
2532                      inode);
2533                 cinode->clientCanCacheRead = false;
2534         }
2535         return written;
2536 }
2537
2538 static struct cifs_readdata *
2539 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2540 {
2541         struct cifs_readdata *rdata;
2542
2543         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2544                         GFP_KERNEL);
2545         if (rdata != NULL) {
2546                 kref_init(&rdata->refcount);
2547                 INIT_LIST_HEAD(&rdata->list);
2548                 init_completion(&rdata->done);
2549                 INIT_WORK(&rdata->work, complete);
2550         }
2551
2552         return rdata;
2553 }
2554
2555 void
2556 cifs_readdata_release(struct kref *refcount)
2557 {
2558         struct cifs_readdata *rdata = container_of(refcount,
2559                                         struct cifs_readdata, refcount);
2560
2561         if (rdata->cfile)
2562                 cifsFileInfo_put(rdata->cfile);
2563
2564         kfree(rdata);
2565 }
2566
2567 static int
2568 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2569 {
2570         int rc = 0;
2571         struct page *page;
2572         unsigned int i;
2573
2574         for (i = 0; i < nr_pages; i++) {
2575                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2576                 if (!page) {
2577                         rc = -ENOMEM;
2578                         break;
2579                 }
2580                 rdata->pages[i] = page;
2581         }
2582
2583         if (rc) {
2584                 for (i = 0; i < nr_pages; i++) {
2585                         put_page(rdata->pages[i]);
2586                         rdata->pages[i] = NULL;
2587                 }
2588         }
2589         return rc;
2590 }
2591
2592 static void
2593 cifs_uncached_readdata_release(struct kref *refcount)
2594 {
2595         struct cifs_readdata *rdata = container_of(refcount,
2596                                         struct cifs_readdata, refcount);
2597         unsigned int i;
2598
2599         for (i = 0; i < rdata->nr_pages; i++) {
2600                 put_page(rdata->pages[i]);
2601                 rdata->pages[i] = NULL;
2602         }
2603         cifs_readdata_release(refcount);
2604 }
2605
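     /* issue the async read, reopening the file and retrying on -EAGAIN */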
2606 static int
2607 cifs_retry_async_readv(struct cifs_readdata *rdata)
2608 {
2609         int rc;
2610         struct TCP_Server_Info *server;
2611
2612         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2613
2614         do {
2615                 if (rdata->cfile->invalidHandle) {
2616                         rc = cifs_reopen_file(rdata->cfile, true);
2617                         if (rc != 0)
2618                                 continue;
2619                 }
2620                 rc = server->ops->async_readv(rdata);
2621         } while (rc == -EAGAIN);
2622
2623         return rc;
2624 }
2625
2626 /**
2627  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2628  * @rdata:      the readdata response with list of pages holding data
2629  * @iov:        vector in which we should copy the data
2630  * @nr_segs:    number of segments in vector
2631  * @offset:     offset into file of the first iovec
2632  * @copied:     used to return the amount of data copied to the iov
2633  *
2634  * This function copies data from a list of pages in a readdata response into
2635  * an array of iovecs. It will first calculate where the data should go
2636  * based on the info in the readdata and then copy the data into that spot.
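      *
      * For example, if the original request started at offset 0 and this
      * rdata holds the reply for bytes 16384 through 20479, copying begins
      * 16384 bytes into the iovec array.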
2637  */
2638 static ssize_t
2639 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2640                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2641 {
2642         int rc = 0;
2643         struct iov_iter ii;
2644         size_t pos = rdata->offset - offset;
2645         ssize_t remaining = rdata->bytes;
2646         unsigned char *pdata;
2647         unsigned int i;
2648
2649         /* set up iov_iter and advance to the correct offset */
2650         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2651         iov_iter_advance(&ii, pos);
2652
2653         *copied = 0;
2654         for (i = 0; i < rdata->nr_pages; i++) {
2655                 ssize_t copy;
2656                 struct page *page = rdata->pages[i];
2657
2658                 /* copy a whole page or whatever's left */
2659                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2660
2661                 /* ...but limit it to whatever space is left in the iov */
2662                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2663
2664                 /* go while there's data to be copied and no errors */
2665                 if (copy && !rc) {
2666                         pdata = kmap(page);
2667                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2668                                                 (int)copy);
2669                         kunmap(page);
2670                         if (!rc) {
2671                                 *copied += copy;
2672                                 remaining -= copy;
2673                                 iov_iter_advance(&ii, copy);
2674                         }
2675                 }
2676         }
2677
2678         return rc;
2679 }
2680
2681 static void
2682 cifs_uncached_readv_complete(struct work_struct *work)
2683 {
2684         struct cifs_readdata *rdata = container_of(work,
2685                                                 struct cifs_readdata, work);
2686
2687         complete(&rdata->done);
2688         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2689 }
2690
2691 static int
2692 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2693                         struct cifs_readdata *rdata, unsigned int len)
2694 {
2695         int total_read = 0, result = 0;
2696         unsigned int i;
2697         unsigned int nr_pages = rdata->nr_pages;
2698         struct kvec iov;
2699
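             /* assume a full tail page until a partial page is seen below */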
2700         rdata->tailsz = PAGE_SIZE;
2701         for (i = 0; i < nr_pages; i++) {
2702                 struct page *page = rdata->pages[i];
2703
2704                 if (len >= PAGE_SIZE) {
2705                         /* enough data to fill the page */
2706                         iov.iov_base = kmap(page);
2707                         iov.iov_len = PAGE_SIZE;
2708                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2709                                 i, iov.iov_base, iov.iov_len);
2710                         len -= PAGE_SIZE;
2711                 } else if (len > 0) {
2712                         /* enough for partial page, fill and zero the rest */
2713                         iov.iov_base = kmap(page);
2714                         iov.iov_len = len;
2715                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2716                                 i, iov.iov_base, iov.iov_len);
2717                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2718                         rdata->tailsz = len;
2719                         len = 0;
2720                 } else {
2721                         /* no need to hold page hostage */
2722                         rdata->pages[i] = NULL;
2723                         rdata->nr_pages--;
2724                         put_page(page);
2725                         continue;
2726                 }
2727
2728                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2729                 kunmap(page);
2730                 if (result < 0)
2731                         break;
2732
2733                 total_read += result;
2734         }
2735
2736         return total_read > 0 ? total_read : result;
2737 }
2738
2739 static ssize_t
2740 cifs_iovec_read(struct file *file, const struct iovec *iov,
2741                  unsigned long nr_segs, loff_t *poffset)
2742 {
2743         ssize_t rc;
2744         size_t len, cur_len;
2745         ssize_t total_read = 0;
2746         loff_t offset = *poffset;
2747         unsigned int npages;
2748         struct cifs_sb_info *cifs_sb;
2749         struct cifs_tcon *tcon;
2750         struct cifsFileInfo *open_file;
2751         struct cifs_readdata *rdata, *tmp;
2752         struct list_head rdata_list;
2753         pid_t pid;
2754
2755         if (!nr_segs)
2756                 return 0;
2757
2758         len = iov_length(iov, nr_segs);
2759         if (!len)
2760                 return 0;
2761
2762         INIT_LIST_HEAD(&rdata_list);
2763         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2764         open_file = file->private_data;
2765         tcon = tlink_tcon(open_file->tlink);
2766
2767         if (!tcon->ses->server->ops->async_readv)
2768                 return -ENOSYS;
2769
2770         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2771                 pid = open_file->pid;
2772         else
2773                 pid = current->tgid;
2774
2775         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2776                 cFYI(1, "attempting read on write only file instance");
2777
2778         do {
2779                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2780                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2781
2782                 /* allocate a readdata struct */
2783                 rdata = cifs_readdata_alloc(npages,
2784                                             cifs_uncached_readv_complete);
2785                 if (!rdata) {
2786                         rc = -ENOMEM;
2787                         break;  /* rdata is NULL - skip the error path's kref_put */
2788                 }
2789
2790                 rc = cifs_read_allocate_pages(rdata, npages);
2791                 if (rc)
2792                         goto error;
2793
2794                 rdata->cfile = cifsFileInfo_get(open_file);
2795                 rdata->nr_pages = npages;
2796                 rdata->offset = offset;
2797                 rdata->bytes = cur_len;
2798                 rdata->pid = pid;
2799                 rdata->pagesz = PAGE_SIZE;
2800                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2801
2802                 rc = cifs_retry_async_readv(rdata);
2803 error:
2804                 if (rc) {
2805                         kref_put(&rdata->refcount,
2806                                  cifs_uncached_readdata_release);
2807                         break;
2808                 }
2809
2810                 list_add_tail(&rdata->list, &rdata_list);
2811                 offset += cur_len;
2812                 len -= cur_len;
2813         } while (len > 0);
2814
2815         /* if at least one read request was sent successfully, reset rc */
2816         if (!list_empty(&rdata_list))
2817                 rc = 0;
2818
2819         /* the loop below should proceed in the order of increasing offsets */
2820 restart_loop:
2821         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2822                 if (!rc) {
2823                         ssize_t copied;
2824
2825                         /* FIXME: freezable sleep too? */
2826                         rc = wait_for_completion_killable(&rdata->done);
2827                         if (rc)
2828                                 rc = -EINTR;
2829                         else if (rdata->result)
2830                                 rc = rdata->result;
2831                         else {
2832                                 rc = cifs_readdata_to_iov(rdata, iov,
2833                                                         nr_segs, *poffset,
2834                                                         &copied);
2835                                 total_read += copied;
2836                         }
2837
2838                         /* resend call if it's a retryable error */
2839                         if (rc == -EAGAIN) {
2840                                 rc = cifs_retry_async_readv(rdata);
2841                                 goto restart_loop;
2842                         }
2843                 }
2844                 list_del_init(&rdata->list);
2845                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2846         }
2847
2848         cifs_stats_bytes_read(tcon, total_read);
2849         *poffset += total_read;
2850
2851         /* mask the nodata case - treat -ENODATA as success */
2852         if (rc == -ENODATA)
2853                 rc = 0;
2854
2855         return total_read ? total_read : rc;
2856 }
2857
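/*
 * Uncached read entry point: read straight from the server instead of
 * through the page cache, and advance ki_pos only if data was returned.
 */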
2858 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2859                                unsigned long nr_segs, loff_t pos)
2860 {
2861         ssize_t read;
2862
2863         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2864         if (read > 0)
2865                 iocb->ki_pos = pos;
2866
2867         return read;
2868 }
2869
2870 ssize_t
2871 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2872                   unsigned long nr_segs, loff_t pos)
2873 {
2874         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2875         struct cifsInodeInfo *cinode = CIFS_I(inode);
2876         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2877         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2878                                                 iocb->ki_filp->private_data;
2879         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2880         int rc = -EACCES;
2881
2882         /*
2883          * In strict cache mode we need to read from the server every time
2884          * unless we hold a level II oplock: the server can delay the mtime
2885          * change, so we cannot decide whether to invalidate the inode.
2886          * Reading pages can also fail if there are mandatory locks on pages
2887          * affected by this read but not on the region from pos to
2888          * pos+len-1.
2889          */
2890         if (!cinode->clientCanCacheRead)
2891                 return cifs_user_readv(iocb, iov, nr_segs, pos);
2892
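        /*
         * With the POSIX extensions, byte-range locks have POSIX (advisory)
         * semantics, so a cached read cannot collide with a mandatory lock
         * and the generic cached path can be used directly.
         */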
2893         if (cap_unix(tcon->ses) &&
2894             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2895             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2896                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2897
2898         /*
2899          * We need to hold the sem to be sure nobody modifies lock list
2900          * with a brlock that prevents reading.
2901          */
2902         down_read(&cinode->lock_sem);
2903         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2904                                      tcon->ses->server->vals->shared_lock_type,
2905                                      NULL, CIFS_READ_OP))
2906                 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2907         up_read(&cinode->lock_sem);
2908         return rc;
2909 }
2910
2911 static ssize_t
2912 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2913 {
2914         int rc = -EACCES;
2915         unsigned int bytes_read = 0;
2916         unsigned int total_read;
2917         unsigned int current_read_size;
2918         unsigned int rsize;
2919         struct cifs_sb_info *cifs_sb;
2920         struct cifs_tcon *tcon;
2921         struct TCP_Server_Info *server;
2922         unsigned int xid;
2923         char *cur_offset;
2924         struct cifsFileInfo *open_file;
2925         struct cifs_io_parms io_parms;
2926         int buf_type = CIFS_NO_BUFFER;
2927         __u32 pid;
2928
2929         xid = get_xid();
2930         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2931
2932         /* FIXME: set up handlers for larger reads and/or convert to async */
2933         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2934
2935         if (file->private_data == NULL) {
2936                 rc = -EBADF;
2937                 free_xid(xid);
2938                 return rc;
2939         }
2940         open_file = file->private_data;
2941         tcon = tlink_tcon(open_file->tlink);
2942         server = tcon->ses->server;
2943
2944         if (!server->ops->sync_read) {
2945                 free_xid(xid);
2946                 return -ENOSYS;
2947         }
2948
2949         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2950                 pid = open_file->pid;
2951         else
2952                 pid = current->tgid;
2953
2954         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2955                 cFYI(1, "attempting read on write only file instance");
2956
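        /*
         * Issue synchronous reads of at most rsize bytes each until the
         * request is satisfied, the server returns an error, or we hit EOF.
         */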
2957         for (total_read = 0, cur_offset = read_data; read_size > total_read;
2958              total_read += bytes_read, cur_offset += bytes_read) {
2959                 current_read_size = min_t(uint, read_size - total_read, rsize);
2960                 /*
2961                  * For Windows ME and 9x we do not want to request more than
2962                  * the server negotiated, since it will then refuse the read.
2963                  */
2964                 if ((tcon->ses) && !(tcon->ses->capabilities &
2965                                 tcon->ses->server->vals->cap_large_files)) {
2966                         current_read_size = min_t(uint, current_read_size,
2967                                         CIFSMaxBufSize);
2968                 }
2969                 rc = -EAGAIN;
2970                 while (rc == -EAGAIN) {
2971                         if (open_file->invalidHandle) {
2972                                 rc = cifs_reopen_file(open_file, true);
2973                                 if (rc != 0)
2974                                         break;
2975                         }
2976                         io_parms.pid = pid;
2977                         io_parms.tcon = tcon;
2978                         io_parms.offset = *offset;
2979                         io_parms.length = current_read_size;
2980                         rc = server->ops->sync_read(xid, open_file, &io_parms,
2981                                                     &bytes_read, &cur_offset,
2982                                                     &buf_type);
2983                 }
2984                 if (rc || (bytes_read == 0)) {
2985                         if (total_read) {
2986                                 break;
2987                         } else {
2988                                 free_xid(xid);
2989                                 return rc;
2990                         }
2991                 } else {
2992                         cifs_stats_bytes_read(tcon, total_read);
2993                         *offset += bytes_read;
2994                 }
2995         }
2996         free_xid(xid);
2997         return total_read;
2998 }
2999
3000 /*
3001  * If the page is mmap'ed into a process' page tables, then we need to make
3002  * sure that it doesn't change while being written back.
3003  */
3004 static int
3005 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3006 {
3007         struct page *page = vmf->page;
3008
3009         lock_page(page);
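        /* returning VM_FAULT_LOCKED tells the VM we kept the page locked */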
3010         return VM_FAULT_LOCKED;
3011 }
3012
3013 static struct vm_operations_struct cifs_file_vm_ops = {
3014         .fault = filemap_fault,
3015         .page_mkwrite = cifs_page_mkwrite,
3016         .remap_pages = generic_file_remap_pages,
3017 };
3018
3019 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3020 {
3021         int rc, xid;
3022         struct inode *inode = file->f_path.dentry->d_inode;
3023
3024         xid = get_xid();
3025
3026         if (!CIFS_I(inode)->clientCanCacheRead) {
3027                 rc = cifs_invalidate_mapping(inode);
3028                 /* do not leak the xid acquired above on the error path */
3029                 if (rc) { free_xid(xid); return rc; }
3030         }
3031
3032         rc = generic_file_mmap(file, vma);
3033         if (rc == 0)
3034                 vma->vm_ops = &cifs_file_vm_ops;
3035         free_xid(xid);
3036         return rc;
3037 }
3038
3039 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3040 {
3041         int rc, xid;
3042
3043         xid = get_xid();
3044         rc = cifs_revalidate_file(file);
3045         if (rc) {
3046                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
3047                 free_xid(xid);
3048                 return rc;
3049         }
3050         rc = generic_file_mmap(file, vma);
3051         if (rc == 0)
3052                 vma->vm_ops = &cifs_file_vm_ops;
3053         free_xid(xid);
3054         return rc;
3055 }
3056
3057 static void
3058 cifs_readv_complete(struct work_struct *work)
3059 {
3060         unsigned int i;
3061         struct cifs_readdata *rdata = container_of(work,
3062                                                 struct cifs_readdata, work);
3063
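        /*
         * The async read has completed: put each page on the LRU, mark it
         * uptodate if the read succeeded, copy it into fscache, and then
         * drop the page and rdata references.
         */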
3064         for (i = 0; i < rdata->nr_pages; i++) {
3065                 struct page *page = rdata->pages[i];
3066
3067                 lru_cache_add_file(page);
3068
3069                 if (rdata->result == 0) {
3070                         flush_dcache_page(page);
3071                         SetPageUptodate(page);
3072                 }
3073
3074                 unlock_page(page);
3075
3076                 if (rdata->result == 0)
3077                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3078
3079                 page_cache_release(page);
3080                 rdata->pages[i] = NULL;
3081         }
3082         kref_put(&rdata->refcount, cifs_readdata_release);
3083 }
3084
3085 static int
3086 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3087                         struct cifs_readdata *rdata, unsigned int len)
3088 {
3089         int total_read = 0, result = 0;
3090         unsigned int i;
3091         u64 eof;
3092         pgoff_t eof_index;
3093         unsigned int nr_pages = rdata->nr_pages;
3094         struct kvec iov;
3095
3096         /* determine the eof that the server (probably) has */
3097         eof = CIFS_I(rdata->mapping->host)->server_eof;
3098         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3099         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
3100
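        /*
         * Fill whole pages while data remains; a final partial page, if
         * any, sets tailsz so the transport knows the short length of the
         * last kvec.
         */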
3101         rdata->tailsz = PAGE_CACHE_SIZE;
3102         for (i = 0; i < nr_pages; i++) {
3103                 struct page *page = rdata->pages[i];
3104
3105                 if (len >= PAGE_CACHE_SIZE) {
3106                         /* enough data to fill the page */
3107                         iov.iov_base = kmap(page);
3108                         iov.iov_len = PAGE_CACHE_SIZE;
3109                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3110                                 i, page->index, iov.iov_base, iov.iov_len);
3111                         len -= PAGE_CACHE_SIZE;
3112                 } else if (len > 0) {
3113                         /* enough for partial page, fill and zero the rest */
3114                         iov.iov_base = kmap(page);
3115                         iov.iov_len = len;
3116                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3117                                 i, page->index, iov.iov_base, iov.iov_len);
3118                         memset(iov.iov_base + len,
3119                                 '\0', PAGE_CACHE_SIZE - len);
3120                         rdata->tailsz = len;
3121                         len = 0;
3122                 } else if (page->index > eof_index) {
3123                         /*
3124                          * The VFS will not try to do readahead past the
3125                          * i_size, but it's possible that we have outstanding
3126                          * writes with gaps in the middle and the i_size hasn't
3127                          * caught up yet. Populate those with zeroed out pages
3128                          * to prevent the VFS from repeatedly attempting to
3129                          * fill them until the writes are flushed.
3130                          */
3131                         zero_user(page, 0, PAGE_CACHE_SIZE);
3132                         lru_cache_add_file(page);
3133                         flush_dcache_page(page);
3134                         SetPageUptodate(page);
3135                         unlock_page(page);
3136                         page_cache_release(page);
3137                         rdata->pages[i] = NULL;
3138                         rdata->nr_pages--;
3139                         continue;
3140                 } else {
3141                         /* no need to hold page hostage */
3142                         lru_cache_add_file(page);
3143                         unlock_page(page);
3144                         page_cache_release(page);
3145                         rdata->pages[i] = NULL;
3146                         rdata->nr_pages--;
3147                         continue;
3148                 }
3149
3150                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3151                 kunmap(page);
3152                 if (result < 0)
3153                         break;
3154
3155                 total_read += result;
3156         }
3157
3158         return total_read > 0 ? total_read : result;
3159 }
3160
3161 static int cifs_readpages(struct file *file, struct address_space *mapping,
3162         struct list_head *page_list, unsigned num_pages)
3163 {
3164         int rc;
3165         struct list_head tmplist;
3166         struct cifsFileInfo *open_file = file->private_data;
3167         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3168         unsigned int rsize = cifs_sb->rsize;
3169         pid_t pid;
3170
3171         /*
3172          * Give up immediately if rsize is too small to read an entire page.
3173          * The VFS will fall back to readpage. We should never reach this
3174          * point however since we set ra_pages to 0 when the rsize is smaller
3175          * than a cache page.
3176          */
3177         if (unlikely(rsize < PAGE_CACHE_SIZE))
3178                 return 0;
3179
3180         /*
3181          * Read as many pages as possible from fscache. Returns -ENOBUFS
3182          * immediately if the cookie is negative.
3183          */
3184         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3185                                          &num_pages);
3186         if (rc == 0)
3187                 return rc;
3188
3189         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3190                 pid = open_file->pid;
3191         else
3192                 pid = current->tgid;
3193
3194         rc = 0;
3195         INIT_LIST_HEAD(&tmplist);
3196
3197         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3198                 mapping, num_pages);
3199
3200         /*
3201          * Start with the page at end of list and move it to private
3202          * list. Do the same with any following pages until we hit
3203          * the rsize limit, hit an index discontinuity, or run out of
3204          * pages. Issue the async read and then start the loop again
3205          * until the list is empty.
3206          *
3207          * Note that list order is important. The page_list is in
3208          * the order of declining indexes. When we put the pages in
3209          * the rdata->pages, then we want them in increasing order.
3210          */
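        /*
         * For example (assuming 4k pages and a 16k rsize), at most four
         * pages with consecutive indexes would be batched into one rdata
         * and sent to the server as a single async read request.
         */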
3211         while (!list_empty(page_list)) {
3212                 unsigned int i;
3213                 unsigned int bytes = PAGE_CACHE_SIZE;
3214                 unsigned int expected_index;
3215                 unsigned int nr_pages = 1;
3216                 loff_t offset;
3217                 struct page *page, *tpage;
3218                 struct cifs_readdata *rdata;
3219
3220                 page = list_entry(page_list->prev, struct page, lru);
3221
3222                 /*
3223                  * Lock the page and put it in the cache. Since no one else
3224                  * should have access to this page, we're safe to simply set
3225                  * PG_locked without checking it first.
3226                  */
3227                 __set_page_locked(page);
3228                 rc = add_to_page_cache_locked(page, mapping,
3229                                               page->index, GFP_KERNEL);
3230
3231                 /* give up if we can't stick it in the cache */
3232                 if (rc) {
3233                         __clear_page_locked(page);
3234                         break;
3235                 }
3236
3237                 /* move first page to the tmplist */
3238                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3239                 list_move_tail(&page->lru, &tmplist);
3240
3241                 /* now try and add more pages onto the request */
3242                 expected_index = page->index + 1;
3243                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3244                         /* discontinuity? */
3245                         if (page->index != expected_index)
3246                                 break;
3247
3248                         /* would this page push the read over the rsize? */
3249                         if (bytes + PAGE_CACHE_SIZE > rsize)
3250                                 break;
3251
3252                         __set_page_locked(page);
3253                         if (add_to_page_cache_locked(page, mapping,
3254                                                 page->index, GFP_KERNEL)) {
3255                                 __clear_page_locked(page);
3256                                 break;
3257                         }
3258                         list_move_tail(&page->lru, &tmplist);
3259                         bytes += PAGE_CACHE_SIZE;
3260                         expected_index++;
3261                         nr_pages++;
3262                 }
3263
3264                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3265                 if (!rdata) {
3266                         /* best to give up if we're out of mem */
3267                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3268                                 list_del(&page->lru);
3269                                 lru_cache_add_file(page);
3270                                 unlock_page(page);
3271                                 page_cache_release(page);
3272                         }
3273                         rc = -ENOMEM;
3274                         break;
3275                 }
3276
3277                 rdata->cfile = cifsFileInfo_get(open_file);
3278                 rdata->mapping = mapping;
3279                 rdata->offset = offset;
3280                 rdata->bytes = bytes;
3281                 rdata->pid = pid;
3282                 rdata->pagesz = PAGE_CACHE_SIZE;
3283                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3284
3285                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3286                         list_del(&page->lru);
3287                         rdata->pages[rdata->nr_pages++] = page;
3288                 }
3289
3290                 rc = cifs_retry_async_readv(rdata);
3291                 if (rc != 0) {
3292                         for (i = 0; i < rdata->nr_pages; i++) {
3293                                 page = rdata->pages[i];
3294                                 lru_cache_add_file(page);
3295                                 unlock_page(page);
3296                                 page_cache_release(page);
3297                         }
3298                         kref_put(&rdata->refcount, cifs_readdata_release);
3299                         break;
3300                 }
3301
3302                 kref_put(&rdata->refcount, cifs_readdata_release);
3303         }
3304
3305         return rc;
3306 }
3307
3308 static int cifs_readpage_worker(struct file *file, struct page *page,
3309         loff_t *poffset)
3310 {
3311         char *read_data;
3312         int rc;
3313
3314         /* Is the page cached? */
3315         rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3316         if (rc == 0)
3317                 goto read_complete;
3318
3319         page_cache_get(page);
3320         read_data = kmap(page);
3321         /* for reads over a certain size we could initiate async read ahead */
3322
3323         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3324
3325         if (rc < 0)
3326                 goto io_error;
3327         else
3328                 cFYI(1, "Bytes read %d", rc);
3329
3330         file->f_path.dentry->d_inode->i_atime =
3331                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3332
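        /* zero-fill the remainder of the page after a short read */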
3333         if (PAGE_CACHE_SIZE > rc)
3334                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3335
3336         flush_dcache_page(page);
3337         SetPageUptodate(page);
3338
3339         /* send this page to the cache */
3340         cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3341
3342         rc = 0;
3343
3344 io_error:
3345         kunmap(page);
3346         page_cache_release(page);
3347
3348 read_complete:
3349         return rc;
3350 }
3351
3352 static int cifs_readpage(struct file *file, struct page *page)
3353 {
3354         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3355         int rc = -EACCES;
3356         unsigned int xid;
3357
3358         xid = get_xid();
3359
3360         if (file->private_data == NULL) {
3361                 rc = -EBADF;
3362                 free_xid(xid);
3363                 return rc;
3364         }
3365
3366         cFYI(1, "readpage %p at offset %lld 0x%llx",
3367                  page, (long long)offset, (long long)offset);
3368
3369         rc = cifs_readpage_worker(file, page, &offset);
3370
3371         unlock_page(page);
3372
3373         free_xid(xid);
3374         return rc;
3375 }
3376
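/* return 1 if any file handle on this inode is open with write access */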
3377 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3378 {
3379         struct cifsFileInfo *open_file;
3380
3381         spin_lock(&cifs_file_list_lock);
3382         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3383                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3384                         spin_unlock(&cifs_file_list_lock);
3385                         return 1;
3386                 }
3387         }
3388         spin_unlock(&cifs_file_list_lock);
3389         return 0;
3390 }
3391
3392 /* We do not want to update the file size from the server for inodes
3393    open for write, to avoid races with writepage extending the file.
3394    In the future we could consider refreshing the inode only on
3395    increases in the file size, but this is tricky to do without
3396    racing with writebehind page caching in the current Linux
3397    kernel design. */
3398 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3399 {
3400         if (!cifsInode)
3401                 return true;
3402
3403         if (is_inode_writable(cifsInode)) {
3404                 /* This inode is open for write at least once */
3405                 struct cifs_sb_info *cifs_sb;
3406
3407                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3408                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3409                         /* since there is no page cache to corrupt on
3410                            direct I/O, we can change the size safely */
3411                         return true;
3412                 }
3413
3414                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3415                         return true;
3416
3417                 return false;
3418         } else
3419                 return true;
3420 }
3421
3422 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3423                         loff_t pos, unsigned len, unsigned flags,
3424                         struct page **pagep, void **fsdata)
3425 {
3426         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3427         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3428         loff_t page_start = pos & PAGE_MASK;
3429         loff_t i_size;
3430         struct page *page;
3431         int rc = 0;
3432
3433         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3434
3435         page = grab_cache_page_write_begin(mapping, index, flags);
3436         if (!page) {
3437                 rc = -ENOMEM;
3438                 goto out;
3439         }
3440
3441         if (PageUptodate(page))
3442                 goto out;
3443
3444         /*
3445          * If we write a full page it will be up to date, no need to read from
3446          * the server. If the write is short, we'll end up doing a sync write
3447          * instead.
3448          */
3449         if (len == PAGE_CACHE_SIZE)
3450                 goto out;
3451
3452         /*
3453          * optimize away the read when we have an oplock, and we're not
3454          * expecting to use any of the data we'd be reading in. That
3455          * is, when the page lies beyond the EOF, or straddles the EOF
3456          * and the write will cover all of the existing data.
3457          */
3458         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3459                 i_size = i_size_read(mapping->host);
3460                 if (page_start >= i_size ||
3461                     (offset == 0 && (pos + len) >= i_size)) {
3462                         zero_user_segments(page, 0, offset,
3463                                            offset + len,
3464                                            PAGE_CACHE_SIZE);
3465                         /*
3466                          * PageChecked means that the parts of the page
3467                          * to which we're not writing are considered up
3468                          * to date. Once the data is copied to the
3469                          * page, it can be set uptodate.
3470                          */
3471                         SetPageChecked(page);
3472                         goto out;
3473                 }
3474         }
3475
3476         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3477                 /*
3478                  * might as well read a page, it is fast enough. If we get
3479                  * an error, we don't need to return it. cifs_write_end will
3480                  * do a sync write instead since PG_uptodate isn't set.
3481                  */
3482                 cifs_readpage_worker(file, page, &page_start);
3483         } else {
3484                 /* we could try using another file handle if there is one,
3485                    but how would we lock it to prevent a close of that
3486                    handle racing with this read? In any case this page
3487                    will be written out by write_end, so it is fine */
3488         }
3489 out:
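        /* the page is handed back locked; cifs_write_end unlocks it */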
3490         *pagep = page;
3491         return rc;
3492 }
3493
3494 static int cifs_release_page(struct page *page, gfp_t gfp)
3495 {
3496         if (PagePrivate(page))
3497                 return 0;
3498
3499         return cifs_fscache_release_page(page, gfp);
3500 }
3501
3502 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3503 {
3504         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3505
3506         if (offset == 0)
3507                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3508 }
3509
3510 static int cifs_launder_page(struct page *page)
3511 {
3512         int rc = 0;
3513         loff_t range_start = page_offset(page);
3514         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3515         struct writeback_control wbc = {
3516                 .sync_mode = WB_SYNC_ALL,
3517                 .nr_to_write = 0,
3518                 .range_start = range_start,
3519                 .range_end = range_end,
3520         };
3521
3522         cFYI(1, "Launder page: %p", page);
3523
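        /* if the page is still dirty, synchronously write it back now */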
3524         if (clear_page_dirty_for_io(page))
3525                 rc = cifs_writepage_locked(page, &wbc);
3526
3527         cifs_fscache_invalidate_page(page, page->mapping->host);
3528         return rc;
3529 }
3530
3531 void cifs_oplock_break(struct work_struct *work)
3532 {
3533         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3534                                                   oplock_break);
3535         struct inode *inode = cfile->dentry->d_inode;
3536         struct cifsInodeInfo *cinode = CIFS_I(inode);
3537         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3538         int rc = 0;
3539
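        /*
         * Losing the oplock means we must stop caching: flush any dirty
         * pages and, if read caching was lost as well, invalidate the
         * mapping so that later reads go back to the server.
         */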
3540         if (inode && S_ISREG(inode->i_mode)) {
3541                 if (cinode->clientCanCacheRead)
3542                         break_lease(inode, O_RDONLY);
3543                 else
3544                         break_lease(inode, O_WRONLY);
3545                 rc = filemap_fdatawrite(inode->i_mapping);
3546                 if (cinode->clientCanCacheRead == 0) {
3547                         rc = filemap_fdatawait(inode->i_mapping);
3548                         mapping_set_error(inode->i_mapping, rc);
3549                         cifs_invalidate_mapping(inode);
3550                 }
3551                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3552         }
3553
3554         rc = cifs_push_locks(cfile);
3555         if (rc)
3556                 cERROR(1, "Push locks rc = %d", rc);
3557
3558         /*
3559          * Releasing a stale oplock after a recent reconnect of the SMB
3560          * session using a now-incorrect file handle is not a data integrity
3561          * issue, but don't bother sending an oplock release if the session
3562          * is still disconnected, since the server already released it.
3563          */
3564         if (!cfile->oplock_break_cancelled) {
3565                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3566                                                              cinode);
3567                 cFYI(1, "Oplock release rc = %d", rc);
3568         }
3569 }
3570
3571 const struct address_space_operations cifs_addr_ops = {
3572         .readpage = cifs_readpage,
3573         .readpages = cifs_readpages,
3574         .writepage = cifs_writepage,
3575         .writepages = cifs_writepages,
3576         .write_begin = cifs_write_begin,
3577         .write_end = cifs_write_end,
3578         .set_page_dirty = __set_page_dirty_nobuffers,
3579         .releasepage = cifs_release_page,
3580         .invalidatepage = cifs_invalidate_page,
3581         .launder_page = cifs_launder_page,
3582 };
3583
3584 /*
3585  * cifs_readpages requires the server to support a buffer large enough to
3586  * contain the header plus one complete page of data.  Otherwise, we need
3587  * to leave cifs_readpages out of the address space operations.
3588  */
3589 const struct address_space_operations cifs_addr_ops_smallbuf = {
3590         .readpage = cifs_readpage,
3591         .writepage = cifs_writepage,
3592         .writepages = cifs_writepages,
3593         .write_begin = cifs_write_begin,
3594         .write_end = cifs_write_end,
3595         .set_page_dirty = __set_page_dirty_nobuffers,
3596         .releasepage = cifs_release_page,
3597         .invalidatepage = cifs_invalidate_page,
3598         .launder_page = cifs_launder_page,
3599 };