CIFS: Fix cache coherency for read oplock case
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

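/*
 * Map the O_ACCMODE bits of the VFS open flags to the NT access bits
 * requested from the server.
 */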
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause unnecessary access denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

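/*
 * Translate VFS open flags into the SMB_O_* flags used by the POSIX
 * open protocol extension.
 */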
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT)
                posix_flags |= SMB_O_CREAT;
        if (flags & O_EXCL)
                posix_flags |= SMB_O_EXCL;
        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

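/*
 * Choose the CIFS create disposition that matches the O_CREAT, O_EXCL
 * and O_TRUNC combination in the open flags (see the mapping table in
 * cifs_nt_open below).
 */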
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

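/*
 * Open a file with the POSIX open protocol extension and, if the caller
 * asked for it, fill in or instantiate the inode from the returned
 * FILE_UNIX_BASIC_INFO.
 */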
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cFYI(1, "posix open %s", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

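/*
 * Open a file the traditional (non-POSIX) way via server->ops->open and
 * refresh the inode info from the server on success.
 */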
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than replacing it as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, fid, oplock, buf,
                               cifs_sb);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

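/*
 * Allocate and initialize the per-open cifsFileInfo, link it onto the
 * tcon and inode open-file lists, and attach it to file->private_data.
 */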
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);

        spin_lock(&cifs_file_list_lock);
        list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
        /* if readable file instance put first in list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}

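/*
 * Take an extra reference on an already-open file instance.
 */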
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsLockInfo *li, *tmp;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cFYI(1, "closing last open instance for inode %p",
                        cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;
                int rc = -ENOSYS;

                xid = get_xid();
                if (server->ops->close)
                        rc = server->ops->close(xid, tcon, &cifs_file->fid);
                free_xid(xid);
        }

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        kfree(cifs_file);
}

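/*
 * VFS ->open handler: try a POSIX open when the server supports it and
 * fall back to a traditional NT-style open otherwise.
 */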
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
                 inode, file->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix open succeeded");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cERROR(1, "server %s of type %s returned"
                                           " unexpected error on SMB posix open"
                                           ", disabling posix open support."
                                           " Check if server update available.",
                                           tcon->ses->serverName,
                                           tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc)
                        goto out;
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = NO_CHANGE_64,
                        .gid    = NO_CHANGE_64,
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
        int rc = 0;

        /* BB list all locks open on this file and relock */

        return rc;
}

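/*
 * Reopen a file whose handle was invalidated (e.g. after a reconnect).
 * If can_flush is set, write back dirty pages and refresh the inode
 * info from the server before reacquiring locks.
 */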
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab rename sem here because various ops, including those
         * that already have the rename sem can end up causing writepage to get
         * called and if the server was down that means we end up here, and we
         * can never tell if the caller already has the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
             full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix reopen succeeded");
                        goto reopen_success;
                }
                /*
                 * fallthrough to retry open the old way on errors, especially
                 * in the reconnect path it is important to retry hard
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * CIFSSMBOpen and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cFYI(1, "cifs_reopen returned 0x%x", rc);
                cFYI(1, "oplock: %d", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &fid, oplock);
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cFYI(1, "Closedir inode = 0x%p", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cFYI(1, "Freeing private data in close dir");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cFYI(1, "Closing uncompleted readdir with rc %d", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cFYI(1, "closedir free smb buf in srch struct");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

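/*
 * Allocate a cifsLockInfo describing a byte range lock held by the
 * current thread group.
 */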
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

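/*
 * Check a single file descriptor's lock list for a lock that conflicts
 * with the given range; return it through conf_lock if found.
 */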
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, bool rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check && server->ops->compare_fids(cfile, cur_cfile) &&
                    current->tgid == li->pid)
                        continue;
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        bool rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, false);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, false);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

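/*
 * Send all cached byte-range locks for this file to the server in
 * batched LOCKING_ANDX requests, then clear can_cache_brlcks.
 */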
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                free_xid(xid);
                return rc;
        }

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                up_write(&cinode->lock_sem);
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                up_write(&cinode->lock_sem);
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);

        kfree(buf);
        free_xid(xid);
        return rc;
}

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                free_xid(xid);
                return rc;
        }

        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        unlock_flocks();

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cERROR(1, "Can't push all brlocks!");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        unlock_flocks();

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);

        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                return cifs_push_posix_locks(cfile);

        return tcon->ses->server->ops->push_mand_locks(cfile);
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cFYI(1, "Posix");
        if (flock->fl_flags & FL_FLOCK)
                cFYI(1, "Flock");
        if (flock->fl_flags & FL_SLEEP) {
                cFYI(1, "Blocking lock");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cFYI(1, "Process suspended by mandatory locking - "
                        "not implemented yet");
        if (flock->fl_flags & FL_LEASE)
                cFYI(1, "Lease on file - not implemented yet");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
                cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cFYI(1, "F_WRLCK ");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cFYI(1, "F_UNLCK");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cFYI(1, "F_RDLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cFYI(1, "F_EXLCK");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cFYI(1, "F_SHLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cFYI(1, "Unknown type of lock");
}

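/*
 * Handle F_GETLK: test whether a conflicting lock exists, first against
 * cached locks and then, if necessary, by probing the server with a
 * temporary lock/unlock pair.
 */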
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cERROR(1, "Error unlocking previously locked "
                                  "range %d during test of lock", rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cERROR(1, "Error unlocking previously locked "
                                  "range %d during test of lock", rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

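/*
 * Remove all cached locks that fall inside the unlock range and, for
 * locks already known to the server, send batched unlock requests.
 */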
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again to
                         * the file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
                                        rc = stored_rc;
                                } else
                                        /*
                                         * The unlock range request succeeded -
                                         * free the tmp list.
                                         */
                                        cifs_free_llist(&tmp_llist);
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               types[i], num, 0, buf);
                        if (stored_rc) {
                                cifs_move_llist(&tmp_llist,
                                                &cfile->llist->locks);
                                rc = stored_rc;
                        } else
                                cifs_free_llist(&tmp_llist);
                }
        }

        up_write(&cinode->lock_sem);
        kfree(buf);
        return rc;
}

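/*
 * Handle F_SETLK(W): set or clear a byte-range lock, using the POSIX
 * protocol extension when available and mandatory locks otherwise.
 */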
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, int lock, int unlock,
           unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_set(file, flock);
                if (!rc || rc < 0)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;

                if (unlock == 1)
                        posix_lock_type = CIFS_UNLCK;

                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
                                      current->tgid, flock->fl_start, length,
                                      NULL, posix_lock_type, wait_flag);
                goto out;
        }

        if (lock) {
                struct cifsLockInfo *lock;

                lock = cifs_lock_init(flock->fl_start, length, type);
                if (!lock)
                        return -ENOMEM;

                rc = cifs_lock_add_if(cfile, lock, wait_flag);
                if (rc < 0)
                        kfree(lock);
                if (rc <= 0)
                        goto out;

                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 1, 0, wait_flag);
                if (rc) {
                        kfree(lock);
                        goto out;
                }

                cifs_lock_add(cfile, lock);
        } else if (unlock)
                rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
        if (flock->fl_flags & FL_POSIX)
                posix_lock_file_wait(file, flock);
        return rc;
}

1434 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1435 {
1436         int rc, xid;
1437         int lock = 0, unlock = 0;
1438         bool wait_flag = false;
1439         bool posix_lck = false;
1440         struct cifs_sb_info *cifs_sb;
1441         struct cifs_tcon *tcon;
1442         struct cifsInodeInfo *cinode;
1443         struct cifsFileInfo *cfile;
1444         __u16 netfid;
1445         __u32 type;
1446
1447         rc = -EACCES;
1448         xid = get_xid();
1449
1450         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1451                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1452                 flock->fl_start, flock->fl_end);
1453
1454         cfile = (struct cifsFileInfo *)file->private_data;
1455         tcon = tlink_tcon(cfile->tlink);
1456
1457         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1458                         tcon->ses->server);
1459
1460         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1461         netfid = cfile->fid.netfid;
1462         cinode = CIFS_I(file->f_path.dentry->d_inode);
1463
1464         if (cap_unix(tcon->ses) &&
1465             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1466             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1467                 posix_lck = true;
1468         /*
1469          * BB add code here to normalize offset and length to account for
1470          * negative length which we cannot accept over the wire.
1471          */
1472         if (IS_GETLK(cmd)) {
1473                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1474                 free_xid(xid);
1475                 return rc;
1476         }
1477
1478         if (!lock && !unlock) {
1479                 /*
1480                  * if this is neither a lock nor an unlock request, there is
1481                  * nothing to do since we do not know what the request is
1482                  */
1483                 free_xid(xid);
1484                 return -EOPNOTSUPP;
1485         }
1486
1487         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1488                         xid);
1489         free_xid(xid);
1490         return rc;
1491 }
1492
1493 /*
1494  * update the file size (if needed) after a write. Should be called with
1495  * the inode->i_lock held
1496  */
1497 void
1498 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1499                       unsigned int bytes_written)
1500 {
1501         loff_t end_of_write = offset + bytes_written;
1502
1503         if (end_of_write > cifsi->server_eof)
1504                 cifsi->server_eof = end_of_write;
1505 }
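
/*
 * A typical call site (see cifs_write() below) brackets the update with the
 * inode spinlock:
 *
 *	spin_lock(&inode->i_lock);
 *	cifs_update_eof(cifsi, offset, bytes_written);
 *	spin_unlock(&inode->i_lock);
 */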
1506
1507 static ssize_t
1508 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1509            size_t write_size, loff_t *offset)
1510 {
1511         int rc = 0;
1512         unsigned int bytes_written = 0;
1513         unsigned int total_written;
1514         struct cifs_sb_info *cifs_sb;
1515         struct cifs_tcon *tcon;
1516         struct TCP_Server_Info *server;
1517         unsigned int xid;
1518         struct dentry *dentry = open_file->dentry;
1519         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1520         struct cifs_io_parms io_parms;
1521
1522         cifs_sb = CIFS_SB(dentry->d_sb);
1523
1524         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1525              *offset, dentry->d_name.name);
1526
1527         tcon = tlink_tcon(open_file->tlink);
1528         server = tcon->ses->server;
1529
1530         if (!server->ops->sync_write)
1531                 return -ENOSYS;
1532
1533         xid = get_xid();
1534
1535         for (total_written = 0; write_size > total_written;
1536              total_written += bytes_written) {
1537                 rc = -EAGAIN;
1538                 while (rc == -EAGAIN) {
1539                         struct kvec iov[2];
1540                         unsigned int len;
1541
1542                         if (open_file->invalidHandle) {
1543                                 /* we could deadlock if we called
1544                                    filemap_fdatawait from here so tell
1545                                    reopen_file not to flush data to
1546                                    the server now */
1547                                 rc = cifs_reopen_file(open_file, false);
1548                                 if (rc != 0)
1549                                         break;
1550                         }
1551
1552                         len = min((size_t)cifs_sb->wsize,
1553                                   write_size - total_written);
1554                         /* iov[0] is reserved for smb header */
1555                         iov[1].iov_base = (char *)write_data + total_written;
1556                         iov[1].iov_len = len;
1557                         io_parms.pid = pid;
1558                         io_parms.tcon = tcon;
1559                         io_parms.offset = *offset;
1560                         io_parms.length = len;
1561                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1562                                                      &bytes_written, iov, 1);
1563                 }
1564                 if (rc || (bytes_written == 0)) {
1565                         if (total_written)
1566                                 break;
1567                         else {
1568                                 free_xid(xid);
1569                                 return rc;
1570                         }
1571                 } else {
1572                         spin_lock(&dentry->d_inode->i_lock);
1573                         cifs_update_eof(cifsi, *offset, bytes_written);
1574                         spin_unlock(&dentry->d_inode->i_lock);
1575                         *offset += bytes_written;
1576                 }
1577         }
1578
1579         cifs_stats_bytes_written(tcon, total_written);
1580
1581         if (total_written > 0) {
1582                 spin_lock(&dentry->d_inode->i_lock);
1583                 if (*offset > dentry->d_inode->i_size)
1584                         i_size_write(dentry->d_inode, *offset);
1585                 spin_unlock(&dentry->d_inode->i_lock);
1586         }
1587         mark_inode_dirty_sync(dentry->d_inode);
1588         free_xid(xid);
1589         return total_written;
1590 }
1591
1592 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1593                                         bool fsuid_only)
1594 {
1595         struct cifsFileInfo *open_file = NULL;
1596         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1597
1598         /* only filter by fsuid on multiuser mounts */
1599         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1600                 fsuid_only = false;
1601
1602         spin_lock(&cifs_file_list_lock);
1603         /* we could simply get the first list entry since write-only entries
1604            are always at the end of the list but since the first entry might
1605            have a close pending, we go through the whole list */
1606         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1607                 if (fsuid_only && open_file->uid != current_fsuid())
1608                         continue;
1609                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1610                         if (!open_file->invalidHandle) {
1611                                 /* found a good file */
1612                                 /* lock it so it will not be closed on us */
1613                                 cifsFileInfo_get_locked(open_file);
1614                                 spin_unlock(&cifs_file_list_lock);
1615                                 return open_file;
1616                         } /* else might as well continue, and look for
1617                              another, or simply have the caller reopen it
1618                              again rather than trying to fix this handle */
1619                 } else /* write only file */
1620                         break; /* write only files are last so must be done */
1621         }
1622         spin_unlock(&cifs_file_list_lock);
1623         return NULL;
1624 }
1625
1626 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1627                                         bool fsuid_only)
1628 {
1629         struct cifsFileInfo *open_file, *inv_file = NULL;
1630         struct cifs_sb_info *cifs_sb;
1631         bool any_available = false;
1632         int rc;
1633         unsigned int refind = 0;
1634
1635         /* Having a null inode here (because mapping->host was set to zero by
1636         the VFS or MM) should not happen but we had reports of an oops (due to
1637         it being zero) during stress test cases, so we need to check for it */
1638
1639         if (cifs_inode == NULL) {
1640                 cERROR(1, "Null inode passed to find_writable_file");
1641                 dump_stack();
1642                 return NULL;
1643         }
1644
1645         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1646
1647         /* only filter by fsuid on multiuser mounts */
1648         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1649                 fsuid_only = false;
1650
1651         spin_lock(&cifs_file_list_lock);
1652 refind_writable:
1653         if (refind > MAX_REOPEN_ATT) {
1654                 spin_unlock(&cifs_file_list_lock);
1655                 return NULL;
1656         }
1657         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1658                 if (!any_available && open_file->pid != current->tgid)
1659                         continue;
1660                 if (fsuid_only && open_file->uid != current_fsuid())
1661                         continue;
1662                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1663                         if (!open_file->invalidHandle) {
1664                                 /* found a good writable file */
1665                                 cifsFileInfo_get_locked(open_file);
1666                                 spin_unlock(&cifs_file_list_lock);
1667                                 return open_file;
1668                         } else {
1669                                 if (!inv_file)
1670                                         inv_file = open_file;
1671                         }
1672                 }
1673         }
1674         /* couldn't find a usable FH with the same pid, try any available */
1675         if (!any_available) {
1676                 any_available = true;
1677                 goto refind_writable;
1678         }
1679
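        /* no valid handle was found; fall back to reopening an invalidated one */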
1680         if (inv_file) {
1681                 any_available = false;
1682                 cifsFileInfo_get_locked(inv_file);
1683         }
1684
1685         spin_unlock(&cifs_file_list_lock);
1686
1687         if (inv_file) {
1688                 rc = cifs_reopen_file(inv_file, false);
1689                 if (!rc)
1690                         return inv_file;
1691                 else {
1692                         spin_lock(&cifs_file_list_lock);
1693                         list_move_tail(&inv_file->flist,
1694                                         &cifs_inode->openFileList);
1695                         spin_unlock(&cifs_file_list_lock);
1696                         cifsFileInfo_put(inv_file);
1697                         spin_lock(&cifs_file_list_lock);
1698                         ++refind;
1699                         goto refind_writable;
1700                 }
1701         }
1702
1703         return NULL;
1704 }
1705
1706 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1707 {
1708         struct address_space *mapping = page->mapping;
1709         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1710         char *write_data;
1711         int rc = -EFAULT;
1712         int bytes_written = 0;
1713         struct inode *inode;
1714         struct cifsFileInfo *open_file;
1715
1716         if (!mapping || !mapping->host)
1717                 return -EFAULT;
1718
1719         inode = page->mapping->host;
1720
1721         offset += (loff_t)from;
1722         write_data = kmap(page);
1723         write_data += from;
1724
1725         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1726                 kunmap(page);
1727                 return -EIO;
1728         }
1729
1730         /* racing with truncate? */
1731         if (offset > mapping->host->i_size) {
1732                 kunmap(page);
1733                 return 0; /* don't care */
1734         }
1735
1736         /* check to make sure that we are not extending the file */
1737         if (mapping->host->i_size - offset < (loff_t)to)
1738                 to = (unsigned)(mapping->host->i_size - offset);
1739
1740         open_file = find_writable_file(CIFS_I(mapping->host), false);
1741         if (open_file) {
1742                 bytes_written = cifs_write(open_file, open_file->pid,
1743                                            write_data, to - from, &offset);
1744                 cifsFileInfo_put(open_file);
1745                 /* Does mm or vfs already set times? */
1746                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1747                 if ((bytes_written > 0) && (offset))
1748                         rc = 0;
1749                 else if (bytes_written < 0)
1750                         rc = bytes_written;
1751         } else {
1752                 cFYI(1, "No writeable filehandles for inode");
1753                 rc = -EIO;
1754         }
1755
1756         kunmap(page);
1757         return rc;
1758 }
1759
1760 static int cifs_writepages(struct address_space *mapping,
1761                            struct writeback_control *wbc)
1762 {
1763         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1764         bool done = false, scanned = false, range_whole = false;
1765         pgoff_t end, index;
1766         struct cifs_writedata *wdata;
1767         struct TCP_Server_Info *server;
1768         struct page *page;
1769         int rc = 0;
1770         loff_t isize = i_size_read(mapping->host);
1771
1772         /*
1773          * If wsize is smaller than the page cache size, default to writing
1774          * one page at a time via cifs_writepage
1775          */
1776         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1777                 return generic_writepages(mapping, wbc);
1778
1779         if (wbc->range_cyclic) {
1780                 index = mapping->writeback_index; /* Start from prev offset */
1781                 end = -1;
1782         } else {
1783                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1784                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1785                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1786                         range_whole = true;
1787                 scanned = true;
1788         }
1789 retry:
1790         while (!done && index <= end) {
1791                 unsigned int i, nr_pages, found_pages;
1792                 pgoff_t next = 0, tofind;
1793                 struct page **pages;
1794
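                /*
                 * e.g. assuming 4096-byte pages and a wsize of 57344, this
                 * caps each batch at 14 pages (and at the remaining range).
                 */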
1795                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1796                                 end - index) + 1;
1797
1798                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1799                                              cifs_writev_complete);
1800                 if (!wdata) {
1801                         rc = -ENOMEM;
1802                         break;
1803                 }
1804
1805                 /*
1806                  * find_get_pages_tag seems to return a max of 256 on each
1807                  * iteration, so we must call it several times in order to
1808                  * fill the array or the wsize is effectively limited to
1809                  * 256 * PAGE_CACHE_SIZE.
1810                  */
1811                 found_pages = 0;
1812                 pages = wdata->pages;
1813                 do {
1814                         nr_pages = find_get_pages_tag(mapping, &index,
1815                                                         PAGECACHE_TAG_DIRTY,
1816                                                         tofind, pages);
1817                         found_pages += nr_pages;
1818                         tofind -= nr_pages;
1819                         pages += nr_pages;
1820                 } while (nr_pages && tofind && index <= end);
1821
1822                 if (found_pages == 0) {
1823                         kref_put(&wdata->refcount, cifs_writedata_release);
1824                         break;
1825                 }
1826
1827                 nr_pages = 0;
1828                 for (i = 0; i < found_pages; i++) {
1829                         page = wdata->pages[i];
1830                         /*
1831                          * At this point we hold neither mapping->tree_lock nor
1832                          * lock on the page itself: the page may be truncated or
1833                          * invalidated (changing page->mapping to NULL), or even
1834                          * swizzled back from swapper_space to tmpfs file
1835                          * mapping
1836                          */
1837
1838                         if (nr_pages == 0)
1839                                 lock_page(page);
1840                         else if (!trylock_page(page))
1841                                 break;
1842
1843                         if (unlikely(page->mapping != mapping)) {
1844                                 unlock_page(page);
1845                                 break;
1846                         }
1847
1848                         if (!wbc->range_cyclic && page->index > end) {
1849                                 done = true;
1850                                 unlock_page(page);
1851                                 break;
1852                         }
1853
1854                         if (next && (page->index != next)) {
1855                                 /* Not next consecutive page */
1856                                 unlock_page(page);
1857                                 break;
1858                         }
1859
1860                         if (wbc->sync_mode != WB_SYNC_NONE)
1861                                 wait_on_page_writeback(page);
1862
1863                         if (PageWriteback(page) ||
1864                                         !clear_page_dirty_for_io(page)) {
1865                                 unlock_page(page);
1866                                 break;
1867                         }
1868
1869                         /*
1870                          * This actually clears the dirty bit in the radix tree.
1871                          * See cifs_writepage() for more commentary.
1872                          */
1873                         set_page_writeback(page);
1874
1875                         if (page_offset(page) >= isize) {
1876                                 done = true;
1877                                 unlock_page(page);
1878                                 end_page_writeback(page);
1879                                 break;
1880                         }
1881
1882                         wdata->pages[i] = page;
1883                         next = page->index + 1;
1884                         ++nr_pages;
1885                 }
1886
1887                 /* reset index to refind any pages skipped */
1888                 if (nr_pages == 0)
1889                         index = wdata->pages[0]->index + 1;
1890
1891                 /* put any pages we aren't going to use */
1892                 for (i = nr_pages; i < found_pages; i++) {
1893                         page_cache_release(wdata->pages[i]);
1894                         wdata->pages[i] = NULL;
1895                 }
1896
1897                 /* nothing to write? */
1898                 if (nr_pages == 0) {
1899                         kref_put(&wdata->refcount, cifs_writedata_release);
1900                         continue;
1901                 }
1902
1903                 wdata->sync_mode = wbc->sync_mode;
1904                 wdata->nr_pages = nr_pages;
1905                 wdata->offset = page_offset(wdata->pages[0]);
1906                 wdata->pagesz = PAGE_CACHE_SIZE;
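                /*
                 * Only the last page may be partial: tailsz is however much of
                 * it lies below EOF. e.g. with 3 pages and EOF 100 bytes into
                 * the last one, bytes = 2 * PAGE_CACHE_SIZE + 100.
                 */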
1907                 wdata->tailsz =
1908                         min(isize - page_offset(wdata->pages[nr_pages - 1]),
1909                             (loff_t)PAGE_CACHE_SIZE);
1910                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
1911                                         wdata->tailsz;
1912
1913                 do {
1914                         if (wdata->cfile != NULL)
1915                                 cifsFileInfo_put(wdata->cfile);
1916                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1917                                                           false);
1918                         if (!wdata->cfile) {
1919                                 cERROR(1, "No writable handles for inode");
1920                                 rc = -EBADF;
1921                                 break;
1922                         }
1923                         wdata->pid = wdata->cfile->pid;
1924                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1925                         rc = server->ops->async_writev(wdata);
1926                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1927
1928                 for (i = 0; i < nr_pages; ++i)
1929                         unlock_page(wdata->pages[i]);
1930
1931                 /* send failure -- clean up the mess */
1932                 if (rc != 0) {
1933                         for (i = 0; i < nr_pages; ++i) {
1934                                 if (rc == -EAGAIN)
1935                                         redirty_page_for_writepage(wbc,
1936                                                            wdata->pages[i]);
1937                                 else
1938                                         SetPageError(wdata->pages[i]);
1939                                 end_page_writeback(wdata->pages[i]);
1940                                 page_cache_release(wdata->pages[i]);
1941                         }
1942                         if (rc != -EAGAIN)
1943                                 mapping_set_error(mapping, rc);
1944                 }
1945                 kref_put(&wdata->refcount, cifs_writedata_release);
1946
1947                 wbc->nr_to_write -= nr_pages;
1948                 if (wbc->nr_to_write <= 0)
1949                         done = true;
1950
1951                 index = next;
1952         }
1953
1954         if (!scanned && !done) {
1955                 /*
1956                  * We hit the last page and there is more work to be done: wrap
1957                  * back to the start of the file
1958                  */
1959                 scanned = true;
1960                 index = 0;
1961                 goto retry;
1962         }
1963
1964         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1965                 mapping->writeback_index = index;
1966
1967         return rc;
1968 }
1969
1970 static int
1971 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1972 {
1973         int rc;
1974         unsigned int xid;
1975
1976         xid = get_xid();
1977 /* BB add check for wbc flags */
1978         page_cache_get(page);
1979         if (!PageUptodate(page))
1980                 cFYI(1, "ppw - page not up to date");
1981
1982         /*
1983          * Set the "writeback" flag, and clear "dirty" in the radix tree.
1984          *
1985          * A writepage() implementation always needs to do either this,
1986          * or re-dirty the page with "redirty_page_for_writepage()" in
1987          * the case of a failure.
1988          *
1989          * Just unlocking the page will cause the radix tree tag-bits
1990          * to fail to update with the state of the page correctly.
1991          */
1992         set_page_writeback(page);
1993 retry_write:
1994         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1995         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1996                 goto retry_write;
1997         else if (rc == -EAGAIN)
1998                 redirty_page_for_writepage(wbc, page);
1999         else if (rc != 0)
2000                 SetPageError(page);
2001         else
2002                 SetPageUptodate(page);
2003         end_page_writeback(page);
2004         page_cache_release(page);
2005         free_xid(xid);
2006         return rc;
2007 }
2008
2009 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2010 {
2011         int rc = cifs_writepage_locked(page, wbc);
2012         unlock_page(page);
2013         return rc;
2014 }
2015
2016 static int cifs_write_end(struct file *file, struct address_space *mapping,
2017                         loff_t pos, unsigned len, unsigned copied,
2018                         struct page *page, void *fsdata)
2019 {
2020         int rc;
2021         struct inode *inode = mapping->host;
2022         struct cifsFileInfo *cfile = file->private_data;
2023         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2024         __u32 pid;
2025
2026         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2027                 pid = cfile->pid;
2028         else
2029                 pid = current->tgid;
2030
2031         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2032                  page, pos, copied);
2033
2034         if (PageChecked(page)) {
2035                 if (copied == len)
2036                         SetPageUptodate(page);
2037                 ClearPageChecked(page);
2038         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2039                 SetPageUptodate(page);
2040
2041         if (!PageUptodate(page)) {
2042                 char *page_data;
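                /* byte offset of pos within its page */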
2043                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2044                 unsigned int xid;
2045
2046                 xid = get_xid();
2047                 /* this is probably better than directly calling
2048                    cifs_partialpagewrite since in this function the file handle is
2049                    known which we might as well leverage */
2050                 /* BB check if anything else is missing out of ppw
2051                    such as updating last write time */
2052                 page_data = kmap(page);
2053                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2054                 /* if (rc < 0) should we set writebehind rc? */
2055                 kunmap(page);
2056
2057                 free_xid(xid);
2058         } else {
2059                 rc = copied;
2060                 pos += copied;
2061                 set_page_dirty(page);
2062         }
2063
2064         if (rc > 0) {
2065                 spin_lock(&inode->i_lock);
2066                 if (pos > inode->i_size)
2067                         i_size_write(inode, pos);
2068                 spin_unlock(&inode->i_lock);
2069         }
2070
2071         unlock_page(page);
2072         page_cache_release(page);
2073
2074         return rc;
2075 }
2076
2077 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2078                       int datasync)
2079 {
2080         unsigned int xid;
2081         int rc = 0;
2082         struct cifs_tcon *tcon;
2083         struct TCP_Server_Info *server;
2084         struct cifsFileInfo *smbfile = file->private_data;
2085         struct inode *inode = file->f_path.dentry->d_inode;
2086         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2087
2088         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2089         if (rc)
2090                 return rc;
2091         mutex_lock(&inode->i_mutex);
2092
2093         xid = get_xid();
2094
2095         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2096                 file->f_path.dentry->d_name.name, datasync);
2097
2098         if (!CIFS_I(inode)->clientCanCacheRead) {
2099                 rc = cifs_invalidate_mapping(inode);
2100                 if (rc) {
2101                         cFYI(1, "rc: %d during invalidate phase", rc);
2102                         rc = 0; /* don't care about it in fsync */
2103                 }
2104         }
2105
2106         tcon = tlink_tcon(smbfile->tlink);
2107         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2108                 server = tcon->ses->server;
2109                 if (server->ops->flush)
2110                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2111                 else
2112                         rc = -ENOSYS;
2113         }
2114
2115         free_xid(xid);
2116         mutex_unlock(&inode->i_mutex);
2117         return rc;
2118 }
2119
2120 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2121 {
2122         unsigned int xid;
2123         int rc = 0;
2124         struct cifs_tcon *tcon;
2125         struct TCP_Server_Info *server;
2126         struct cifsFileInfo *smbfile = file->private_data;
2127         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2128         struct inode *inode = file->f_mapping->host;
2129
2130         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2131         if (rc)
2132                 return rc;
2133         mutex_lock(&inode->i_mutex);
2134
2135         xid = get_xid();
2136
2137         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2138                 file->f_path.dentry->d_name.name, datasync);
2139
2140         tcon = tlink_tcon(smbfile->tlink);
2141         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2142                 server = tcon->ses->server;
2143                 if (server->ops->flush)
2144                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2145                 else
2146                         rc = -ENOSYS;
2147         }
2148
2149         free_xid(xid);
2150         mutex_unlock(&inode->i_mutex);
2151         return rc;
2152 }
2153
2154 /*
2155  * As the file closes, flush all cached write data for this inode, checking
2156  * for write-behind errors.
2157  */
2158 int cifs_flush(struct file *file, fl_owner_t id)
2159 {
2160         struct inode *inode = file->f_path.dentry->d_inode;
2161         int rc = 0;
2162
2163         if (file->f_mode & FMODE_WRITE)
2164                 rc = filemap_write_and_wait(inode->i_mapping);
2165
2166         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2167
2168         return rc;
2169 }
2170
2171 static int
2172 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2173 {
2174         int rc = 0;
2175         unsigned long i;
2176
2177         for (i = 0; i < num_pages; i++) {
2178                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2179                 if (!pages[i]) {
2180                         /*
2181                          * save the number of pages we have already allocated
2182                          * and return with -ENOMEM
2183                          */
2184                         num_pages = i;
2185                         rc = -ENOMEM;
2186                         break;
2187                 }
2188         }
2189
2190         if (rc) {
2191                 for (i = 0; i < num_pages; i++)
2192                         put_page(pages[i]);
2193         }
2194         return rc;
2195 }
2196
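/*
 * e.g. assuming PAGE_SIZE is 4096 and a wsize of 57344: for len = 10000,
 * get_numpages() sets *cur_len = 10000 and returns DIV_ROUND_UP(10000, 4096),
 * i.e. 3 pages.
 */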
2197 static inline
2198 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2199 {
2200         size_t num_pages;
2201         size_t clen;
2202
2203         clen = min_t(const size_t, len, wsize);
2204         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2205
2206         if (cur_len)
2207                 *cur_len = clen;
2208
2209         return num_pages;
2210 }
2211
2212 static void
2213 cifs_uncached_writev_complete(struct work_struct *work)
2214 {
2215         int i;
2216         struct cifs_writedata *wdata = container_of(work,
2217                                         struct cifs_writedata, work);
2218         struct inode *inode = wdata->cfile->dentry->d_inode;
2219         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2220
2221         spin_lock(&inode->i_lock);
2222         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2223         if (cifsi->server_eof > inode->i_size)
2224                 i_size_write(inode, cifsi->server_eof);
2225         spin_unlock(&inode->i_lock);
2226
2227         complete(&wdata->done);
2228
2229         if (wdata->result != -EAGAIN) {
2230                 for (i = 0; i < wdata->nr_pages; i++)
2231                         put_page(wdata->pages[i]);
2232         }
2233
2234         kref_put(&wdata->refcount, cifs_writedata_release);
2235 }
2236
2237 /* attempt to send write to server, retry on any -EAGAIN errors */
2238 static int
2239 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2240 {
2241         int rc;
2242         struct TCP_Server_Info *server;
2243
2244         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2245
2246         do {
2247                 if (wdata->cfile->invalidHandle) {
2248                         rc = cifs_reopen_file(wdata->cfile, false);
2249                         if (rc != 0)
2250                                 continue;
2251                 }
2252                 rc = server->ops->async_writev(wdata);
2253         } while (rc == -EAGAIN);
2254
2255         return rc;
2256 }
2257
2258 static ssize_t
2259 cifs_iovec_write(struct file *file, const struct iovec *iov,
2260                  unsigned long nr_segs, loff_t *poffset)
2261 {
2262         unsigned long nr_pages, i;
2263         size_t copied, len, cur_len;
2264         ssize_t total_written = 0;
2265         loff_t offset;
2266         struct iov_iter it;
2267         struct cifsFileInfo *open_file;
2268         struct cifs_tcon *tcon;
2269         struct cifs_sb_info *cifs_sb;
2270         struct cifs_writedata *wdata, *tmp;
2271         struct list_head wdata_list;
2272         int rc;
2273         pid_t pid;
2274
2275         len = iov_length(iov, nr_segs);
2276         if (!len)
2277                 return 0;
2278
2279         rc = generic_write_checks(file, poffset, &len, 0);
2280         if (rc)
2281                 return rc;
2282
2283         INIT_LIST_HEAD(&wdata_list);
2284         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2285         open_file = file->private_data;
2286         tcon = tlink_tcon(open_file->tlink);
2287
2288         if (!tcon->ses->server->ops->async_writev)
2289                 return -ENOSYS;
2290
2291         offset = *poffset;
2292
2293         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2294                 pid = open_file->pid;
2295         else
2296                 pid = current->tgid;
2297
2298         iov_iter_init(&it, iov, nr_segs, len, 0);
2299         do {
2300                 size_t save_len;
2301
2302                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2303                 wdata = cifs_writedata_alloc(nr_pages,
2304                                              cifs_uncached_writev_complete);
2305                 if (!wdata) {
2306                         rc = -ENOMEM;
2307                         break;
2308                 }
2309
2310                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2311                 if (rc) {
2312                         kfree(wdata);
2313                         break;
2314                 }
2315
2316                 save_len = cur_len;
2317                 for (i = 0; i < nr_pages; i++) {
2318                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2319                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2320                                                          0, copied);
2321                         cur_len -= copied;
2322                         iov_iter_advance(&it, copied);
2323                 }
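                /* cur_len is now the number of bytes actually copied in */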
2324                 cur_len = save_len - cur_len;
2325
2326                 wdata->sync_mode = WB_SYNC_ALL;
2327                 wdata->nr_pages = nr_pages;
2328                 wdata->offset = (__u64)offset;
2329                 wdata->cfile = cifsFileInfo_get(open_file);
2330                 wdata->pid = pid;
2331                 wdata->bytes = cur_len;
2332                 wdata->pagesz = PAGE_SIZE;
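                /* tailsz: bytes that land in the final, possibly partial, page */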
2333                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2334                 rc = cifs_uncached_retry_writev(wdata);
2335                 if (rc) {
2336                         kref_put(&wdata->refcount, cifs_writedata_release);
2337                         break;
2338                 }
2339
2340                 list_add_tail(&wdata->list, &wdata_list);
2341                 offset += cur_len;
2342                 len -= cur_len;
2343         } while (len > 0);
2344
2345         /*
2346          * If at least one write was successfully sent, then discard any rc
2347          * value from the later writes. If a later write succeeds, then
2348          * we'll end up returning whatever was written. If it fails, then
2349          * we'll get a new rc value from that.
2350          */
2351         if (!list_empty(&wdata_list))
2352                 rc = 0;
2353
2354         /*
2355          * Wait for and collect replies for any successful sends in order of
2356          * increasing offset. Once an error is hit or we get a fatal signal
2357          * while waiting, then return without waiting for any more replies.
2358          */
2359 restart_loop:
2360         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2361                 if (!rc) {
2362                         /* FIXME: freezable too? */
2363                         rc = wait_for_completion_killable(&wdata->done);
2364                         if (rc)
2365                                 rc = -EINTR;
2366                         else if (wdata->result)
2367                                 rc = wdata->result;
2368                         else
2369                                 total_written += wdata->bytes;
2370
2371                         /* resend call if it's a retryable error */
2372                         if (rc == -EAGAIN) {
2373                                 rc = cifs_uncached_retry_writev(wdata);
2374                                 goto restart_loop;
2375                         }
2376                 }
2377                 list_del_init(&wdata->list);
2378                 kref_put(&wdata->refcount, cifs_writedata_release);
2379         }
2380
2381         if (total_written > 0)
2382                 *poffset += total_written;
2383
2384         cifs_stats_bytes_written(tcon, total_written);
2385         return total_written ? total_written : (ssize_t)rc;
2386 }
2387
2388 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2389                                 unsigned long nr_segs, loff_t pos)
2390 {
2391         ssize_t written;
2392         struct inode *inode;
2393
2394         inode = iocb->ki_filp->f_path.dentry->d_inode;
2395
2396         /*
2397          * BB - optimize the case when signing is disabled. We can drop this
2398          * extra memory-to-memory copying and use iovec buffers for constructing
2399          * the write request.
2400          */
2401
2402         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2403         if (written > 0) {
2404                 CIFS_I(inode)->invalid_mapping = true;
2405                 iocb->ki_pos = pos;
2406         }
2407
2408         return written;
2409 }
2410
2411 static ssize_t
2412 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2413             unsigned long nr_segs, loff_t pos)
2414 {
2415         struct file *file = iocb->ki_filp;
2416         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2417         struct inode *inode = file->f_mapping->host;
2418         struct cifsInodeInfo *cinode = CIFS_I(inode);
2419         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2420         ssize_t rc = -EACCES;
2421
2422         BUG_ON(iocb->ki_pos != pos);
2423
2424         sb_start_write(inode->i_sb);
2425
2426         /*
2427          * We need to hold the sem to be sure nobody modifies the lock list
2428          * with a brlock that prevents writing.
2429          */
2430         down_read(&cinode->lock_sem);
2431         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2432                                      server->vals->exclusive_lock_type, NULL,
2433                                      true)) {
2434                 mutex_lock(&inode->i_mutex);
2435                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2436                                                &iocb->ki_pos);
2437                 mutex_unlock(&inode->i_mutex);
2438         }
2439
2440         if (rc > 0 || rc == -EIOCBQUEUED) {
2441                 ssize_t err;
2442
2443                 err = generic_write_sync(file, pos, rc);
2444                 if (err < 0 && rc > 0)
2445                         rc = err;
2446         }
2447
2448         up_read(&cinode->lock_sem);
2449         sb_end_write(inode->i_sb);
2450         return rc;
2451 }
2452
2453 ssize_t
2454 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2455                    unsigned long nr_segs, loff_t pos)
2456 {
2457         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2458         struct cifsInodeInfo *cinode = CIFS_I(inode);
2459         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2460         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2461                                                 iocb->ki_filp->private_data;
2462         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2463
2464 #ifdef CONFIG_CIFS_SMB2
2465         /*
2466          * If we have a read oplock and want to write data to the file,
2467          * we need to store it in the page cache and then push it to the server
2468          * to be sure the next read gets valid data.
2469          */
2470         if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) {
2471                 ssize_t written;
2472                 int rc;
2473
2474                 written = generic_file_aio_write(iocb, iov, nr_segs, pos);
2475                 rc = filemap_fdatawrite(inode->i_mapping);
2476                 if (rc)
2477                         return (ssize_t)rc;
2478
2479                 return written;
2480         }
2481 #endif
2482
2483         /*
2484          * For non-oplocked files in strict cache mode we need to write the data
2485          * to the server exactly from pos to pos+len-1 rather than flush all
2486          * affected pages because it may cause an error with mandatory locks on
2487          * these pages but not on the region from pos to pos+len-1.
2488          */
2489
2490         if (!cinode->clientCanCacheAll)
2491                 return cifs_user_writev(iocb, iov, nr_segs, pos);
2492
2493         if (cap_unix(tcon->ses) &&
2494             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2495             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2496                 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2497
2498         return cifs_writev(iocb, iov, nr_segs, pos);
2499 }
2500
2501 static struct cifs_readdata *
2502 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2503 {
2504         struct cifs_readdata *rdata;
2505
2506         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2507                         GFP_KERNEL);
2508         if (rdata != NULL) {
2509                 kref_init(&rdata->refcount);
2510                 INIT_LIST_HEAD(&rdata->list);
2511                 init_completion(&rdata->done);
2512                 INIT_WORK(&rdata->work, complete);
2513         }
2514
2515         return rdata;
2516 }
2517
2518 void
2519 cifs_readdata_release(struct kref *refcount)
2520 {
2521         struct cifs_readdata *rdata = container_of(refcount,
2522                                         struct cifs_readdata, refcount);
2523
2524         if (rdata->cfile)
2525                 cifsFileInfo_put(rdata->cfile);
2526
2527         kfree(rdata);
2528 }
2529
2530 static int
2531 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2532 {
2533         int rc = 0;
2534         struct page *page;
2535         unsigned int i;
2536
2537         for (i = 0; i < nr_pages; i++) {
2538                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2539                 if (!page) {
2540                         rc = -ENOMEM;
2541                         break;
2542                 }
2543                 rdata->pages[i] = page;
2544         }
2545
2546         if (rc) {
2547                 for (i = 0; i < nr_pages; i++) {
2548                         put_page(rdata->pages[i]);
2549                         rdata->pages[i] = NULL;
2550                 }
2551         }
2552         return rc;
2553 }
2554
2555 static void
2556 cifs_uncached_readdata_release(struct kref *refcount)
2557 {
2558         struct cifs_readdata *rdata = container_of(refcount,
2559                                         struct cifs_readdata, refcount);
2560         unsigned int i;
2561
2562         for (i = 0; i < rdata->nr_pages; i++) {
2563                 put_page(rdata->pages[i]);
2564                 rdata->pages[i] = NULL;
2565         }
2566         cifs_readdata_release(refcount);
2567 }
2568
2569 static int
2570 cifs_retry_async_readv(struct cifs_readdata *rdata)
2571 {
2572         int rc;
2573         struct TCP_Server_Info *server;
2574
2575         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2576
2577         do {
2578                 if (rdata->cfile->invalidHandle) {
2579                         rc = cifs_reopen_file(rdata->cfile, true);
2580                         if (rc != 0)
2581                                 continue;
2582                 }
2583                 rc = server->ops->async_readv(rdata);
2584         } while (rc == -EAGAIN);
2585
2586         return rc;
2587 }
2588
2589 /**
2590  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2591  * @rdata:      the readdata response with list of pages holding data
2592  * @iov:        vector in which we should copy the data
2593  * @nr_segs:    number of segments in vector
2594  * @offset:     offset into file of the first iovec
2595  * @copied:     used to return the amount of data copied to the iov
2596  *
2597  * This function copies data from a list of pages in a readdata response into
2598  * an array of iovecs. It will first calculate where the data should go
2599  * based on the info in the readdata and then copy the data into that spot.
2600  */
2601 static ssize_t
2602 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2603                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2604 {
2605         int rc = 0;
2606         struct iov_iter ii;
2607         size_t pos = rdata->offset - offset;
2608         ssize_t remaining = rdata->bytes;
2609         unsigned char *pdata;
2610         unsigned int i;
2611
2612         /* set up iov_iter and advance to the correct offset */
2613         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2614         iov_iter_advance(&ii, pos);
2615
2616         *copied = 0;
2617         for (i = 0; i < rdata->nr_pages; i++) {
2618                 ssize_t copy;
2619                 struct page *page = rdata->pages[i];
2620
2621                 /* copy a whole page or whatever's left */
2622                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2623
2624                 /* ...but limit it to whatever space is left in the iov */
2625                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2626
2627                 /* go while there's data to be copied and no errors */
2628                 if (copy && !rc) {
2629                         pdata = kmap(page);
2630                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2631                                                 (int)copy);
2632                         kunmap(page);
2633                         if (!rc) {
2634                                 *copied += copy;
2635                                 remaining -= copy;
2636                                 iov_iter_advance(&ii, copy);
2637                         }
2638                 }
2639         }
2640
2641         return rc;
2642 }
2643
2644 static void
2645 cifs_uncached_readv_complete(struct work_struct *work)
2646 {
2647         struct cifs_readdata *rdata = container_of(work,
2648                                                 struct cifs_readdata, work);
2649
2650         complete(&rdata->done);
2651         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2652 }
2653
2654 static int
2655 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2656                         struct cifs_readdata *rdata, unsigned int len)
2657 {
2658         int total_read = 0, result = 0;
2659         unsigned int i;
2660         unsigned int nr_pages = rdata->nr_pages;
2661         struct kvec iov;
2662
2663         rdata->tailsz = PAGE_SIZE;
2664         for (i = 0; i < nr_pages; i++) {
2665                 struct page *page = rdata->pages[i];
2666
2667                 if (len >= PAGE_SIZE) {
2668                         /* enough data to fill the page */
2669                         iov.iov_base = kmap(page);
2670                         iov.iov_len = PAGE_SIZE;
2671                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2672                                 i, iov.iov_base, iov.iov_len);
2673                         len -= PAGE_SIZE;
2674                 } else if (len > 0) {
2675                         /* enough for partial page, fill and zero the rest */
2676                         iov.iov_base = kmap(page);
2677                         iov.iov_len = len;
2678                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2679                                 i, iov.iov_base, iov.iov_len);
2680                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2681                         rdata->tailsz = len;
2682                         len = 0;
2683                 } else {
2684                         /* no need to hold page hostage */
2685                         rdata->pages[i] = NULL;
2686                         rdata->nr_pages--;
2687                         put_page(page);
2688                         continue;
2689                 }
2690
2691                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2692                 kunmap(page);
2693                 if (result < 0)
2694                         break;
2695
2696                 total_read += result;
2697         }
2698
2699         return total_read > 0 ? total_read : result;
2700 }
2701
2702 static ssize_t
2703 cifs_iovec_read(struct file *file, const struct iovec *iov,
2704                  unsigned long nr_segs, loff_t *poffset)
2705 {
2706         ssize_t rc;
2707         size_t len, cur_len;
2708         ssize_t total_read = 0;
2709         loff_t offset = *poffset;
2710         unsigned int npages;
2711         struct cifs_sb_info *cifs_sb;
2712         struct cifs_tcon *tcon;
2713         struct cifsFileInfo *open_file;
2714         struct cifs_readdata *rdata, *tmp;
2715         struct list_head rdata_list;
2716         pid_t pid;
2717
2718         if (!nr_segs)
2719                 return 0;
2720
2721         len = iov_length(iov, nr_segs);
2722         if (!len)
2723                 return 0;
2724
2725         INIT_LIST_HEAD(&rdata_list);
2726         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2727         open_file = file->private_data;
2728         tcon = tlink_tcon(open_file->tlink);
2729
2730         if (!tcon->ses->server->ops->async_readv)
2731                 return -ENOSYS;
2732
2733         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2734                 pid = open_file->pid;
2735         else
2736                 pid = current->tgid;
2737
2738         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2739                 cFYI(1, "attempting read on write only file instance");
2740
2741         do {
2742                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2743                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2744
2745                 /* allocate a readdata struct */
2746                 rdata = cifs_readdata_alloc(npages,
2747                                             cifs_uncached_readv_complete);
2748                 if (!rdata) {
2749                         rc = -ENOMEM;
2750                         goto error;
2751                 }
2752
2753                 rc = cifs_read_allocate_pages(rdata, npages);
2754                 if (rc)
2755                         goto error;
2756
2757                 rdata->cfile = cifsFileInfo_get(open_file);
2758                 rdata->nr_pages = npages;
2759                 rdata->offset = offset;
2760                 rdata->bytes = cur_len;
2761                 rdata->pid = pid;
2762                 rdata->pagesz = PAGE_SIZE;
2763                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2764
2765                 rc = cifs_retry_async_readv(rdata);
2766 error:
2767                 if (rc) {
2768                         kref_put(&rdata->refcount,
2769                                  cifs_uncached_readdata_release);
2770                         break;
2771                 }
2772
2773                 list_add_tail(&rdata->list, &rdata_list);
2774                 offset += cur_len;
2775                 len -= cur_len;
2776         } while (len > 0);
2777
2778         /* if at least one read request was sent successfully, reset rc */
2779         if (!list_empty(&rdata_list))
2780                 rc = 0;
2781
2782         /* the loop below should proceed in the order of increasing offsets */
restart_loop:
        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
                if (!rc) {
                        ssize_t copied;

                        /* FIXME: freezable sleep too? */
                        rc = wait_for_completion_killable(&rdata->done);
                        if (rc)
                                rc = -EINTR;
                        else if (rdata->result)
                                rc = rdata->result;
                        else {
                                rc = cifs_readdata_to_iov(rdata, iov,
                                                        nr_segs, *poffset,
                                                        &copied);
                                total_read += copied;
                        }

                        /* resend call if it's a retryable error */
                        if (rc == -EAGAIN) {
                                rc = cifs_retry_async_readv(rdata);
                                goto restart_loop;
                        }
                }
                list_del_init(&rdata->list);
                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        }

        cifs_stats_bytes_read(tcon, total_read);
        *poffset += total_read;

        /* mask nodata case */
        if (rc == -ENODATA)
                rc = 0;

        return total_read ? total_read : rc;
}

ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
                               unsigned long nr_segs, loff_t pos)
{
        ssize_t read;

        read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
        if (read > 0)
                iocb->ki_pos = pos;

        return read;
}

ssize_t
cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
                  unsigned long nr_segs, loff_t pos)
{
        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
                                                iocb->ki_filp->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = -EACCES;

        /*
         * In strict cache mode we must read from the server whenever we do
         * not hold a level II oplock: the server can delay reporting mtime
         * changes, so we cannot decide whether the inode needs invalidating.
         * Reading pages can also fail if there are mandatory locks on pages
         * touched by this read but outside the region from pos to pos+len-1.
         */
        if (!cinode->clientCanCacheRead)
                return cifs_user_readv(iocb, iov, nr_segs, pos);

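        /*
         * We hold a read oplock at this point. If the server also does
         * POSIX (advisory) byte-range locking - unix extensions with the
         * fcntl capability and no mandatory-lock override - then no
         * mandatory brlock can conflict with the cached read.
         */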
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                return generic_file_aio_read(iocb, iov, nr_segs, pos);

        /*
         * We need to hold the sem to be sure nobody modifies the lock list
         * with a brlock that prevents reading.
         */
        down_read(&cinode->lock_sem);
        if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
                                     tcon->ses->server->vals->shared_lock_type,
                                     NULL, true))
                rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
        up_read(&cinode->lock_sem);
        return rc;
}

static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
        int rc = -EACCES;
        unsigned int bytes_read = 0;
        unsigned int total_read;
        unsigned int current_read_size;
        unsigned int rsize;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        char *cur_offset;
        struct cifsFileInfo *open_file;
        struct cifs_io_parms io_parms;
        int buf_type = CIFS_NO_BUFFER;
        __u32 pid;

        xid = get_xid();
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

        /* FIXME: set up handlers for larger reads and/or convert to async */
        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

        if (file->private_data == NULL) {
                rc = -EBADF;
                free_xid(xid);
                return rc;
        }
        open_file = file->private_data;
        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        if (!server->ops->sync_read) {
                free_xid(xid);
                return -ENOSYS;
        }

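        /*
         * With the rwpidforward mount option, reads and writes are sent
         * under the pid of the process that opened the file rather than
         * the pid of the caller.
         */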
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                cFYI(1, "attempting read on write only file instance");

        for (total_read = 0, cur_offset = read_data; read_size > total_read;
             total_read += bytes_read, cur_offset += bytes_read) {
                current_read_size = min_t(uint, read_size - total_read, rsize);
                /*
                 * Windows ME and 9x refuse reads larger than the negotiated
                 * buffer size, so never request more than was negotiated.
                 */
                if ((tcon->ses) && !(tcon->ses->capabilities &
                                tcon->ses->server->vals->cap_large_files)) {
                        current_read_size = min_t(uint, current_read_size,
                                        CIFSMaxBufSize);
                }
                rc = -EAGAIN;
                while (rc == -EAGAIN) {
                        if (open_file->invalidHandle) {
                                rc = cifs_reopen_file(open_file, true);
                                if (rc != 0)
                                        break;
                        }
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = current_read_size;
                        rc = server->ops->sync_read(xid, open_file, &io_parms,
                                                    &bytes_read, &cur_offset,
                                                    &buf_type);
                }
                if (rc || (bytes_read == 0)) {
                        if (total_read) {
                                break;
                        } else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        cifs_stats_bytes_read(tcon, total_read);
                        *offset += bytes_read;
                }
        }
        free_xid(xid);
        return total_read;
}

/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static int
cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct page *page = vmf->page;

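        /*
         * Return with the page locked (VM_FAULT_LOCKED). Writeback also
         * takes the page lock before sending a page to the server, so the
         * page cannot be modified while a write of it is in flight.
         */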
        lock_page(page);
        return VM_FAULT_LOCKED;
}

static struct vm_operations_struct cifs_file_vm_ops = {
        .fault = filemap_fault,
        .page_mkwrite = cifs_page_mkwrite,
};

int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
        int rc, xid;
        struct inode *inode = file->f_path.dentry->d_inode;

        xid = get_xid();

        if (!CIFS_I(inode)->clientCanCacheRead) {
                rc = cifs_invalidate_mapping(inode);
                if (rc) {
                        /* don't leak the xid on the early return */
                        free_xid(xid);
                        return rc;
                }
        }

        rc = generic_file_mmap(file, vma);
        if (rc == 0)
                vma->vm_ops = &cifs_file_vm_ops;
        free_xid(xid);
        return rc;
}

int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
        int rc, xid;

        xid = get_xid();
        rc = cifs_revalidate_file(file);
        if (rc) {
                cFYI(1, "Validation prior to mmap failed, error=%d", rc);
                free_xid(xid);
                return rc;
        }
        rc = generic_file_mmap(file, vma);
        if (rc == 0)
                vma->vm_ops = &cifs_file_vm_ops;
        free_xid(xid);
        return rc;
}

static void
cifs_readv_complete(struct work_struct *work)
{
        unsigned int i;
        struct cifs_readdata *rdata = container_of(work,
                                                struct cifs_readdata, work);

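        /*
         * Release each page to the LRU; if the read succeeded, mark the
         * page uptodate before unlocking it and then mirror it into fscache.
         */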
        for (i = 0; i < rdata->nr_pages; i++) {
                struct page *page = rdata->pages[i];

                lru_cache_add_file(page);

                if (rdata->result == 0) {
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                }

                unlock_page(page);

                if (rdata->result == 0)
                        cifs_readpage_to_fscache(rdata->mapping->host, page);

                page_cache_release(page);
                rdata->pages[i] = NULL;
        }
        kref_put(&rdata->refcount, cifs_readdata_release);
}

static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
                        struct cifs_readdata *rdata, unsigned int len)
{
        int total_read = 0, result = 0;
        unsigned int i;
        u64 eof;
        pgoff_t eof_index;
        unsigned int nr_pages = rdata->nr_pages;
        struct kvec iov;

        /* determine the eof that the server (probably) has */
        eof = CIFS_I(rdata->mapping->host)->server_eof;
        eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
        cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);

        rdata->tailsz = PAGE_CACHE_SIZE;
        for (i = 0; i < nr_pages; i++) {
                struct page *page = rdata->pages[i];

                if (len >= PAGE_CACHE_SIZE) {
                        /* enough data to fill the page */
                        iov.iov_base = kmap(page);
                        iov.iov_len = PAGE_CACHE_SIZE;
                        cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
                                i, page->index, iov.iov_base, iov.iov_len);
                        len -= PAGE_CACHE_SIZE;
                } else if (len > 0) {
                        /* enough for partial page, fill and zero the rest */
                        iov.iov_base = kmap(page);
                        iov.iov_len = len;
                        cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
                                i, page->index, iov.iov_base, iov.iov_len);
                        memset(iov.iov_base + len,
                                '\0', PAGE_CACHE_SIZE - len);
                        rdata->tailsz = len;
                        len = 0;
                } else if (page->index > eof_index) {
                        /*
                         * The VFS will not try to do readahead past the
                         * i_size, but it's possible that we have outstanding
                         * writes with gaps in the middle and the i_size hasn't
                         * caught up yet. Populate those with zeroed out pages
                         * to prevent the VFS from repeatedly attempting to
                         * fill them until the writes are flushed.
                         */
                        zero_user(page, 0, PAGE_CACHE_SIZE);
                        lru_cache_add_file(page);
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                        unlock_page(page);
                        page_cache_release(page);
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        continue;
                } else {
                        /* no need to hold page hostage */
                        lru_cache_add_file(page);
                        unlock_page(page);
                        page_cache_release(page);
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        continue;
                }

                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
                kunmap(page);
                if (result < 0)
                        break;

                total_read += result;
        }

        return total_read > 0 ? total_read : result;
}

static int cifs_readpages(struct file *file, struct address_space *mapping,
        struct list_head *page_list, unsigned num_pages)
{
        int rc;
        struct list_head tmplist;
        struct cifsFileInfo *open_file = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        unsigned int rsize = cifs_sb->rsize;
        pid_t pid;

        /*
         * Give up immediately if rsize is too small to read an entire page.
         * The VFS will fall back to readpage. We should never reach this
         * point however since we set ra_pages to 0 when the rsize is smaller
         * than a cache page.
         */
        if (unlikely(rsize < PAGE_CACHE_SIZE))
                return 0;

        /*
         * Read as many pages as possible from fscache. Returns -ENOBUFS
         * immediately if the cookie is negative (i.e. there is no cache
         * object for this inode).
         */
        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
                                         &num_pages);
        if (rc == 0)
                return rc;

        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        rc = 0;
        INIT_LIST_HEAD(&tmplist);

        cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
                mapping, num_pages);

        /*
         * Start with the page at the end of the list and move it to the
         * private list. Do the same with any following pages until we hit
         * the rsize limit, hit an index discontinuity, or run out of
         * pages. Issue the async read and then start the loop again
         * until the list is empty.
         *
         * Note that list order is important. The page_list is in the
         * order of declining indexes, but we want the pages in
         * rdata->pages in increasing order.
         */
        while (!list_empty(page_list)) {
                unsigned int i;
                unsigned int bytes = PAGE_CACHE_SIZE;
                unsigned int expected_index;
                unsigned int nr_pages = 1;
                loff_t offset;
                struct page *page, *tpage;
                struct cifs_readdata *rdata;

                page = list_entry(page_list->prev, struct page, lru);

                /*
                 * Lock the page and put it in the cache. Since no one else
                 * should have access to this page, we're safe to simply set
                 * PG_locked without checking it first.
                 */
                __set_page_locked(page);
                rc = add_to_page_cache_locked(page, mapping,
                                              page->index, GFP_KERNEL);

                /* give up if we can't stick it in the cache */
                if (rc) {
                        __clear_page_locked(page);
                        break;
                }

                /* move first page to the tmplist */
                offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
                list_move_tail(&page->lru, &tmplist);

                /* now try and add more pages onto the request */
                expected_index = page->index + 1;
                list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
                        /* discontinuity? */
                        if (page->index != expected_index)
                                break;

                        /* would this page push the read over the rsize? */
                        if (bytes + PAGE_CACHE_SIZE > rsize)
                                break;

                        __set_page_locked(page);
                        if (add_to_page_cache_locked(page, mapping,
                                                page->index, GFP_KERNEL)) {
                                __clear_page_locked(page);
                                break;
                        }
                        list_move_tail(&page->lru, &tmplist);
                        bytes += PAGE_CACHE_SIZE;
                        expected_index++;
                        nr_pages++;
                }

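                /*
                 * We now have a contiguous run of nr_pages locked pages
                 * covering "bytes" bytes starting at "offset"; wrap them
                 * in a readdata unit and issue the async read.
                 */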
                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
                if (!rdata) {
                        /* best to give up if we're out of mem */
                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
                                list_del(&page->lru);
                                lru_cache_add_file(page);
                                unlock_page(page);
                                page_cache_release(page);
                        }
                        rc = -ENOMEM;
                        break;
                }

                rdata->cfile = cifsFileInfo_get(open_file);
                rdata->mapping = mapping;
                rdata->offset = offset;
                rdata->bytes = bytes;
                rdata->pid = pid;
                rdata->pagesz = PAGE_CACHE_SIZE;
                rdata->read_into_pages = cifs_readpages_read_into_pages;

                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
                        list_del(&page->lru);
                        rdata->pages[rdata->nr_pages++] = page;
                }

                rc = cifs_retry_async_readv(rdata);
                if (rc != 0) {
                        for (i = 0; i < rdata->nr_pages; i++) {
                                page = rdata->pages[i];
                                lru_cache_add_file(page);
                                unlock_page(page);
                                page_cache_release(page);
                        }
                        kref_put(&rdata->refcount, cifs_readdata_release);
                        break;
                }

                kref_put(&rdata->refcount, cifs_readdata_release);
        }

        return rc;
}

static int cifs_readpage_worker(struct file *file, struct page *page,
        loff_t *poffset)
{
        char *read_data;
        int rc;

        /* Is the page cached? */
        rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
        if (rc == 0)
                goto read_complete;

        page_cache_get(page);
        read_data = kmap(page);
        /* for reads over a certain size we could initiate async read-ahead */

        rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);

        if (rc < 0)
                goto io_error;
        else
                cFYI(1, "Bytes read %d", rc);

        file->f_path.dentry->d_inode->i_atime =
                current_fs_time(file->f_path.dentry->d_inode->i_sb);

        if (PAGE_CACHE_SIZE > rc)
                memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);

        flush_dcache_page(page);
        SetPageUptodate(page);

        /* send this page to the cache */
        cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);

        rc = 0;

io_error:
        kunmap(page);
        page_cache_release(page);

read_complete:
        return rc;
}

static int cifs_readpage(struct file *file, struct page *page)
{
        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
        int rc = -EACCES;
        unsigned int xid;

        xid = get_xid();

        if (file->private_data == NULL) {
                rc = -EBADF;
                free_xid(xid);
                return rc;
        }

        cFYI(1, "readpage %p at offset %d 0x%x",
                 page, (int)offset, (int)offset);

        rc = cifs_readpage_worker(file, page, &offset);

        unlock_page(page);

        free_xid(xid);
        return rc;
}

static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
        struct cifsFileInfo *open_file;

        spin_lock(&cifs_file_list_lock);
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        spin_unlock(&cifs_file_list_lock);
                        return 1;
                }
        }
        spin_unlock(&cifs_file_list_lock);
        return 0;
}

/* We do not want to update the file size from the server for inodes
   open for write, to avoid races with writepage extending the file.
   In the future we could consider allowing refreshing the inode only
   on increases in the file size, but this is tricky to do without
   racing with writebehind page caching in the current Linux kernel
   design. */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
        if (!cifsInode)
                return true;

        if (is_inode_writable(cifsInode)) {
                /* This inode is open for write at least once */
                struct cifs_sb_info *cifs_sb;

                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
                        /* there is no page cache to corrupt with direct I/O,
                           so we can change the size safely */
                        return true;
                }

                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
                        return true;

                return false;
        } else
                return true;
}

static int cifs_write_begin(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned flags,
                        struct page **pagep, void **fsdata)
{
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
        loff_t page_start = pos & PAGE_MASK;
        loff_t i_size;
        struct page *page;
        int rc = 0;

        cFYI(1, "write_begin from %lld len %d", (long long)pos, len);

        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                rc = -ENOMEM;
                goto out;
        }

        if (PageUptodate(page))
                goto out;

        /*
         * If we write a full page it will be up to date, no need to read from
         * the server. If the write is short, we'll end up doing a sync write
         * instead.
         */
        if (len == PAGE_CACHE_SIZE)
                goto out;

        /*
         * optimize away the read when we have an oplock, and we're not
         * expecting to use any of the data we'd be reading in. That
         * is, when the page lies beyond the EOF, or straddles the EOF
         * and the write will cover all of the existing data.
         */
        if (CIFS_I(mapping->host)->clientCanCacheRead) {
                i_size = i_size_read(mapping->host);
                if (page_start >= i_size ||
                    (offset == 0 && (pos + len) >= i_size)) {
                        zero_user_segments(page, 0, offset,
                                           offset + len,
                                           PAGE_CACHE_SIZE);
                        /*
                         * PageChecked means that the parts of the page
                         * to which we're not writing are considered up
                         * to date. Once the data is copied to the
                         * page, it can be set uptodate.
                         */
                        SetPageChecked(page);
                        goto out;
                }
        }

        if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
                /*
                 * might as well read a page, it is fast enough. If we get
                 * an error, we don't need to return it. cifs_write_end will
                 * do a sync write instead since PG_uptodate isn't set.
                 */
                cifs_readpage_worker(file, page, &page_start);
        } else {
                /* We could try using another file handle if there is one,
                   but how would we lock it to prevent a close of that handle
                   racing with this read? In any case, the page will be
                   written out by write_end, so this is fine. */
        }
out:
        *pagep = page;
        return rc;
}

static int cifs_release_page(struct page *page, gfp_t gfp)
{
        if (PagePrivate(page))
                return 0;

        return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned long offset)
{
        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

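        /* only a full-page invalidation removes the page from fscache */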
        if (offset == 0)
                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

static int cifs_launder_page(struct page *page)
{
        int rc = 0;
        loff_t range_start = page_offset(page);
        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_ALL,
                .nr_to_write = 0,
                .range_start = range_start,
                .range_end = range_end,
        };

        cFYI(1, "Launder page: %p", page);

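        /*
         * Write the page back synchronously (WB_SYNC_ALL) if it is dirty,
         * so that it is clean before it is released or migrated.
         */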
        if (clear_page_dirty_for_io(page))
                rc = cifs_writepage_locked(page, &wbc);

        cifs_fscache_invalidate_page(page, page->mapping->host);
        return rc;
}

void cifs_oplock_break(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
                                                  oplock_break);
        struct inode *inode = cfile->dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

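        /*
         * Break any matching local lease, then flush dirty pages so the
         * server sees them before the oplock is gone; on a full break (no
         * read caching left) also wait for the flush to finish and
         * invalidate the cached data.
         */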
        if (inode && S_ISREG(inode->i_mode)) {
                if (cinode->clientCanCacheRead)
                        break_lease(inode, O_RDONLY);
                else
                        break_lease(inode, O_WRONLY);
                rc = filemap_fdatawrite(inode->i_mapping);
                if (cinode->clientCanCacheRead == 0) {
                        rc = filemap_fdatawait(inode->i_mapping);
                        mapping_set_error(inode->i_mapping, rc);
                        invalidate_remote_inode(inode);
                }
                cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
        }

        rc = cifs_push_locks(cfile);
        if (rc)
                cERROR(1, "Push locks rc = %d", rc);

        /*
         * Releasing a stale oplock after a recent reconnect of the SMB
         * session (using a now-invalid file handle) is not a data integrity
         * issue, but do not bother sending an oplock release if the session
         * to the server is still down, since the server has already
         * released the oplock in that case.
         */
        if (!cfile->oplock_break_cancelled) {
                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
                                                             cinode);
                cFYI(1, "Oplock release rc = %d", rc);
        }
}

const struct address_space_operations cifs_addr_ops = {
        .readpage = cifs_readpage,
        .readpages = cifs_readpages,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
        .readpage = cifs_readpage,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};