CIFS: Separate pushing posix locks and lock_sem handling

/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
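
/*
 * Example (illustrative, editorial): an open(2) with O_RDWR yields
 * GENERIC_READ | GENERIC_WRITE from cifs_convert_flags() above; the
 * trailing attribute/EA mask is only reached for an access mode that
 * matches none of the three standard O_ACCMODE values.
 */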

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT)
                posix_flags |= SMB_O_CREAT;
        if (flags & O_EXCL)
                posix_flags |= SMB_O_EXCL;
        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
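
/*
 * Example (illustrative, editorial): f_flags of O_WRONLY | O_CREAT |
 * O_TRUNC becomes SMB_O_WRONLY | SMB_O_CREAT | SMB_O_TRUNC on the
 * wire; O_DSYNC is deliberately widened to the stronger SMB_O_SYNC.
 */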

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
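
/*
 * Worked example (editorial): O_CREAT | O_TRUNC maps to
 * FILE_OVERWRITE_IF (truncate if present, create if absent), while a
 * bare O_TRUNC maps to FILE_OVERWRITE and fails when the file does not
 * exist. The full mapping table appears inside cifs_nt_open() below.
 */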

int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cFYI(1, "posix open %s", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the FILE_SUPERSEDE
 *      disposition (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than replacing it as FILE_SUPERSEDE does (which
 *      uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably. O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, fid, oplock, buf,
                               cifs_sb);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if this is a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}
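
/*
 * Editorial note: the per-fd lock list (fdlocks) is linked onto the
 * inode's llist under a write-locked cinode->lock_sem above; that rwsem
 * serializes all byte-range lock bookkeeping for the inode, so any code
 * walking cinode->llist is assumed to hold lock_sem at least for read.
 */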

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store the open in pending opens so we do not miss a lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cFYI(1, "closing last open instance for inode %p",
                        cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close, because a stale mapping may cause an error
                 * when we open this file again and get at least a level II
                 * oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
                 inode, file->f_flags, full_path);

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* cannot refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix open succeeded");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cERROR(1, "server %s of type %s returned"
                                           " unexpected error on SMB posix open"
                                           ", disabling posix open support."
                                           " Check if server update available.",
                                           tcon->ses->serverName,
                                           tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through and retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode, which we could not set earlier due
                 * to problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = NO_CHANGE_64,
                        .gid    = NO_CHANGE_64,
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
        int rc = 0;

        /* BB list all locks open on this file and relock */

        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Cannot grab the rename sem here, because various ops, including
         * those that already hold it, can end up causing writepage to be
         * called, and if the server was down we end up here; we can never
         * tell whether the caller already holds the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
             full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix reopen succeeded");
                        goto reopen_success;
                }
                /*
                 * Fall through and retry the open the old way on errors;
                 * in the reconnect path especially, it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /*
         * We cannot refresh the inode by passing in a file_info buf to be
         * returned by CIFSSMBOpen and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty
         * locally, we could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cFYI(1, "cifs_reopen returned 0x%x", rc);
                cFYI(1, "oplock: %d", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Otherwise we are already writing data out to the server and could
         * deadlock if we tried to flush it; and since we do not know whether
         * we have data that would invalidate the current end of file on the
         * server, we cannot go to the server for the new inode info.
         */

        server->ops->set_fid(cfile, &fid, oplock);
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cFYI(1, "Closedir inode = 0x%p", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cFYI(1, "Freeing private data in close dir");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cFYI(1, "Closing uncompleted readdir with rc %d", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cFYI(1, "closedir free smb buf in srch struct");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, bool rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check && server->ops->compare_fids(cfile, cur_cfile) &&
                    current->tgid == li->pid)
                        continue;
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
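
/*
 * Editorial note: two byte ranges [offset, offset + length) conflict
 * unless one ends at or before the start of the other, which is exactly
 * the pair of comparisons at the top of the loop above. For example, a
 * cached lock at offset 0, length 10 and a request at offset 10,
 * length 5 do not overlap and therefore never conflict.
 */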

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        bool rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, false);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}
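
/*
 * Editorial note: cifs_lock_test() takes lock_sem only for read, since
 * it does not modify the cached lock lists; the add/remove paths below
 * take it for write, so tests may run concurrently with each other but
 * never with list updates.
 */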

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, false);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}
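
/*
 * Editorial note on the blocking path above: when a conflict is found
 * and wait is true, the request parks itself on the conflicting lock's
 * blist and sleeps on its own block_q until cifs_del_lock_waiters()
 * unlinks it (the prev/next comparison is an open-coded list_empty
 * check), then retries the conflict search from scratch.
 */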

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if we need to send a request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}
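
/*
 * Editorial note: FILE_LOCK_DEFERRED from posix_lock_file() means the
 * lock was queued behind a conflicting one; the code above waits for
 * fl_next to clear, retries from try_again, and on a signal removes
 * itself from the blocked list via locks_delete_block().
 */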

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                free_xid(xid);
                return rc;
        }

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store the
         * value and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                up_write(&cinode->lock_sem);
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                up_write(&cinode->lock_sem);
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);

        kfree(buf);
        free_xid(xid);
        return rc;
}
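
/*
 * Editorial note: locks are pushed to the server in batches. max_num is
 * the number of LOCKING_ANDX_RANGE entries that fit in one SMB after
 * the header, so cifs_lockv() is issued each time the buffer fills and
 * once more for any remainder; a failing batch is remembered in rc but
 * does not stop the remaining batches.
 */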

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};
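
/*
 * Editorial note: per the usual kernel naming convention, the _locked
 * helper below is assumed to be entered with cinode->lock_sem already
 * held (the cifs_push_posix_locks() wrapper takes it for write). That
 * is what makes the "no FL_POSIX locks can be added" reasoning in its
 * allocation comment valid.
 */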
static int
cifs_push_posix_locks_locked(struct cifsFileInfo *cfile)
{
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        unlock_flocks();

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we hold cinode->lock_sem, which protects
         * the locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cERROR(1, "Can't push all brlocks!");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        unlock_flocks();

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }
        rc = cifs_push_posix_locks_locked(cfile);
        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                return cifs_push_posix_locks(cfile);

        return tcon->ses->server->ops->push_mand_locks(cfile);
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cFYI(1, "Posix");
        if (flock->fl_flags & FL_FLOCK)
                cFYI(1, "Flock");
        if (flock->fl_flags & FL_SLEEP) {
                cFYI(1, "Blocking lock");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cFYI(1, "Process suspended by mandatory locking - "
                        "not implemented yet");
        if (flock->fl_flags & FL_LEASE)
                cFYI(1, "Lease on file - not implemented yet");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cFYI(1, "F_WRLCK");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cFYI(1, "F_UNLCK");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cFYI(1, "F_RDLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cFYI(1, "F_EXLCK");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cFYI(1, "F_SHLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cFYI(1, "Unknown type of lock");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cERROR(1, "Error unlocking previously locked "
                                  "range %d during test of lock", rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cERROR(1, "Error unlocking previously locked "
                                  "range %d during test of lock", rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store the
         * value and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * Save the lock here so we can add it back to the
                         * file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list back to the head of the file's
                                         * list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
                                        rc = stored_rc;
                                } else
                                        /*
                                         * The unlock range request succeeded -
                                         * free the tmp list.
                                         */
                                        cifs_free_llist(&tmp_llist);
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               types[i], num, 0, buf);
                        if (stored_rc) {
                                cifs_move_llist(&tmp_llist,
                                                &cfile->llist->locks);
                                rc = stored_rc;
                        } else
                                cifs_free_llist(&tmp_llist);
                }
        }

        up_write(&cinode->lock_sem);
        kfree(buf);
        return rc;
}
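
/*
 * Editorial note: tmp_llist above implements a simple rollback - locks
 * being unlocked are moved off the file's list first, then either freed
 * once the server confirms the unlock or spliced back onto the head of
 * the list if the request fails, so the cached state never disagrees
 * with the server.
 */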
1404
1405 static int
1406 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1407            bool wait_flag, bool posix_lck, int lock, int unlock,
1408            unsigned int xid)
1409 {
1410         int rc = 0;
1411         __u64 length = 1 + flock->fl_end - flock->fl_start;
1412         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1413         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1414         struct TCP_Server_Info *server = tcon->ses->server;
1415
1416         if (posix_lck) {
1417                 int posix_lock_type;
1418
1419                 rc = cifs_posix_lock_set(file, flock);
1420                 if (rc <= 0)
1421                         return rc;
1422
1423                 if (type & server->vals->shared_lock_type)
1424                         posix_lock_type = CIFS_RDLCK;
1425                 else
1426                         posix_lock_type = CIFS_WRLCK;
1427
1428                 if (unlock == 1)
1429                         posix_lock_type = CIFS_UNLCK;
1430
1431                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1432                                       current->tgid, flock->fl_start, length,
1433                                       NULL, posix_lock_type, wait_flag);
1434                 goto out;
1435         }
1436
1437         if (lock) {
1438                 struct cifsLockInfo *lock;
1439
1440                 lock = cifs_lock_init(flock->fl_start, length, type);
1441                 if (!lock)
1442                         return -ENOMEM;
1443
1444                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1445                 if (rc < 0)
1446                         kfree(lock);
1447                 if (rc <= 0)
1448                         goto out;
1449
1450                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1451                                             type, 1, 0, wait_flag);
1452                 if (rc) {
1453                         kfree(lock);
1454                         goto out;
1455                 }
1456
1457                 cifs_lock_add(cfile, lock);
1458         } else if (unlock)
1459                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1460
1461 out:
1462         if (flock->fl_flags & FL_POSIX)
1463                 posix_lock_file_wait(file, flock);
1464         return rc;
1465 }
1466
1467 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1468 {
1469         int rc, xid;
1470         int lock = 0, unlock = 0;
1471         bool wait_flag = false;
1472         bool posix_lck = false;
1473         struct cifs_sb_info *cifs_sb;
1474         struct cifs_tcon *tcon;
1475         struct cifsInodeInfo *cinode;
1476         struct cifsFileInfo *cfile;
1477         __u16 netfid;
1478         __u32 type;
1479
1480         rc = -EACCES;
1481         xid = get_xid();
1482
1483         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1484                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1485                 flock->fl_start, flock->fl_end);
1486
1487         cfile = (struct cifsFileInfo *)file->private_data;
1488         tcon = tlink_tcon(cfile->tlink);
1489
1490         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1491                         tcon->ses->server);
1492
1493         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1494         netfid = cfile->fid.netfid;
1495         cinode = CIFS_I(file->f_path.dentry->d_inode);
1496
1497         if (cap_unix(tcon->ses) &&
1498             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1499             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1500                 posix_lck = true;
1501         /*
1502          * BB add code here to normalize offset and length to account for
1503          * negative length, which we cannot accept over the wire.
1504          */
1505         if (IS_GETLK(cmd)) {
1506                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1507                 free_xid(xid);
1508                 return rc;
1509         }
1510
1511         if (!lock && !unlock) {
1512                 /*
1513                  * If this is neither a lock nor an unlock request, there is
1514                  * nothing to do since we do not know what it is.
1515                  */
1516                 free_xid(xid);
1517                 return -EOPNOTSUPP;
1518         }
1519
1520         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1521                         xid);
1522         free_xid(xid);
1523         return rc;
1524 }
1525
1526 /*
1527  * Update the file size (if needed) after a write. Should be called with
1528  * the inode->i_lock held.
1529  */
1530 void
1531 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1532                       unsigned int bytes_written)
1533 {
1534         loff_t end_of_write = offset + bytes_written;
1535
1536         if (end_of_write > cifsi->server_eof)
1537                 cifsi->server_eof = end_of_write;
1538 }
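/*
 * Illustrative sketch (kept under #if 0, not part of the build): the
 * locking discipline cifs_update_eof() expects from its callers -- take
 * inode->i_lock, update the cached server EOF, extend i_size if the write
 * went past it, then drop the lock, as the write paths below do.
 */
#if 0
static void example_eof_update(struct inode *inode, loff_t offset,
                               unsigned int bytes_written)
{
        struct cifsInodeInfo *cifsi = CIFS_I(inode);

        spin_lock(&inode->i_lock);
        cifs_update_eof(cifsi, offset, bytes_written);
        if (cifsi->server_eof > i_size_read(inode))
                i_size_write(inode, cifsi->server_eof);
        spin_unlock(&inode->i_lock);
}
#endif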
1539
1540 static ssize_t
1541 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1542            size_t write_size, loff_t *offset)
1543 {
1544         int rc = 0;
1545         unsigned int bytes_written = 0;
1546         unsigned int total_written;
1547         struct cifs_sb_info *cifs_sb;
1548         struct cifs_tcon *tcon;
1549         struct TCP_Server_Info *server;
1550         unsigned int xid;
1551         struct dentry *dentry = open_file->dentry;
1552         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1553         struct cifs_io_parms io_parms;
1554
1555         cifs_sb = CIFS_SB(dentry->d_sb);
1556
1557         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1558              *offset, dentry->d_name.name);
1559
1560         tcon = tlink_tcon(open_file->tlink);
1561         server = tcon->ses->server;
1562
1563         if (!server->ops->sync_write)
1564                 return -ENOSYS;
1565
1566         xid = get_xid();
1567
1568         for (total_written = 0; write_size > total_written;
1569              total_written += bytes_written) {
1570                 rc = -EAGAIN;
1571                 while (rc == -EAGAIN) {
1572                         struct kvec iov[2];
1573                         unsigned int len;
1574
1575                         if (open_file->invalidHandle) {
1576                                 /* we could deadlock if we called
1577                                    filemap_fdatawait from here, so tell
1578                                    reopen_file not to flush data to the
1579                                    server now */
1580                                 rc = cifs_reopen_file(open_file, false);
1581                                 if (rc != 0)
1582                                         break;
1583                         }
1584
1585                         len = min((size_t)cifs_sb->wsize,
1586                                   write_size - total_written);
1587                         /* iov[0] is reserved for smb header */
1588                         iov[1].iov_base = (char *)write_data + total_written;
1589                         iov[1].iov_len = len;
1590                         io_parms.pid = pid;
1591                         io_parms.tcon = tcon;
1592                         io_parms.offset = *offset;
1593                         io_parms.length = len;
1594                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1595                                                      &bytes_written, iov, 1);
1596                 }
1597                 if (rc || (bytes_written == 0)) {
1598                         if (total_written)
1599                                 break;
1600                         else {
1601                                 free_xid(xid);
1602                                 return rc;
1603                         }
1604                 } else {
1605                         spin_lock(&dentry->d_inode->i_lock);
1606                         cifs_update_eof(cifsi, *offset, bytes_written);
1607                         spin_unlock(&dentry->d_inode->i_lock);
1608                         *offset += bytes_written;
1609                 }
1610         }
1611
1612         cifs_stats_bytes_written(tcon, total_written);
1613
1614         if (total_written > 0) {
1615                 spin_lock(&dentry->d_inode->i_lock);
1616                 if (*offset > dentry->d_inode->i_size)
1617                         i_size_write(dentry->d_inode, *offset);
1618                 spin_unlock(&dentry->d_inode->i_lock);
1619         }
1620         mark_inode_dirty_sync(dentry->d_inode);
1621         free_xid(xid);
1622         return total_written;
1623 }
1624
1625 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1626                                         bool fsuid_only)
1627 {
1628         struct cifsFileInfo *open_file = NULL;
1629         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1630
1631         /* only filter by fsuid on multiuser mounts */
1632         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1633                 fsuid_only = false;
1634
1635         spin_lock(&cifs_file_list_lock);
1636         /* we could simply get the first_list_entry since write-only entries
1637            are always at the end of the list but since the first entry might
1638            have a close pending, we go through the whole list */
1639         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1640                 if (fsuid_only && open_file->uid != current_fsuid())
1641                         continue;
1642                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1643                         if (!open_file->invalidHandle) {
1644                                 /* found a good file */
1645                                 /* lock it so it will not be closed on us */
1646                                 cifsFileInfo_get_locked(open_file);
1647                                 spin_unlock(&cifs_file_list_lock);
1648                                 return open_file;
1649                         } /* else might as well continue, and look for
1650                              another, or simply have the caller reopen it
1651                              again rather than trying to fix this handle */
1652                 } else /* write only file */
1653                         break; /* write only files are last so must be done */
1654         }
1655         spin_unlock(&cifs_file_list_lock);
1656         return NULL;
1657 }
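/*
 * Illustrative sketch (kept under #if 0, not part of the build): typical
 * use of find_readable_file(). The helper returns a referenced handle (or
 * NULL), so a caller must drop the reference with cifsFileInfo_put() once
 * the I/O is done.
 */
#if 0
static int example_with_readable_handle(struct cifsInodeInfo *cifs_inode)
{
        struct cifsFileInfo *open_file;

        open_file = find_readable_file(cifs_inode, false);
        if (open_file == NULL)
                return -EBADF;

        /* ... issue the read using open_file here ... */

        cifsFileInfo_put(open_file);
        return 0;
}
#endif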
1658
1659 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1660                                         bool fsuid_only)
1661 {
1662         struct cifsFileInfo *open_file, *inv_file = NULL;
1663         struct cifs_sb_info *cifs_sb;
1664         bool any_available = false;
1665         int rc;
1666         unsigned int refind = 0;
1667
1668         /* Having a null inode here (because mapping->host was set to zero by
1669            the VFS or MM) should not happen, but we had reports of an oops (due
1670            to it being zero) during stress test cases, so we need to check for it */
1671
1672         if (cifs_inode == NULL) {
1673                 cERROR(1, "Null inode passed to find_writable_file");
1674                 dump_stack();
1675                 return NULL;
1676         }
1677
1678         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1679
1680         /* only filter by fsuid on multiuser mounts */
1681         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1682                 fsuid_only = false;
1683
1684         spin_lock(&cifs_file_list_lock);
1685 refind_writable:
1686         if (refind > MAX_REOPEN_ATT) {
1687                 spin_unlock(&cifs_file_list_lock);
1688                 return NULL;
1689         }
1690         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1691                 if (!any_available && open_file->pid != current->tgid)
1692                         continue;
1693                 if (fsuid_only && open_file->uid != current_fsuid())
1694                         continue;
1695                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1696                         if (!open_file->invalidHandle) {
1697                                 /* found a good writable file */
1698                                 cifsFileInfo_get_locked(open_file);
1699                                 spin_unlock(&cifs_file_list_lock);
1700                                 return open_file;
1701                         } else {
1702                                 if (!inv_file)
1703                                         inv_file = open_file;
1704                         }
1705                 }
1706         }
1707         /* couldn't find a usable FH with the same pid, try any available */
1708         if (!any_available) {
1709                 any_available = true;
1710                 goto refind_writable;
1711         }
1712
1713         if (inv_file) {
1714                 any_available = false;
1715                 cifsFileInfo_get_locked(inv_file);
1716         }
1717
1718         spin_unlock(&cifs_file_list_lock);
1719
1720         if (inv_file) {
1721                 rc = cifs_reopen_file(inv_file, false);
1722                 if (!rc)
1723                         return inv_file;
1724                 else {
1725                         spin_lock(&cifs_file_list_lock);
1726                         list_move_tail(&inv_file->flist,
1727                                         &cifs_inode->openFileList);
1728                         spin_unlock(&cifs_file_list_lock);
1729                         cifsFileInfo_put(inv_file);
1730                         spin_lock(&cifs_file_list_lock);
1731                         ++refind;
1732                         goto refind_writable;
1733                 }
1734         }
1735
1736         return NULL;
1737 }
1738
1739 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1740 {
1741         struct address_space *mapping = page->mapping;
1742         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1743         char *write_data;
1744         int rc = -EFAULT;
1745         int bytes_written = 0;
1746         struct inode *inode;
1747         struct cifsFileInfo *open_file;
1748
1749         if (!mapping || !mapping->host)
1750                 return -EFAULT;
1751
1752         inode = page->mapping->host;
1753
1754         offset += (loff_t)from;
1755         write_data = kmap(page);
1756         write_data += from;
1757
1758         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1759                 kunmap(page);
1760                 return -EIO;
1761         }
1762
1763         /* racing with truncate? */
1764         if (offset > mapping->host->i_size) {
1765                 kunmap(page);
1766                 return 0; /* don't care */
1767         }
1768
1769         /* check to make sure that we are not extending the file */
1770         if (mapping->host->i_size - offset < (loff_t)to)
1771                 to = (unsigned)(mapping->host->i_size - offset);
1772
1773         open_file = find_writable_file(CIFS_I(mapping->host), false);
1774         if (open_file) {
1775                 bytes_written = cifs_write(open_file, open_file->pid,
1776                                            write_data, to - from, &offset);
1777                 cifsFileInfo_put(open_file);
1778                 /* Does mm or vfs already set times? */
1779                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1780                 if ((bytes_written > 0) && (offset))
1781                         rc = 0;
1782                 else if (bytes_written < 0)
1783                         rc = bytes_written;
1784         } else {
1785                 cFYI(1, "No writable filehandles for inode");
1786                 rc = -EIO;
1787         }
1788
1789         kunmap(page);
1790         return rc;
1791 }
1792
1793 static int cifs_writepages(struct address_space *mapping,
1794                            struct writeback_control *wbc)
1795 {
1796         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1797         bool done = false, scanned = false, range_whole = false;
1798         pgoff_t end, index;
1799         struct cifs_writedata *wdata;
1800         struct TCP_Server_Info *server;
1801         struct page *page;
1802         int rc = 0;
1803
1804         /*
1805          * If wsize is smaller than the page cache size, default to writing
1806          * one page at a time via cifs_writepage
1807          */
1808         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1809                 return generic_writepages(mapping, wbc);
1810
1811         if (wbc->range_cyclic) {
1812                 index = mapping->writeback_index; /* Start from prev offset */
1813                 end = -1;
1814         } else {
1815                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1816                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1817                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1818                         range_whole = true;
1819                 scanned = true;
1820         }
1821 retry:
1822         while (!done && index <= end) {
1823                 unsigned int i, nr_pages, found_pages;
1824                 pgoff_t next = 0, tofind;
1825                 struct page **pages;
1826
1827                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1828                                 end - index) + 1;
1829
1830                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1831                                              cifs_writev_complete);
1832                 if (!wdata) {
1833                         rc = -ENOMEM;
1834                         break;
1835                 }
1836
1837                 /*
1838                  * find_get_pages_tag seems to return a max of 256 on each
1839                  * iteration, so we must call it several times in order to
1840                  * fill the array, or the wsize is effectively limited to
1841                  * 256 * PAGE_CACHE_SIZE.
1842                  */
1843                 found_pages = 0;
1844                 pages = wdata->pages;
1845                 do {
1846                         nr_pages = find_get_pages_tag(mapping, &index,
1847                                                         PAGECACHE_TAG_DIRTY,
1848                                                         tofind, pages);
1849                         found_pages += nr_pages;
1850                         tofind -= nr_pages;
1851                         pages += nr_pages;
1852                 } while (nr_pages && tofind && index <= end);
1853
1854                 if (found_pages == 0) {
1855                         kref_put(&wdata->refcount, cifs_writedata_release);
1856                         break;
1857                 }
1858
1859                 nr_pages = 0;
1860                 for (i = 0; i < found_pages; i++) {
1861                         page = wdata->pages[i];
1862                         /*
1863                          * At this point we hold neither mapping->tree_lock nor
1864                          * lock on the page itself: the page may be truncated or
1865                          * invalidated (changing page->mapping to NULL), or even
1866                          * swizzled back from swapper_space to tmpfs file
1867                          * mapping
1868                          */
1869
1870                         if (nr_pages == 0)
1871                                 lock_page(page);
1872                         else if (!trylock_page(page))
1873                                 break;
1874
1875                         if (unlikely(page->mapping != mapping)) {
1876                                 unlock_page(page);
1877                                 break;
1878                         }
1879
1880                         if (!wbc->range_cyclic && page->index > end) {
1881                                 done = true;
1882                                 unlock_page(page);
1883                                 break;
1884                         }
1885
1886                         if (next && (page->index != next)) {
1887                                 /* Not next consecutive page */
1888                                 unlock_page(page);
1889                                 break;
1890                         }
1891
1892                         if (wbc->sync_mode != WB_SYNC_NONE)
1893                                 wait_on_page_writeback(page);
1894
1895                         if (PageWriteback(page) ||
1896                                         !clear_page_dirty_for_io(page)) {
1897                                 unlock_page(page);
1898                                 break;
1899                         }
1900
1901                         /*
1902                          * This actually clears the dirty bit in the radix tree.
1903                          * See cifs_writepage() for more commentary.
1904                          */
1905                         set_page_writeback(page);
1906
1907                         if (page_offset(page) >= i_size_read(mapping->host)) {
1908                                 done = true;
1909                                 unlock_page(page);
1910                                 end_page_writeback(page);
1911                                 break;
1912                         }
1913
1914                         wdata->pages[i] = page;
1915                         next = page->index + 1;
1916                         ++nr_pages;
1917                 }
1918
1919                 /* reset index to refind any pages skipped */
1920                 if (nr_pages == 0)
1921                         index = wdata->pages[0]->index + 1;
1922
1923                 /* put any pages we aren't going to use */
1924                 for (i = nr_pages; i < found_pages; i++) {
1925                         page_cache_release(wdata->pages[i]);
1926                         wdata->pages[i] = NULL;
1927                 }
1928
1929                 /* nothing to write? */
1930                 if (nr_pages == 0) {
1931                         kref_put(&wdata->refcount, cifs_writedata_release);
1932                         continue;
1933                 }
1934
1935                 wdata->sync_mode = wbc->sync_mode;
1936                 wdata->nr_pages = nr_pages;
1937                 wdata->offset = page_offset(wdata->pages[0]);
1938                 wdata->pagesz = PAGE_CACHE_SIZE;
1939                 wdata->tailsz =
1940                         min(i_size_read(mapping->host) -
1941                             page_offset(wdata->pages[nr_pages - 1]),
1942                             (loff_t)PAGE_CACHE_SIZE);
1943                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
1944                                         wdata->tailsz;
1945
1946                 do {
1947                         if (wdata->cfile != NULL)
1948                                 cifsFileInfo_put(wdata->cfile);
1949                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1950                                                           false);
1951                         if (!wdata->cfile) {
1952                                 cERROR(1, "No writable handles for inode");
1953                                 rc = -EBADF;
1954                                 break;
1955                         }
1956                         wdata->pid = wdata->cfile->pid;
1957                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1958                         rc = server->ops->async_writev(wdata);
1959                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1960
1961                 for (i = 0; i < nr_pages; ++i)
1962                         unlock_page(wdata->pages[i]);
1963
1964                 /* send failure -- clean up the mess */
1965                 if (rc != 0) {
1966                         for (i = 0; i < nr_pages; ++i) {
1967                                 if (rc == -EAGAIN)
1968                                         redirty_page_for_writepage(wbc,
1969                                                            wdata->pages[i]);
1970                                 else
1971                                         SetPageError(wdata->pages[i]);
1972                                 end_page_writeback(wdata->pages[i]);
1973                                 page_cache_release(wdata->pages[i]);
1974                         }
1975                         if (rc != -EAGAIN)
1976                                 mapping_set_error(mapping, rc);
1977                 }
1978                 kref_put(&wdata->refcount, cifs_writedata_release);
1979
1980                 wbc->nr_to_write -= nr_pages;
1981                 if (wbc->nr_to_write <= 0)
1982                         done = true;
1983
1984                 index = next;
1985         }
1986
1987         if (!scanned && !done) {
1988                 /*
1989                  * We hit the last page and there is more work to be done: wrap
1990                  * back to the start of the file
1991                  */
1992                 scanned = true;
1993                 index = 0;
1994                 goto retry;
1995         }
1996
1997         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1998                 mapping->writeback_index = index;
1999
2000         return rc;
2001 }
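/*
 * Illustrative sketch (kept under #if 0, not part of the build): the
 * page-gathering idiom from cifs_writepages() above. find_get_pages_tag()
 * may return fewer pages than requested (historically around 256 per call),
 * so it is invoked in a loop until the wanted count is reached or no dirty
 * pages remain.
 */
#if 0
static unsigned int example_gather_dirty_pages(struct address_space *mapping,
                                               pgoff_t *index, pgoff_t end,
                                               pgoff_t tofind,
                                               struct page **pages)
{
        unsigned int nr_pages, found_pages = 0;

        do {
                nr_pages = find_get_pages_tag(mapping, index,
                                              PAGECACHE_TAG_DIRTY,
                                              tofind, pages);
                found_pages += nr_pages;
                tofind -= nr_pages;
                pages += nr_pages;
        } while (nr_pages && tofind && *index <= end);

        return found_pages;
}
#endif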
2002
2003 static int
2004 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2005 {
2006         int rc;
2007         unsigned int xid;
2008
2009         xid = get_xid();
2010 /* BB add check for wbc flags */
2011         page_cache_get(page);
2012         if (!PageUptodate(page))
2013                 cFYI(1, "ppw - page not up to date");
2014
2015         /*
2016          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2017          *
2018          * A writepage() implementation always needs to do either this,
2019          * or re-dirty the page with "redirty_page_for_writepage()" in
2020          * the case of a failure.
2021          *
2022          * Just unlocking the page will cause the radix tree tag-bits
2023          * to fail to update with the state of the page correctly.
2024          */
2025         set_page_writeback(page);
2026 retry_write:
2027         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2028         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2029                 goto retry_write;
2030         else if (rc == -EAGAIN)
2031                 redirty_page_for_writepage(wbc, page);
2032         else if (rc != 0)
2033                 SetPageError(page);
2034         else
2035                 SetPageUptodate(page);
2036         end_page_writeback(page);
2037         page_cache_release(page);
2038         free_xid(xid);
2039         return rc;
2040 }
2041
2042 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2043 {
2044         int rc = cifs_writepage_locked(page, wbc);
2045         unlock_page(page);
2046         return rc;
2047 }
2048
2049 static int cifs_write_end(struct file *file, struct address_space *mapping,
2050                         loff_t pos, unsigned len, unsigned copied,
2051                         struct page *page, void *fsdata)
2052 {
2053         int rc;
2054         struct inode *inode = mapping->host;
2055         struct cifsFileInfo *cfile = file->private_data;
2056         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2057         __u32 pid;
2058
2059         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2060                 pid = cfile->pid;
2061         else
2062                 pid = current->tgid;
2063
2064         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2065                  page, pos, copied);
2066
2067         if (PageChecked(page)) {
2068                 if (copied == len)
2069                         SetPageUptodate(page);
2070                 ClearPageChecked(page);
2071         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2072                 SetPageUptodate(page);
2073
2074         if (!PageUptodate(page)) {
2075                 char *page_data;
2076                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2077                 unsigned int xid;
2078
2079                 xid = get_xid();
2080                 /* this is probably better than directly calling
2081                    partialpage_write, since here the file handle is
2082                    known and we might as well leverage it */
2083                 /* BB check if anything else is missing from ppw,
2084                    such as updating the last write time */
2085                 page_data = kmap(page);
2086                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2087                 /* if (rc < 0) should we set writebehind rc? */
2088                 kunmap(page);
2089
2090                 free_xid(xid);
2091         } else {
2092                 rc = copied;
2093                 pos += copied;
2094                 set_page_dirty(page);
2095         }
2096
2097         if (rc > 0) {
2098                 spin_lock(&inode->i_lock);
2099                 if (pos > inode->i_size)
2100                         i_size_write(inode, pos);
2101                 spin_unlock(&inode->i_lock);
2102         }
2103
2104         unlock_page(page);
2105         page_cache_release(page);
2106
2107         return rc;
2108 }
2109
2110 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2111                       int datasync)
2112 {
2113         unsigned int xid;
2114         int rc = 0;
2115         struct cifs_tcon *tcon;
2116         struct TCP_Server_Info *server;
2117         struct cifsFileInfo *smbfile = file->private_data;
2118         struct inode *inode = file->f_path.dentry->d_inode;
2119         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2120
2121         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2122         if (rc)
2123                 return rc;
2124         mutex_lock(&inode->i_mutex);
2125
2126         xid = get_xid();
2127
2128         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2129                 file->f_path.dentry->d_name.name, datasync);
2130
2131         if (!CIFS_I(inode)->clientCanCacheRead) {
2132                 rc = cifs_invalidate_mapping(inode);
2133                 if (rc) {
2134                         cFYI(1, "rc: %d during invalidate phase", rc);
2135                         rc = 0; /* don't care about it in fsync */
2136                 }
2137         }
2138
2139         tcon = tlink_tcon(smbfile->tlink);
2140         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2141                 server = tcon->ses->server;
2142                 if (server->ops->flush)
2143                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2144                 else
2145                         rc = -ENOSYS;
2146         }
2147
2148         free_xid(xid);
2149         mutex_unlock(&inode->i_mutex);
2150         return rc;
2151 }
2152
2153 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2154 {
2155         unsigned int xid;
2156         int rc = 0;
2157         struct cifs_tcon *tcon;
2158         struct TCP_Server_Info *server;
2159         struct cifsFileInfo *smbfile = file->private_data;
2160         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2161         struct inode *inode = file->f_mapping->host;
2162
2163         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2164         if (rc)
2165                 return rc;
2166         mutex_lock(&inode->i_mutex);
2167
2168         xid = get_xid();
2169
2170         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2171                 file->f_path.dentry->d_name.name, datasync);
2172
2173         tcon = tlink_tcon(smbfile->tlink);
2174         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2175                 server = tcon->ses->server;
2176                 if (server->ops->flush)
2177                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2178                 else
2179                         rc = -ENOSYS;
2180         }
2181
2182         free_xid(xid);
2183         mutex_unlock(&inode->i_mutex);
2184         return rc;
2185 }
2186
2187 /*
2188  * As the file closes, flush all cached write data for this inode, checking
2189  * for write-behind errors.
2190  */
2191 int cifs_flush(struct file *file, fl_owner_t id)
2192 {
2193         struct inode *inode = file->f_path.dentry->d_inode;
2194         int rc = 0;
2195
2196         if (file->f_mode & FMODE_WRITE)
2197                 rc = filemap_write_and_wait(inode->i_mapping);
2198
2199         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2200
2201         return rc;
2202 }
2203
2204 static int
2205 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2206 {
2207         int rc = 0;
2208         unsigned long i;
2209
2210         for (i = 0; i < num_pages; i++) {
2211                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2212                 if (!pages[i]) {
2213                         /*
2214                          * save the number of pages we have already allocated
2215                          * and return with an ENOMEM error
2216                          */
2217                         num_pages = i;
2218                         rc = -ENOMEM;
2219                         break;
2220                 }
2221         }
2222
2223         if (rc) {
2224                 for (i = 0; i < num_pages; i++)
2225                         put_page(pages[i]);
2226         }
2227         return rc;
2228 }
2229
2230 static inline
2231 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2232 {
2233         size_t num_pages;
2234         size_t clen;
2235
2236         clen = min_t(const size_t, len, wsize);
2237         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2238
2239         if (cur_len)
2240                 *cur_len = clen;
2241
2242         return num_pages;
2243 }
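/*
 * Worked example (illustrative figures only): with a 64KB wsize, 4KB pages
 * and 200000 bytes left to write, get_numpages() clamps cur_len to 65536
 * and returns DIV_ROUND_UP(65536, 4096) == 16 pages for this chunk.
 */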
2244
2245 static void
2246 cifs_uncached_writev_complete(struct work_struct *work)
2247 {
2248         int i;
2249         struct cifs_writedata *wdata = container_of(work,
2250                                         struct cifs_writedata, work);
2251         struct inode *inode = wdata->cfile->dentry->d_inode;
2252         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2253
2254         spin_lock(&inode->i_lock);
2255         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2256         if (cifsi->server_eof > inode->i_size)
2257                 i_size_write(inode, cifsi->server_eof);
2258         spin_unlock(&inode->i_lock);
2259
2260         complete(&wdata->done);
2261
2262         if (wdata->result != -EAGAIN) {
2263                 for (i = 0; i < wdata->nr_pages; i++)
2264                         put_page(wdata->pages[i]);
2265         }
2266
2267         kref_put(&wdata->refcount, cifs_writedata_release);
2268 }
2269
2270 /* attempt to send write to server, retry on any -EAGAIN errors */
2271 static int
2272 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2273 {
2274         int rc;
2275         struct TCP_Server_Info *server;
2276
2277         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2278
2279         do {
2280                 if (wdata->cfile->invalidHandle) {
2281                         rc = cifs_reopen_file(wdata->cfile, false);
2282                         if (rc != 0)
2283                                 continue;
2284                 }
2285                 rc = server->ops->async_writev(wdata);
2286         } while (rc == -EAGAIN);
2287
2288         return rc;
2289 }
2290
2291 static ssize_t
2292 cifs_iovec_write(struct file *file, const struct iovec *iov,
2293                  unsigned long nr_segs, loff_t *poffset)
2294 {
2295         unsigned long nr_pages, i;
2296         size_t copied, len, cur_len;
2297         ssize_t total_written = 0;
2298         loff_t offset;
2299         struct iov_iter it;
2300         struct cifsFileInfo *open_file;
2301         struct cifs_tcon *tcon;
2302         struct cifs_sb_info *cifs_sb;
2303         struct cifs_writedata *wdata, *tmp;
2304         struct list_head wdata_list;
2305         int rc;
2306         pid_t pid;
2307
2308         len = iov_length(iov, nr_segs);
2309         if (!len)
2310                 return 0;
2311
2312         rc = generic_write_checks(file, poffset, &len, 0);
2313         if (rc)
2314                 return rc;
2315
2316         INIT_LIST_HEAD(&wdata_list);
2317         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2318         open_file = file->private_data;
2319         tcon = tlink_tcon(open_file->tlink);
2320
2321         if (!tcon->ses->server->ops->async_writev)
2322                 return -ENOSYS;
2323
2324         offset = *poffset;
2325
2326         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2327                 pid = open_file->pid;
2328         else
2329                 pid = current->tgid;
2330
2331         iov_iter_init(&it, iov, nr_segs, len, 0);
2332         do {
2333                 size_t save_len;
2334
2335                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2336                 wdata = cifs_writedata_alloc(nr_pages,
2337                                              cifs_uncached_writev_complete);
2338                 if (!wdata) {
2339                         rc = -ENOMEM;
2340                         break;
2341                 }
2342
2343                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2344                 if (rc) {
2345                         kfree(wdata);
2346                         break;
2347                 }
2348
2349                 save_len = cur_len;
2350                 for (i = 0; i < nr_pages; i++) {
2351                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2352                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2353                                                          0, copied);
2354                         cur_len -= copied;
2355                         iov_iter_advance(&it, copied);
2356                 }
2357                 cur_len = save_len - cur_len;
2358
2359                 wdata->sync_mode = WB_SYNC_ALL;
2360                 wdata->nr_pages = nr_pages;
2361                 wdata->offset = (__u64)offset;
2362                 wdata->cfile = cifsFileInfo_get(open_file);
2363                 wdata->pid = pid;
2364                 wdata->bytes = cur_len;
2365                 wdata->pagesz = PAGE_SIZE;
2366                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2367                 rc = cifs_uncached_retry_writev(wdata);
2368                 if (rc) {
2369                         kref_put(&wdata->refcount, cifs_writedata_release);
2370                         break;
2371                 }
2372
2373                 list_add_tail(&wdata->list, &wdata_list);
2374                 offset += cur_len;
2375                 len -= cur_len;
2376         } while (len > 0);
2377
2378         /*
2379          * If at least one write was successfully sent, then discard any rc
2380          * value from the later writes. If another write succeeds, then
2381          * we'll end up returning whatever was written. If it fails, then
2382          * we'll get a new rc value from that.
2383          */
2384         if (!list_empty(&wdata_list))
2385                 rc = 0;
2386
2387         /*
2388          * Wait for and collect replies for any successful sends in order of
2389          * increasing offset. Once an error is hit or we get a fatal signal
2390          * while waiting, then return without waiting for any more replies.
2391          */
2392 restart_loop:
2393         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2394                 if (!rc) {
2395                         /* FIXME: freezable too? */
2396                         rc = wait_for_completion_killable(&wdata->done);
2397                         if (rc)
2398                                 rc = -EINTR;
2399                         else if (wdata->result)
2400                                 rc = wdata->result;
2401                         else
2402                                 total_written += wdata->bytes;
2403
2404                         /* resend call if it's a retryable error */
2405                         if (rc == -EAGAIN) {
2406                                 rc = cifs_uncached_retry_writev(wdata);
2407                                 goto restart_loop;
2408                         }
2409                 }
2410                 list_del_init(&wdata->list);
2411                 kref_put(&wdata->refcount, cifs_writedata_release);
2412         }
2413
2414         if (total_written > 0)
2415                 *poffset += total_written;
2416
2417         cifs_stats_bytes_written(tcon, total_written);
2418         return total_written ? total_written : (ssize_t)rc;
2419 }
2420
2421 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2422                                 unsigned long nr_segs, loff_t pos)
2423 {
2424         ssize_t written;
2425         struct inode *inode;
2426
2427         inode = iocb->ki_filp->f_path.dentry->d_inode;
2428
2429         /*
2430          * BB - optimize this path when signing is disabled. We can drop the
2431          * extra memory-to-memory copying and use the iovec buffers directly
2432          * when constructing the write request.
2433          */
2434
2435         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2436         if (written > 0) {
2437                 CIFS_I(inode)->invalid_mapping = true;
2438                 iocb->ki_pos = pos;
2439         }
2440
2441         return written;
2442 }
2443
2444 static ssize_t
2445 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2446             unsigned long nr_segs, loff_t pos)
2447 {
2448         struct file *file = iocb->ki_filp;
2449         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2450         struct inode *inode = file->f_mapping->host;
2451         struct cifsInodeInfo *cinode = CIFS_I(inode);
2452         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2453         ssize_t rc = -EACCES;
2454
2455         BUG_ON(iocb->ki_pos != pos);
2456
2457         sb_start_write(inode->i_sb);
2458
2459         /*
2460          * We need to hold the sem to be sure nobody modifies the lock
2461          * list with a brlock that prevents writing.
2462          */
2463         down_read(&cinode->lock_sem);
2464         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2465                                      server->vals->exclusive_lock_type, NULL,
2466                                      true)) {
2467                 mutex_lock(&inode->i_mutex);
2468                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2469                                                &iocb->ki_pos);
2470                 mutex_unlock(&inode->i_mutex);
2471         }
2472
2473         if (rc > 0 || rc == -EIOCBQUEUED) {
2474                 ssize_t err;
2475
2476                 err = generic_write_sync(file, pos, rc);
2477                 if (err < 0 && rc > 0)
2478                         rc = err;
2479         }
2480
2481         up_read(&cinode->lock_sem);
2482         sb_end_write(inode->i_sb);
2483         return rc;
2484 }
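/*
 * Illustrative sketch (kept under #if 0, not part of the build): the
 * lock_sem discipline shared by cifs_writev() above and cifs_strict_readv()
 * below -- hold lock_sem for reading across both the conflict check and the
 * I/O itself, so no brlock can be inserted in between.
 */
#if 0
static ssize_t example_io_under_lock_sem(struct cifsFileInfo *cfile,
                                         struct cifsInodeInfo *cinode,
                                         loff_t pos, size_t len, __u32 type)
{
        ssize_t rc = -EACCES;

        down_read(&cinode->lock_sem);
        if (!cifs_find_lock_conflict(cfile, pos, len, type, NULL, true))
                rc = 0; /* ... perform the actual read or write here ... */
        up_read(&cinode->lock_sem);
        return rc;
}
#endif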
2485
2486 ssize_t
2487 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2488                    unsigned long nr_segs, loff_t pos)
2489 {
2490         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2491         struct cifsInodeInfo *cinode = CIFS_I(inode);
2492         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2493         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2494                                                 iocb->ki_filp->private_data;
2495         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2496
2497 #ifdef CONFIG_CIFS_SMB2
2498         /*
2499          * If we have a read oplock and want to write data to the file, we
2500          * need to store it in the page cache and then push it to the server
2501          * to be sure the next read will get valid data.
2502          */
2503         if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) {
2504                 ssize_t written;
2505                 int rc;
2506
2507                 written = generic_file_aio_write(iocb, iov, nr_segs, pos);
2508                 rc = filemap_fdatawrite(inode->i_mapping);
2509                 if (rc)
2510                         return (ssize_t)rc;
2511
2512                 return written;
2513         }
2514 #endif
2515
2516         /*
2517          * For non-oplocked files in strict cache mode we need to write the data
2518          * to the server exactly from pos to pos+len-1 rather than flush all
2519          * affected pages, because that may cause an error with mandatory locks
2520          * on these pages but not on the region from pos to pos+len-1.
2521          */
2522
2523         if (!cinode->clientCanCacheAll)
2524                 return cifs_user_writev(iocb, iov, nr_segs, pos);
2525
2526         if (cap_unix(tcon->ses) &&
2527             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2528             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2529                 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2530
2531         return cifs_writev(iocb, iov, nr_segs, pos);
2532 }
2533
2534 static struct cifs_readdata *
2535 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2536 {
2537         struct cifs_readdata *rdata;
2538
2539         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2540                         GFP_KERNEL);
2541         if (rdata != NULL) {
2542                 kref_init(&rdata->refcount);
2543                 INIT_LIST_HEAD(&rdata->list);
2544                 init_completion(&rdata->done);
2545                 INIT_WORK(&rdata->work, complete);
2546         }
2547
2548         return rdata;
2549 }
2550
2551 void
2552 cifs_readdata_release(struct kref *refcount)
2553 {
2554         struct cifs_readdata *rdata = container_of(refcount,
2555                                         struct cifs_readdata, refcount);
2556
2557         if (rdata->cfile)
2558                 cifsFileInfo_put(rdata->cfile);
2559
2560         kfree(rdata);
2561 }
2562
2563 static int
2564 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2565 {
2566         int rc = 0;
2567         struct page *page;
2568         unsigned int i;
2569
2570         for (i = 0; i < nr_pages; i++) {
2571                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2572                 if (!page) {
2573                         rc = -ENOMEM;
2574                         break;
2575                 }
2576                 rdata->pages[i] = page;
2577         }
2578
2579         if (rc) {
2580                 for (i = 0; i < nr_pages; i++) {
2581                         put_page(rdata->pages[i]);
2582                         rdata->pages[i] = NULL;
2583                 }
2584         }
2585         return rc;
2586 }
2587
2588 static void
2589 cifs_uncached_readdata_release(struct kref *refcount)
2590 {
2591         struct cifs_readdata *rdata = container_of(refcount,
2592                                         struct cifs_readdata, refcount);
2593         unsigned int i;
2594
2595         for (i = 0; i < rdata->nr_pages; i++) {
2596                 put_page(rdata->pages[i]);
2597                 rdata->pages[i] = NULL;
2598         }
2599         cifs_readdata_release(refcount);
2600 }
2601
2602 static int
2603 cifs_retry_async_readv(struct cifs_readdata *rdata)
2604 {
2605         int rc;
2606         struct TCP_Server_Info *server;
2607
2608         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2609
2610         do {
2611                 if (rdata->cfile->invalidHandle) {
2612                         rc = cifs_reopen_file(rdata->cfile, true);
2613                         if (rc != 0)
2614                                 continue;
2615                 }
2616                 rc = server->ops->async_readv(rdata);
2617         } while (rc == -EAGAIN);
2618
2619         return rc;
2620 }
2621
2622 /**
2623  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2624  * @rdata:      the readdata response with list of pages holding data
2625  * @iov:        vector in which we should copy the data
2626  * @nr_segs:    number of segments in vector
2627  * @offset:     offset into file of the first iovec
2628  * @copied:     used to return the amount of data copied to the iov
2629  *
2630  * This function copies data from a list of pages in a readdata response into
2631  * an array of iovecs. It will first calculate where the data should go
2632  * based on the info in the readdata and then copy the data into that spot.
2633  */
2634 static ssize_t
2635 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2636                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2637 {
2638         int rc = 0;
2639         struct iov_iter ii;
2640         size_t pos = rdata->offset - offset;
2641         ssize_t remaining = rdata->bytes;
2642         unsigned char *pdata;
2643         unsigned int i;
2644
2645         /* set up iov_iter and advance to the correct offset */
2646         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2647         iov_iter_advance(&ii, pos);
2648
2649         *copied = 0;
2650         for (i = 0; i < rdata->nr_pages; i++) {
2651                 ssize_t copy;
2652                 struct page *page = rdata->pages[i];
2653
2654                 /* copy a whole page or whatever's left */
2655                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2656
2657                 /* ...but limit it to whatever space is left in the iov */
2658                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2659
2660                 /* go while there's data to be copied and no errors */
2661                 if (copy && !rc) {
2662                         pdata = kmap(page);
2663                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2664                                                 (int)copy);
2665                         kunmap(page);
2666                         if (!rc) {
2667                                 *copied += copy;
2668                                 remaining -= copy;
2669                                 iov_iter_advance(&ii, copy);
2670                         }
2671                 }
2672         }
2673
2674         return rc;
2675 }
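/*
 * Worked example (illustrative figures only): if the overall request began
 * at offset 8192 but this rdata covers offset 12288, the iov_iter above is
 * advanced by pos = 12288 - 8192 = 4096 bytes before any page is copied,
 * so each response lands at the right spot in the caller's buffers.
 */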
2676
2677 static void
2678 cifs_uncached_readv_complete(struct work_struct *work)
2679 {
2680         struct cifs_readdata *rdata = container_of(work,
2681                                                 struct cifs_readdata, work);
2682
2683         complete(&rdata->done);
2684         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2685 }
2686
2687 static int
2688 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2689                         struct cifs_readdata *rdata, unsigned int len)
2690 {
2691         int total_read = 0, result = 0;
2692         unsigned int i;
2693         unsigned int nr_pages = rdata->nr_pages;
2694         struct kvec iov;
2695
2696         rdata->tailsz = PAGE_SIZE;
2697         for (i = 0; i < nr_pages; i++) {
2698                 struct page *page = rdata->pages[i];
2699
2700                 if (len >= PAGE_SIZE) {
2701                         /* enough data to fill the page */
2702                         iov.iov_base = kmap(page);
2703                         iov.iov_len = PAGE_SIZE;
2704                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2705                                 i, iov.iov_base, iov.iov_len);
2706                         len -= PAGE_SIZE;
2707                 } else if (len > 0) {
2708                         /* enough for partial page, fill and zero the rest */
2709                         iov.iov_base = kmap(page);
2710                         iov.iov_len = len;
2711                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2712                                 i, iov.iov_base, iov.iov_len);
2713                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2714                         rdata->tailsz = len;
2715                         len = 0;
2716                 } else {
2717                         /* no need to hold page hostage */
2718                         rdata->pages[i] = NULL;
2719                         rdata->nr_pages--;
2720                         put_page(page);
2721                         continue;
2722                 }
2723
2724                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2725                 kunmap(page);
2726                 if (result < 0)
2727                         break;
2728
2729                 total_read += result;
2730         }
2731
2732         return total_read > 0 ? total_read : result;
2733 }
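/*
 * Worked example (illustrative figures only): for len = 10000 with 4KB
 * pages, the loop above fills pages 0 and 1 completely, reads the last
 * 10000 - 8192 = 1808 bytes into page 2 (zeroing its tail and setting
 * tailsz = 1808), and releases any remaining pages instead of holding
 * them hostage.
 */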
2734
2735 static ssize_t
2736 cifs_iovec_read(struct file *file, const struct iovec *iov,
2737                  unsigned long nr_segs, loff_t *poffset)
2738 {
2739         ssize_t rc;
2740         size_t len, cur_len;
2741         ssize_t total_read = 0;
2742         loff_t offset = *poffset;
2743         unsigned int npages;
2744         struct cifs_sb_info *cifs_sb;
2745         struct cifs_tcon *tcon;
2746         struct cifsFileInfo *open_file;
2747         struct cifs_readdata *rdata, *tmp;
2748         struct list_head rdata_list;
2749         pid_t pid;
2750
2751         if (!nr_segs)
2752                 return 0;
2753
2754         len = iov_length(iov, nr_segs);
2755         if (!len)
2756                 return 0;
2757
2758         INIT_LIST_HEAD(&rdata_list);
2759         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2760         open_file = file->private_data;
2761         tcon = tlink_tcon(open_file->tlink);
2762
2763         if (!tcon->ses->server->ops->async_readv)
2764                 return -ENOSYS;
2765
2766         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2767                 pid = open_file->pid;
2768         else
2769                 pid = current->tgid;
2770
2771         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2772                 cFYI(1, "attempting read on write only file instance");
2773
2774         do {
2775                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2776                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2777
2778                 /* allocate a readdata struct */
2779                 rdata = cifs_readdata_alloc(npages,
2780                                             cifs_uncached_readv_complete);
2781                 if (!rdata) {
2782                         rc = -ENOMEM;
2783                         goto error;
2784                 }
2785
2786                 rc = cifs_read_allocate_pages(rdata, npages);
2787                 if (rc)
2788                         goto error;
2789
2790                 rdata->cfile = cifsFileInfo_get(open_file);
2791                 rdata->nr_pages = npages;
2792                 rdata->offset = offset;
2793                 rdata->bytes = cur_len;
2794                 rdata->pid = pid;
2795                 rdata->pagesz = PAGE_SIZE;
2796                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2797
2798                 rc = cifs_retry_async_readv(rdata);
2799 error:
2800                 if (rc) {
2801                         kref_put(&rdata->refcount,
2802                                  cifs_uncached_readdata_release);
2803                         break;
2804                 }
2805
2806                 list_add_tail(&rdata->list, &rdata_list);
2807                 offset += cur_len;
2808                 len -= cur_len;
2809         } while (len > 0);
2810
2811         /* if at least one read request was sent successfully, reset rc */
2812         if (!list_empty(&rdata_list))
2813                 rc = 0;
2814
2815         /* the loop below should proceed in the order of increasing offsets */
2816 restart_loop:
2817         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2818                 if (!rc) {
2819                         ssize_t copied;
2820
2821                         /* FIXME: freezable sleep too? */
2822                         rc = wait_for_completion_killable(&rdata->done);
2823                         if (rc)
2824                                 rc = -EINTR;
2825                         else if (rdata->result)
2826                                 rc = rdata->result;
2827                         else {
2828                                 rc = cifs_readdata_to_iov(rdata, iov,
2829                                                         nr_segs, *poffset,
2830                                                         &copied);
2831                                 total_read += copied;
2832                         }
2833
2834                         /* resend call if it's a retryable error */
2835                         if (rc == -EAGAIN) {
2836                                 rc = cifs_retry_async_readv(rdata);
2837                                 goto restart_loop;
2838                         }
2839                 }
2840                 list_del_init(&rdata->list);
2841                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2842         }
2843
2844         cifs_stats_bytes_read(tcon, total_read);
2845         *poffset += total_read;
2846
2847         /* mask nodata case */
2848         if (rc == -ENODATA)
2849                 rc = 0;
2850
2851         return total_read ? total_read : rc;
2852 }
2853
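/*
 * Worked example for the request splitting above (editor's sketch, not
 * upstream text): with cifs_sb->rsize = 16384 and a 40000-byte request,
 * the loop issues three async reads of 16384, 16384 and 7232 bytes.
 * With PAGE_SIZE = 4096, DIV_ROUND_UP() sizes the page arrays at 4, 4
 * and 2 pages; the last rdata only partially fills its final page,
 * which is why rdata->bytes records cur_len rather than
 * npages * PAGE_SIZE.
 */
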
2854 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2855                                unsigned long nr_segs, loff_t pos)
2856 {
2857         ssize_t read;
2858
2859         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2860         if (read > 0)
2861                 iocb->ki_pos = pos;
2862
2863         return read;
2864 }
2865
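/*
 * How these entry points are reached (editor's sketch; the real tables
 * live in cifsfs.c and may differ in detail): the file_operations
 * chosen at mount time wire ->aio_read to one of the readers here,
 * roughly:
 *
 *	const struct file_operations cifs_file_direct_ops = {
 *		.read     = do_sync_read,
 *		.aio_read = cifs_user_readv,   <- uncached path above
 *		...
 *	};
 *	const struct file_operations cifs_file_strict_ops = {
 *		.aio_read = cifs_strict_readv, <- strict cache path below
 *		...
 *	};
 *
 * so read(2)/readv(2) bypass the page cache entirely on mounts that
 * disable caching (e.g. forcedirectio).
 */
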
2866 ssize_t
2867 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2868                   unsigned long nr_segs, loff_t pos)
2869 {
2870         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2871         struct cifsInodeInfo *cinode = CIFS_I(inode);
2872         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2873         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2874                                                 iocb->ki_filp->private_data;
2875         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2876         int rc = -EACCES;
2877
2878         /*
2879          * In strict cache mode we need to read from the server every time
2880          * unless we hold a level II oplock, because the server can delay
2881          * the mtime change and so we cannot decide whether the inode needs
2882          * invalidating. Reading through the page cache can also fail if
2883          * there are mandatory locks on pages affected by this read but not
2884          * on the region from pos to pos+len-1.
2885          */
2886         if (!cinode->clientCanCacheRead)
2887                 return cifs_user_readv(iocb, iov, nr_segs, pos);
2888
2889         if (cap_unix(tcon->ses) &&
2890             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2891             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2892                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2893
2894         /*
2895          * We need to hold the sem to be sure nobody modifies the lock
2896          * list with a brlock that prevents reading.
2897          */
2898         down_read(&cinode->lock_sem);
2899         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2900                                      tcon->ses->server->vals->shared_lock_type,
2901                                      NULL, true))
2902                 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2903         up_read(&cinode->lock_sem);
2904         return rc;
2905 }
2906
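/*
 * Editor's note on the conflict check above: shared_lock_type means a
 * read only conflicts with exclusive (write) brlocks.  For example, if
 * another process holds a mandatory write lock over bytes 0-99 and this
 * read covers bytes 50-149, cifs_find_lock_conflict() reports a
 * conflict and the function returns its initial rc of -EACCES rather
 * than serving possibly stale cached pages.
 */
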
2907 static ssize_t
2908 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2909 {
2910         int rc = -EACCES;
2911         unsigned int bytes_read = 0;
2912         unsigned int total_read;
2913         unsigned int current_read_size;
2914         unsigned int rsize;
2915         struct cifs_sb_info *cifs_sb;
2916         struct cifs_tcon *tcon;
2917         struct TCP_Server_Info *server;
2918         unsigned int xid;
2919         char *cur_offset;
2920         struct cifsFileInfo *open_file;
2921         struct cifs_io_parms io_parms;
2922         int buf_type = CIFS_NO_BUFFER;
2923         __u32 pid;
2924
2925         xid = get_xid();
2926         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2927
2928         /* FIXME: set up handlers for larger reads and/or convert to async */
2929         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2930
2931         if (file->private_data == NULL) {
2932                 rc = -EBADF;
2933                 free_xid(xid);
2934                 return rc;
2935         }
2936         open_file = file->private_data;
2937         tcon = tlink_tcon(open_file->tlink);
2938         server = tcon->ses->server;
2939
2940         if (!server->ops->sync_read) {
2941                 free_xid(xid);
2942                 return -ENOSYS;
2943         }
2944
2945         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2946                 pid = open_file->pid;
2947         else
2948                 pid = current->tgid;
2949
2950         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2951                 cFYI(1, "attempting read on write only file instance");
2952
2953         for (total_read = 0, cur_offset = read_data; read_size > total_read;
2954              total_read += bytes_read, cur_offset += bytes_read) {
2955                 current_read_size = min_t(uint, read_size - total_read, rsize);
2956                 /*
2957                  * For Windows ME and 9x we do not want to request more than
2958                  * was negotiated, since the server will refuse the read then.
2959                  */
2960                 if ((tcon->ses) && !(tcon->ses->capabilities &
2961                                 tcon->ses->server->vals->cap_large_files)) {
2962                         current_read_size = min_t(uint, current_read_size,
2963                                         CIFSMaxBufSize);
2964                 }
2965                 rc = -EAGAIN;
2966                 while (rc == -EAGAIN) {
2967                         if (open_file->invalidHandle) {
2968                                 rc = cifs_reopen_file(open_file, true);
2969                                 if (rc != 0)
2970                                         break;
2971                         }
2972                         io_parms.pid = pid;
2973                         io_parms.tcon = tcon;
2974                         io_parms.offset = *offset;
2975                         io_parms.length = current_read_size;
2976                         rc = server->ops->sync_read(xid, open_file, &io_parms,
2977                                                     &bytes_read, &cur_offset,
2978                                                     &buf_type);
2979                 }
2980                 if (rc || (bytes_read == 0)) {
2981                         if (total_read) {
2982                                 break;
2983                         } else {
2984                                 free_xid(xid);
2985                                 return rc;
2986                         }
2987                 } else {
2988                         cifs_stats_bytes_read(tcon, bytes_read);
2989                         *offset += bytes_read;
2990                 }
2991         }
2992         free_xid(xid);
2993         return total_read;
2994 }
2995
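/*
 * The sync_read method used above is supplied per protocol dialect.
 * Editor's sketch of the shape implied by this call site (see
 * smb_version_operations in cifsglob.h for the authoritative
 * definition):
 *
 *	int (*sync_read)(const unsigned int xid,
 *			 struct cifsFileInfo *cfile,
 *			 struct cifs_io_parms *io_parms,
 *			 unsigned int *bytes_read, char **buf,
 *			 int *buf_type);
 *
 * For SMB1 this ends up in CIFSSMBRead(); the -EAGAIN loop above
 * retries after reopening a handle invalidated by a reconnect.
 */
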
2996 /*
2997  * If the page is mmap'ed into a process' page tables, then we need to make
2998  * sure that it doesn't change while being written back.
2999  */
3000 static int
3001 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3002 {
3003         struct page *page = vmf->page;
3004
3005         lock_page(page);
3006         return VM_FAULT_LOCKED;
3007 }
3008
3009 static struct vm_operations_struct cifs_file_vm_ops = {
3010         .fault = filemap_fault,
3011         .page_mkwrite = cifs_page_mkwrite,
3012         .remap_pages = generic_file_remap_pages,
3013 };
3014
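/*
 * Editor's note: the ->page_mkwrite protocol expects the page to be
 * returned locked when VM_FAULT_LOCKED is set; the fault handler then
 * re-checks page->mapping, marks the page dirty and unlocks it.
 * Because writeback also takes the page lock, a page cannot be made
 * writable in the page tables while it is in the middle of being
 * written back.
 */
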
3015 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3016 {
3017         int rc, xid;
3018         struct inode *inode = file->f_path.dentry->d_inode;
3019
3020         xid = get_xid();
3021
3022         if (!CIFS_I(inode)->clientCanCacheRead) {
3023                 rc = cifs_invalidate_mapping(inode);
3024                 if (rc) {
3025                         free_xid(xid);
3026                         return rc;
3027                 }
3028         }
3027
3028         rc = generic_file_mmap(file, vma);
3029         if (rc == 0)
3030                 vma->vm_ops = &cifs_file_vm_ops;
3031         free_xid(xid);
3032         return rc;
3033 }
3034
3035 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3036 {
3037         int rc, xid;
3038
3039         xid = get_xid();
3040         rc = cifs_revalidate_file(file);
3041         if (rc) {
3042                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
3043                 free_xid(xid);
3044                 return rc;
3045         }
3046         rc = generic_file_mmap(file, vma);
3047         if (rc == 0)
3048                 vma->vm_ops = &cifs_file_vm_ops;
3049         free_xid(xid);
3050         return rc;
3051 }
3052
3053 static void
3054 cifs_readv_complete(struct work_struct *work)
3055 {
3056         unsigned int i;
3057         struct cifs_readdata *rdata = container_of(work,
3058                                                 struct cifs_readdata, work);
3059
3060         for (i = 0; i < rdata->nr_pages; i++) {
3061                 struct page *page = rdata->pages[i];
3062
3063                 lru_cache_add_file(page);
3064
3065                 if (rdata->result == 0) {
3066                         flush_dcache_page(page);
3067                         SetPageUptodate(page);
3068                 }
3069
3070                 unlock_page(page);
3071
3072                 if (rdata->result == 0)
3073                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3074
3075                 page_cache_release(page);
3076                 rdata->pages[i] = NULL;
3077         }
3078         kref_put(&rdata->refcount, cifs_readdata_release);
3079 }
3080
3081 static int
3082 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3083                         struct cifs_readdata *rdata, unsigned int len)
3084 {
3085         int total_read = 0, result = 0;
3086         unsigned int i;
3087         u64 eof;
3088         pgoff_t eof_index;
3089         unsigned int nr_pages = rdata->nr_pages;
3090         struct kvec iov;
3091
3092         /* determine the eof that the server (probably) has */
3093         eof = CIFS_I(rdata->mapping->host)->server_eof;
3094         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3095         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
3096
3097         rdata->tailsz = PAGE_CACHE_SIZE;
3098         for (i = 0; i < nr_pages; i++) {
3099                 struct page *page = rdata->pages[i];
3100
3101                 if (len >= PAGE_CACHE_SIZE) {
3102                         /* enough data to fill the page */
3103                         iov.iov_base = kmap(page);
3104                         iov.iov_len = PAGE_CACHE_SIZE;
3105                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3106                                 i, page->index, iov.iov_base, iov.iov_len);
3107                         len -= PAGE_CACHE_SIZE;
3108                 } else if (len > 0) {
3109                         /* enough for partial page, fill and zero the rest */
3110                         iov.iov_base = kmap(page);
3111                         iov.iov_len = len;
3112                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3113                                 i, page->index, iov.iov_base, iov.iov_len);
3114                         memset(iov.iov_base + len,
3115                                 '\0', PAGE_CACHE_SIZE - len);
3116                         rdata->tailsz = len;
3117                         len = 0;
3118                 } else if (page->index > eof_index) {
3119                         /*
3120                          * The VFS will not try to do readahead past the
3121                          * i_size, but it's possible that we have outstanding
3122                          * writes with gaps in the middle and the i_size hasn't
3123                          * caught up yet. Populate those with zeroed out pages
3124                          * to prevent the VFS from repeatedly attempting to
3125                          * fill them until the writes are flushed.
3126                          */
3127                         zero_user(page, 0, PAGE_CACHE_SIZE);
3128                         lru_cache_add_file(page);
3129                         flush_dcache_page(page);
3130                         SetPageUptodate(page);
3131                         unlock_page(page);
3132                         page_cache_release(page);
3133                         rdata->pages[i] = NULL;
3134                         rdata->nr_pages--;
3135                         continue;
3136                 } else {
3137                         /* no need to hold page hostage */
3138                         lru_cache_add_file(page);
3139                         unlock_page(page);
3140                         page_cache_release(page);
3141                         rdata->pages[i] = NULL;
3142                         rdata->nr_pages--;
3143                         continue;
3144                 }
3145
3146                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3147                 kunmap(page);
3148                 if (result < 0)
3149                         break;
3150
3151                 total_read += result;
3152         }
3153
3154         return total_read > 0 ? total_read : result;
3155 }
3156
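/*
 * Worked example for the eof_index logic above (editor's sketch): with
 * 4 KiB pages (PAGE_CACHE_SHIFT == 12) and server_eof == 10000,
 * eof_index = (10000 - 1) >> 12 = 2, so pages 0-2 are read from the
 * wire and any page with index > 2 is zero-filled locally instead of
 * being requested from the server.
 */
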
3157 static int cifs_readpages(struct file *file, struct address_space *mapping,
3158         struct list_head *page_list, unsigned num_pages)
3159 {
3160         int rc;
3161         struct list_head tmplist;
3162         struct cifsFileInfo *open_file = file->private_data;
3163         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3164         unsigned int rsize = cifs_sb->rsize;
3165         pid_t pid;
3166
3167         /*
3168          * Give up immediately if rsize is too small to read an entire page.
3169          * The VFS will fall back to readpage. We should never reach this
3170          * point, however, since we set ra_pages to 0 when the rsize is smaller
3171          * than a cache page.
3172          */
3173         if (unlikely(rsize < PAGE_CACHE_SIZE))
3174                 return 0;
3175
3176         /*
3177          * Read as many pages as possible from fscache. Returns -ENOBUFS
3178          * immediately if the cookie is negative.
3179          */
3180         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3181                                          &num_pages);
3182         if (rc == 0)
3183                 return rc;
3184
3185         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3186                 pid = open_file->pid;
3187         else
3188                 pid = current->tgid;
3189
3190         rc = 0;
3191         INIT_LIST_HEAD(&tmplist);
3192
3193         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3194                 mapping, num_pages);
3195
3196         /*
3197          * Start with the page at end of list and move it to private
3198          * list. Do the same with any following pages until we hit
3199          * the rsize limit, hit an index discontinuity, or run out of
3200          * pages. Issue the async read and then start the loop again
3201          * until the list is empty.
3202          *
3203          * Note that list order is important. The page_list is in
3204          * the order of declining indexes. When we put the pages in
3205          * the rdata->pages, then we want them in increasing order.
3206          */
3207         while (!list_empty(page_list)) {
3208                 unsigned int i;
3209                 unsigned int bytes = PAGE_CACHE_SIZE;
3210                 unsigned int expected_index;
3211                 unsigned int nr_pages = 1;
3212                 loff_t offset;
3213                 struct page *page, *tpage;
3214                 struct cifs_readdata *rdata;
3215
3216                 page = list_entry(page_list->prev, struct page, lru);
3217
3218                 /*
3219                  * Lock the page and put it in the cache. Since no one else
3220                  * should have access to this page, we're safe to simply set
3221                  * PG_locked without checking it first.
3222                  */
3223                 __set_page_locked(page);
3224                 rc = add_to_page_cache_locked(page, mapping,
3225                                               page->index, GFP_KERNEL);
3226
3227                 /* give up if we can't stick it in the cache */
3228                 if (rc) {
3229                         __clear_page_locked(page);
3230                         break;
3231                 }
3232
3233                 /* move first page to the tmplist */
3234                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3235                 list_move_tail(&page->lru, &tmplist);
3236
3237                 /* now try and add more pages onto the request */
3238                 expected_index = page->index + 1;
3239                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3240                         /* discontinuity ? */
3241                         if (page->index != expected_index)
3242                                 break;
3243
3244                         /* would this page push the read over the rsize? */
3245                         if (bytes + PAGE_CACHE_SIZE > rsize)
3246                                 break;
3247
3248                         __set_page_locked(page);
3249                         if (add_to_page_cache_locked(page, mapping,
3250                                                 page->index, GFP_KERNEL)) {
3251                                 __clear_page_locked(page);
3252                                 break;
3253                         }
3254                         list_move_tail(&page->lru, &tmplist);
3255                         bytes += PAGE_CACHE_SIZE;
3256                         expected_index++;
3257                         nr_pages++;
3258                 }
3259
3260                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3261                 if (!rdata) {
3262                         /* best to give up if we're out of mem */
3263                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3264                                 list_del(&page->lru);
3265                                 lru_cache_add_file(page);
3266                                 unlock_page(page);
3267                                 page_cache_release(page);
3268                         }
3269                         rc = -ENOMEM;
3270                         break;
3271                 }
3272
3273                 rdata->cfile = cifsFileInfo_get(open_file);
3274                 rdata->mapping = mapping;
3275                 rdata->offset = offset;
3276                 rdata->bytes = bytes;
3277                 rdata->pid = pid;
3278                 rdata->pagesz = PAGE_CACHE_SIZE;
3279                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3280
3281                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3282                         list_del(&page->lru);
3283                         rdata->pages[rdata->nr_pages++] = page;
3284                 }
3285
3286                 rc = cifs_retry_async_readv(rdata);
3287                 if (rc != 0) {
3288                         for (i = 0; i < rdata->nr_pages; i++) {
3289                                 page = rdata->pages[i];
3290                                 lru_cache_add_file(page);
3291                                 unlock_page(page);
3292                                 page_cache_release(page);
3293                         }
3294                         kref_put(&rdata->refcount, cifs_readdata_release);
3295                         break;
3296                 }
3297
3298                 kref_put(&rdata->refcount, cifs_readdata_release);
3299         }
3300
3301         return rc;
3302 }
3303
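/*
 * Editor's illustration of the batching above: page_list arrives in
 * order of declining indexes, so given pages 9, 7, 6 and 5 with an
 * rsize covering four pages, the first pass takes page 5 from the
 * tail, extends the batch with 6 and 7, stops at the discontinuity
 * before 9, and issues one 3-page async read; the next pass issues a
 * separate single-page read for page 9.
 */
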
3304 static int cifs_readpage_worker(struct file *file, struct page *page,
3305         loff_t *poffset)
3306 {
3307         char *read_data;
3308         int rc;
3309
3310         /* Is the page cached? */
3311         rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3312         if (rc == 0)
3313                 goto read_complete;
3314
3315         page_cache_get(page);
3316         read_data = kmap(page);
3317         /* for reads over a certain size we could initiate async read-ahead */
3318
3319         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3320
3321         if (rc < 0)
3322                 goto io_error;
3323         else
3324                 cFYI(1, "Bytes read %d", rc);
3325
3326         file->f_path.dentry->d_inode->i_atime =
3327                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3328
3329         if (PAGE_CACHE_SIZE > rc)
3330                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3331
3332         flush_dcache_page(page);
3333         SetPageUptodate(page);
3334
3335         /* send this page to the cache */
3336         cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3337
3338         rc = 0;
3339
3340 io_error:
3341         kunmap(page);
3342         page_cache_release(page);
3343
3344 read_complete:
3345         return rc;
3346 }
3347
3348 static int cifs_readpage(struct file *file, struct page *page)
3349 {
3350         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3351         int rc = -EACCES;
3352         unsigned int xid;
3353
3354         xid = get_xid();
3355
3356         if (file->private_data == NULL) {
3357                 rc = -EBADF;
3358                 free_xid(xid);
3359                 return rc;
3360         }
3361
3362         cFYI(1, "readpage %p at offset %d 0x%x",
3363                  page, (int)offset, (int)offset);
3364
3365         rc = cifs_readpage_worker(file, page, &offset);
3366
3367         unlock_page(page);
3368
3369         free_xid(xid);
3370         return rc;
3371 }
3372
3373 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3374 {
3375         struct cifsFileInfo *open_file;
3376
3377         spin_lock(&cifs_file_list_lock);
3378         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3379                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3380                         spin_unlock(&cifs_file_list_lock);
3381                         return 1;
3382                 }
3383         }
3384         spin_unlock(&cifs_file_list_lock);
3385         return 0;
3386 }
3387
3388 /* We do not want to update the file size from the server for inodes
3389    open for write, to avoid races with writepage extending the file.
3390    In the future we could consider allowing refreshing the inode only
3391    on increases in the file size, but this is tricky to do without
3392    racing with the writebehind page caching in the current Linux
3393    kernel design. */
3394 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3395 {
3396         if (!cifsInode)
3397                 return true;
3398
3399         if (is_inode_writable(cifsInode)) {
3400                 /* This inode is open for write at least once */
3401                 struct cifs_sb_info *cifs_sb;
3402
3403                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3404                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3405                         /* with direct I/O there is no page cache to
3406                            corrupt, so we can change the size safely */
3407                         return true;
3408                 }
3409
3410                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3411                         return true;
3412
3413                 return false;
3414         } else
3415                 return true;
3416 }
3417
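/*
 * Editor's summary of the cases above: the server's size is trusted
 * (return true) when the inode is unknown, not open for write, mounted
 * with direct I/O (no page cache to corrupt), or when the server's end
 * of file lies beyond the locally cached i_size; only a writable,
 * cached inode whose local i_size >= end_of_file keeps its local size.
 */
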
3418 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3419                         loff_t pos, unsigned len, unsigned flags,
3420                         struct page **pagep, void **fsdata)
3421 {
3422         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3423         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3424         loff_t page_start = pos & PAGE_MASK;
3425         loff_t i_size;
3426         struct page *page;
3427         int rc = 0;
3428
3429         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3430
3431         page = grab_cache_page_write_begin(mapping, index, flags);
3432         if (!page) {
3433                 rc = -ENOMEM;
3434                 goto out;
3435         }
3436
3437         if (PageUptodate(page))
3438                 goto out;
3439
3440         /*
3441          * If we write a full page it will be up to date, no need to read from
3442          * the server. If the write is short, we'll end up doing a sync write
3443          * instead.
3444          */
3445         if (len == PAGE_CACHE_SIZE)
3446                 goto out;
3447
3448         /*
3449          * optimize away the read when we have an oplock, and we're not
3450          * expecting to use any of the data we'd be reading in. That
3451          * is, when the page lies beyond the EOF, or straddles the EOF
3452          * and the write will cover all of the existing data.
3453          */
3454         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3455                 i_size = i_size_read(mapping->host);
3456                 if (page_start >= i_size ||
3457                     (offset == 0 && (pos + len) >= i_size)) {
3458                         zero_user_segments(page, 0, offset,
3459                                            offset + len,
3460                                            PAGE_CACHE_SIZE);
3461                         /*
3462                          * PageChecked means that the parts of the page
3463                          * to which we're not writing are considered up
3464                          * to date. Once the data is copied to the
3465                          * page, it can be set uptodate.
3466                          */
3467                         SetPageChecked(page);
3468                         goto out;
3469                 }
3470         }
3471
3472         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3473                 /*
3474                  * might as well read a page, it is fast enough. If we get
3475                  * an error, we don't need to return it. cifs_write_end will
3476                  * do a sync write instead since PG_uptodate isn't set.
3477                  */
3478                 cifs_readpage_worker(file, page, &page_start);
3479         } else {
3480                 /* we could try using another file handle if there is one,
3481                    but how would we lock it to prevent a close of that handle
3482                    racing with this read? In any case the page will be
3483                    written out by write_end, so this is fine */
3484         }
3485 out:
3486         *pagep = page;
3487         return rc;
3488 }
3489
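/*
 * Worked example for the zeroing above (editor's sketch): with 4 KiB
 * pages, pos = 5000 and len = 200 give index = 1, offset = 904 and
 * page_start = 4096.  For a write past EOF, zero_user_segments()
 * clears bytes 0-903 and 1104-4095 so the untouched parts of the page
 * hold no stale data, and SetPageChecked() records that those parts
 * are considered up to date.
 */
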
3490 static int cifs_release_page(struct page *page, gfp_t gfp)
3491 {
3492         if (PagePrivate(page))
3493                 return 0;
3494
3495         return cifs_fscache_release_page(page, gfp);
3496 }
3497
3498 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3499 {
3500         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3501
3502         if (offset == 0)
3503                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3504 }
3505
3506 static int cifs_launder_page(struct page *page)
3507 {
3508         int rc = 0;
3509         loff_t range_start = page_offset(page);
3510         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3511         struct writeback_control wbc = {
3512                 .sync_mode = WB_SYNC_ALL,
3513                 .nr_to_write = 0,
3514                 .range_start = range_start,
3515                 .range_end = range_end,
3516         };
3517
3518         cFYI(1, "Launder page: %p", page);
3519
3520         if (clear_page_dirty_for_io(page))
3521                 rc = cifs_writepage_locked(page, &wbc);
3522
3523         cifs_fscache_invalidate_page(page, page->mapping->host);
3524         return rc;
3525 }
3526
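/*
 * Editor's note: nr_to_write == 0 is sufficient here because
 * ->launder_page targets exactly one dirty page; cifs_writepage_locked()
 * writes it synchronously (WB_SYNC_ALL) before the VM, e.g. via
 * invalidate_inode_pages2(), drops it from the mapping.
 */
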
3527 void cifs_oplock_break(struct work_struct *work)
3528 {
3529         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3530                                                   oplock_break);
3531         struct inode *inode = cfile->dentry->d_inode;
3532         struct cifsInodeInfo *cinode = CIFS_I(inode);
3533         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3534         int rc = 0;
3535
3536         if (inode && S_ISREG(inode->i_mode)) {
3537                 if (cinode->clientCanCacheRead)
3538                         break_lease(inode, O_RDONLY);
3539                 else
3540                         break_lease(inode, O_WRONLY);
3541                 rc = filemap_fdatawrite(inode->i_mapping);
3542                 if (cinode->clientCanCacheRead == 0) {
3543                         rc = filemap_fdatawait(inode->i_mapping);
3544                         mapping_set_error(inode->i_mapping, rc);
3545                         invalidate_remote_inode(inode);
3546                 }
3547                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3548         }
3549
3550         rc = cifs_push_locks(cfile);
3551         if (rc)
3552                 cERROR(1, "Push locks rc = %d", rc);
3553
3554         /*
3555          * Releasing a stale oplock after a recent reconnect of the SMB
3556          * session using a now incorrect file handle is not a data integrity
3557          * issue, but do not bother sending an oplock release if the session
3558          * is still disconnected, since the server has already released it.
3559          */
3560         if (!cfile->oplock_break_cancelled) {
3561                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3562                                                              cinode);
3563                 cFYI(1, "Oplock release rc = %d", rc);
3564         }
3565 }
3566
3567 const struct address_space_operations cifs_addr_ops = {
3568         .readpage = cifs_readpage,
3569         .readpages = cifs_readpages,
3570         .writepage = cifs_writepage,
3571         .writepages = cifs_writepages,
3572         .write_begin = cifs_write_begin,
3573         .write_end = cifs_write_end,
3574         .set_page_dirty = __set_page_dirty_nobuffers,
3575         .releasepage = cifs_release_page,
3576         .invalidatepage = cifs_invalidate_page,
3577         .launder_page = cifs_launder_page,
3578 };
3579
3580 /*
3581  * cifs_readpages requires the server to support a buffer large enough to
3582  * contain the header plus one complete page of data.  Otherwise, we need
3583  * to leave cifs_readpages out of the address space operations.
3584  */
3585 const struct address_space_operations cifs_addr_ops_smallbuf = {
3586         .readpage = cifs_readpage,
3587         .writepage = cifs_writepage,
3588         .writepages = cifs_writepages,
3589         .write_begin = cifs_write_begin,
3590         .write_end = cifs_write_end,
3591         .set_page_dirty = __set_page_dirty_nobuffers,
3592         .releasepage = cifs_release_page,
3593         .invalidatepage = cifs_invalidate_page,
3594         .launder_page = cifs_launder_page,
3595 };