/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");

static struct kmem_cache *fuse_req_cachep;

static struct fuse_conn *fuse_get_conn(struct file *file)
{
        /*
         * Lockless access is OK, because file->private_data is set
         * once during mount and is valid until the file is released.
         */
        return file->private_data;
}

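/*
 * Initialize a request so it can be (re)used: clear the struct and the
 * caller-supplied page arrays, and take the initial reference.  The
 * page/page_desc arrays are owned by the caller; they may point at the
 * inline arrays embedded in the request or at separate allocations.
 */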
static void fuse_request_init(struct fuse_req *req, struct page **pages,
                              struct fuse_page_desc *page_descs,
                              unsigned npages)
{
        memset(req, 0, sizeof(*req));
        memset(pages, 0, sizeof(*pages) * npages);
        memset(page_descs, 0, sizeof(*page_descs) * npages);
        INIT_LIST_HEAD(&req->list);
        INIT_LIST_HEAD(&req->intr_entry);
        init_waitqueue_head(&req->waitq);
        atomic_set(&req->count, 1);
        req->pages = pages;
        req->page_descs = page_descs;
        req->max_pages = npages;
}

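/*
 * Allocate a request.  For small requests (up to FUSE_REQ_INLINE_PAGES
 * pages) the page arrays embedded in struct fuse_req are used, avoiding
 * two extra allocations; larger requests get separately kmalloc'ed
 * arrays, which are freed again in fuse_request_free().
 */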
static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
{
        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
        if (req) {
                struct page **pages;
                struct fuse_page_desc *page_descs;

                if (npages <= FUSE_REQ_INLINE_PAGES) {
                        pages = req->inline_pages;
                        page_descs = req->inline_page_descs;
                } else {
                        pages = kmalloc(sizeof(struct page *) * npages, flags);
                        page_descs = kmalloc(sizeof(struct fuse_page_desc) *
                                             npages, flags);
                }

                if (!pages || !page_descs) {
                        kfree(pages);
                        kfree(page_descs);
                        kmem_cache_free(fuse_req_cachep, req);
                        return NULL;
                }

                fuse_request_init(req, pages, page_descs, npages);
        }
        return req;
}

struct fuse_req *fuse_request_alloc(unsigned npages)
{
        return __fuse_request_alloc(npages, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(fuse_request_alloc);

struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
{
        return __fuse_request_alloc(npages, GFP_NOFS);
}

void fuse_request_free(struct fuse_req *req)
{
        if (req->pages != req->inline_pages) {
                kfree(req->pages);
                kfree(req->page_descs);
        }
        kmem_cache_free(fuse_req_cachep, req);
}

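/*
 * Block all signals except SIGKILL for the current task, saving the old
 * mask, so that a sleeping request can only be interrupted by a fatal
 * signal.  restore_sigs() undoes this.
 */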
static void block_sigs(sigset_t *oldset)
{
        sigset_t mask;

        siginitsetinv(&mask, sigmask(SIGKILL));
        sigprocmask(SIG_BLOCK, &mask, oldset);
}

static void restore_sigs(sigset_t *oldset)
{
        sigprocmask(SIG_SETMASK, oldset, NULL);
}

static void __fuse_get_request(struct fuse_req *req)
{
        atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
        BUG_ON(atomic_read(&req->count) < 2);
        atomic_dec(&req->count);
}

static void fuse_req_init_context(struct fuse_req *req)
{
        req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
        req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
        req->in.h.pid = current->pid;
}

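/*
 * Allocate a request and fill in the caller's credentials.  Waits,
 * interruptible only by fatal signals (SIGKILL), until the connection
 * is no longer blocked by the background request limit.  Returns an
 * ERR_PTR on interruption, broken connection or allocation failure.
 */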
struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
{
        struct fuse_req *req;
        sigset_t oldset;
        int intr;
        int err;

        atomic_inc(&fc->num_waiting);
        block_sigs(&oldset);
        intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
        restore_sigs(&oldset);
        err = -EINTR;
        if (intr)
                goto out;

        err = -ENOTCONN;
        if (!fc->connected)
                goto out;

        req = fuse_request_alloc(npages);
        err = -ENOMEM;
        if (!req)
                goto out;

        fuse_req_init_context(req);
        req->waiting = 1;
        return req;

 out:
        atomic_dec(&fc->num_waiting);
        return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fuse_get_req);

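/*
 * Typical caller pattern (a sketch based on other fs/fuse call sites,
 * not code from this file): allocate, fill in the input header and
 * arguments, send synchronously, then drop the reference:
 *
 *	req = fuse_get_req(fc, 0);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	req->in.h.opcode = FUSE_GETATTR;
 *	...
 *	fuse_request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */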
/*
 * Return request in fuse_file->reserved_req.  However that may
 * currently be in use.  If that is the case, wait for it to become
 * available.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
                                         struct file *file)
{
        struct fuse_req *req = NULL;
        struct fuse_file *ff = file->private_data;

        do {
                wait_event(fc->reserved_req_waitq, ff->reserved_req);
                spin_lock(&fc->lock);
                if (ff->reserved_req) {
                        req = ff->reserved_req;
                        ff->reserved_req = NULL;
                        req->stolen_file = get_file(file);
                }
                spin_unlock(&fc->lock);
        } while (!req);

        return req;
}

/*
 * Put stolen request back into fuse_file->reserved_req
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
        struct file *file = req->stolen_file;
        struct fuse_file *ff = file->private_data;

        spin_lock(&fc->lock);
        fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
        BUG_ON(ff->reserved_req);
        ff->reserved_req = req;
        wake_up_all(&fc->reserved_req_waitq);
        spin_unlock(&fc->lock);
        fput(file);
}

/*
 * Get a request for a file operation; always succeeds
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
                                             struct file *file)
{
        struct fuse_req *req;

        atomic_inc(&fc->num_waiting);
        wait_event(fc->blocked_waitq, !fc->blocked);
        req = fuse_request_alloc(0);
        if (!req)
                req = get_reserved_req(fc, file);

        fuse_req_init_context(req);
        req->waiting = 1;
        return req;
}

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
        if (atomic_dec_and_test(&req->count)) {
                if (req->waiting)
                        atomic_dec(&fc->num_waiting);

                if (req->stolen_file)
                        put_reserved_req(fc, req);
                else
                        fuse_request_free(req);
        }
}
EXPORT_SYMBOL_GPL(fuse_put_request);

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
        unsigned nbytes = 0;
        unsigned i;

        for (i = 0; i < numargs; i++)
                nbytes += args[i].size;

        return nbytes;
}

static u64 fuse_get_unique(struct fuse_conn *fc)
{
        fc->reqctr++;
        /* zero is special */
        if (fc->reqctr == 0)
                fc->reqctr = 1;

        return fc->reqctr;
}

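/*
 * Put a request on the pending list and wake up any reader of the
 * device.  Called with fc->lock held; the unique ID has already been
 * assigned by the caller via fuse_get_unique().
 */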
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
        req->in.h.len = sizeof(struct fuse_in_header) +
                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
        list_add_tail(&req->list, &fc->pending);
        req->state = FUSE_REQ_PENDING;
        if (!req->waiting) {
                req->waiting = 1;
                atomic_inc(&fc->num_waiting);
        }
        wake_up(&fc->waitq);
        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
                       u64 nodeid, u64 nlookup)
{
        forget->forget_one.nodeid = nodeid;
        forget->forget_one.nlookup = nlookup;

        spin_lock(&fc->lock);
        if (fc->connected) {
                fc->forget_list_tail->next = forget;
                fc->forget_list_tail = forget;
                wake_up(&fc->waitq);
                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
        } else {
                kfree(forget);
        }
        spin_unlock(&fc->lock);
}

static void flush_bg_queue(struct fuse_conn *fc)
{
        while (fc->active_background < fc->max_background &&
               !list_empty(&fc->bg_queue)) {
                struct fuse_req *req;

                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
                list_del(&req->list);
                fc->active_background++;
                req->in.h.unique = fuse_get_unique(fc);
                queue_request(fc, req);
        }
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived, or it was aborted (and not yet sent), or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, and the reference to the
 * request is released.
 *
 * Called with fc->lock held, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
{
        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
        req->end = NULL;
        list_del(&req->list);
        list_del(&req->intr_entry);
        req->state = FUSE_REQ_FINISHED;
        if (req->background) {
                if (fc->num_background == fc->max_background) {
                        fc->blocked = 0;
                        wake_up_all(&fc->blocked_waitq);
                }
                if (fc->num_background == fc->congestion_threshold &&
                    fc->connected && fc->bdi_initialized) {
                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
                }
                fc->num_background--;
                fc->active_background--;
                flush_bg_queue(fc);
        }
        spin_unlock(&fc->lock);
        wake_up(&req->waitq);
        if (end)
                end(fc, req);
        fuse_put_request(fc, req);
}

static void wait_answer_interruptible(struct fuse_conn *fc,
                                      struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
        if (signal_pending(current))
                return;

        spin_unlock(&fc->lock);
        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
        spin_lock(&fc->lock);
}

static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
{
        list_add_tail(&req->intr_entry, &fc->interrupts);
        wake_up(&fc->waitq);
        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
        if (!fc->no_interrupt) {
                /* Any signal may interrupt this */
                wait_answer_interruptible(fc, req);

                if (req->aborted)
                        goto aborted;
                if (req->state == FUSE_REQ_FINISHED)
                        return;

                req->interrupted = 1;
                if (req->state == FUSE_REQ_SENT)
                        queue_interrupt(fc, req);
        }

        if (!req->force) {
                sigset_t oldset;

                /* Only fatal signals may interrupt this */
                block_sigs(&oldset);
                wait_answer_interruptible(fc, req);
                restore_sigs(&oldset);

                if (req->aborted)
                        goto aborted;
                if (req->state == FUSE_REQ_FINISHED)
                        return;

                /* Request is not yet in userspace, bail out */
                if (req->state == FUSE_REQ_PENDING) {
                        list_del(&req->list);
                        __fuse_put_request(req);
                        req->out.h.error = -EINTR;
                        return;
                }
        }

        /*
         * Either request is already in userspace, or it was forced.
         * Wait it out.
         */
        spin_unlock(&fc->lock);
        wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
        spin_lock(&fc->lock);

        if (!req->aborted)
                return;

 aborted:
        BUG_ON(req->state != FUSE_REQ_FINISHED);
        if (req->locked) {
                /* This is uninterruptible sleep, because data is
                   being copied to/from the buffers of req.  During
                   locked state, there mustn't be any filesystem
                   operation (e.g. page fault), since that could lead
                   to deadlock */
                spin_unlock(&fc->lock);
                wait_event(req->waitq, !req->locked);
                spin_lock(&fc->lock);
        }
}

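/*
 * Queue a request and wait for the answer.  If the connection is gone
 * or in error, fail the request immediately; otherwise assign a unique
 * ID, queue it, and block in request_wait_answer() until a reply
 * arrives or the request is aborted.
 */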
static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
        spin_lock(&fc->lock);
        if (!fc->connected)
                req->out.h.error = -ENOTCONN;
        else if (fc->conn_error)
                req->out.h.error = -ECONNREFUSED;
        else {
                req->in.h.unique = fuse_get_unique(fc);
                queue_request(fc, req);
                /* acquire extra reference, since request is still needed
                   after request_end() */
                __fuse_get_request(req);

                request_wait_answer(fc, req);
        }
        spin_unlock(&fc->lock);
}

void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 1;
        __fuse_request_send(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send);

static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
                                            struct fuse_req *req)
{
        req->background = 1;
        fc->num_background++;
        if (fc->num_background == fc->max_background)
                fc->blocked = 1;
        if (fc->num_background == fc->congestion_threshold &&
            fc->bdi_initialized) {
                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
        }
        list_add_tail(&req->list, &fc->bg_queue);
        flush_bg_queue(fc);
}

static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
        spin_lock(&fc->lock);
        if (fc->connected) {
                fuse_request_send_nowait_locked(fc, req);
                spin_unlock(&fc->lock);
        } else {
                req->out.h.error = -ENOTCONN;
                request_end(fc, req);
        }
}

void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 1;
        fuse_request_send_nowait(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

static int fuse_request_send_notify_reply(struct fuse_conn *fc,
                                          struct fuse_req *req, u64 unique)
{
        int err = -ENODEV;

        req->isreply = 0;
        req->in.h.unique = unique;
        spin_lock(&fc->lock);
        if (fc->connected) {
                queue_request(fc, req);
                err = 0;
        }
        spin_unlock(&fc->lock);

        return err;
}

/*
 * Called under fc->lock
 *
 * fc->connected must have been checked previously
 */
void fuse_request_send_background_locked(struct fuse_conn *fc,
                                         struct fuse_req *req)
{
        req->isreply = 1;
        fuse_request_send_nowait_locked(fc, req);
}

void fuse_force_forget(struct file *file, u64 nodeid)
{
        struct inode *inode = file_inode(file);
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_req *req;
        struct fuse_forget_in inarg;

        memset(&inarg, 0, sizeof(inarg));
        inarg.nlookup = 1;
        req = fuse_get_req_nofail_nopages(fc, file);
        req->in.h.opcode = FUSE_FORGET;
        req->in.h.nodeid = nodeid;
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(inarg);
        req->in.args[0].value = &inarg;
        req->isreply = 0;
        __fuse_request_send(fc, req);
        /* ignore errors */
        fuse_put_request(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted, bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
        int err = 0;
        if (req) {
                spin_lock(&fc->lock);
                if (req->aborted)
                        err = -ENOENT;
                else
                        req->locked = 1;
                spin_unlock(&fc->lock);
        }
        return err;
}

/*
 * Unlock request.  If it was aborted while locked, the requester
 * thread is currently waiting for it to be unlocked, so wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
        if (req) {
                spin_lock(&fc->lock);
                req->locked = 0;
                if (req->aborted)
                        wake_up(&req->waitq);
                spin_unlock(&fc->lock);
        }
}

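/*
 * State for copying a request to or from userspace.  The transfer
 * either goes through an iovec supplied to read()/write() or through a
 * set of pipe buffers for splice(); at any point one page of the user
 * buffer is mapped, with cs->buf/cs->len tracking the current window.
 */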
struct fuse_copy_state {
        struct fuse_conn *fc;
        int write;
        struct fuse_req *req;
        const struct iovec *iov;
        struct pipe_buffer *pipebufs;
        struct pipe_buffer *currbuf;
        struct pipe_inode_info *pipe;
        unsigned long nr_segs;
        unsigned long seglen;
        unsigned long addr;
        struct page *pg;
        void *mapaddr;
        void *buf;
        unsigned len;
        unsigned move_pages:1;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
                           int write,
                           const struct iovec *iov, unsigned long nr_segs)
{
        memset(cs, 0, sizeof(*cs));
        cs->fc = fc;
        cs->write = write;
        cs->iov = iov;
        cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
        if (cs->currbuf) {
                struct pipe_buffer *buf = cs->currbuf;

                if (!cs->write) {
                        buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
                } else {
                        kunmap(buf->page);
                        buf->len = PAGE_SIZE - cs->len;
                }
                cs->currbuf = NULL;
                cs->mapaddr = NULL;
        } else if (cs->mapaddr) {
                kunmap(cs->pg);
                if (cs->write) {
                        flush_dcache_page(cs->pg);
                        set_page_dirty_lock(cs->pg);
                }
                put_page(cs->pg);
                cs->mapaddr = NULL;
        }
}

/*
 * Get another pageful of userspace buffer, map it to kernel address
 * space, and lock the request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
        unsigned long offset;
        int err;

        unlock_request(cs->fc, cs->req);
        fuse_copy_finish(cs);
        if (cs->pipebufs) {
                struct pipe_buffer *buf = cs->pipebufs;

                if (!cs->write) {
                        err = buf->ops->confirm(cs->pipe, buf);
                        if (err)
                                return err;

                        BUG_ON(!cs->nr_segs);
                        cs->currbuf = buf;
                        cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
                        cs->len = buf->len;
                        cs->buf = cs->mapaddr + buf->offset;
                        cs->pipebufs++;
                        cs->nr_segs--;
                } else {
                        struct page *page;

                        if (cs->nr_segs == cs->pipe->buffers)
                                return -EIO;

                        page = alloc_page(GFP_HIGHUSER);
                        if (!page)
                                return -ENOMEM;

                        buf->page = page;
                        buf->offset = 0;
                        buf->len = 0;

                        cs->currbuf = buf;
                        cs->mapaddr = kmap(page);
                        cs->buf = cs->mapaddr;
                        cs->len = PAGE_SIZE;
                        cs->pipebufs++;
                        cs->nr_segs++;
                }
        } else {
                if (!cs->seglen) {
                        BUG_ON(!cs->nr_segs);
                        cs->seglen = cs->iov[0].iov_len;
                        cs->addr = (unsigned long) cs->iov[0].iov_base;
                        cs->iov++;
                        cs->nr_segs--;
                }
                err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
                if (err < 0)
                        return err;
                BUG_ON(err != 1);
                offset = cs->addr % PAGE_SIZE;
                cs->mapaddr = kmap(cs->pg);
                cs->buf = cs->mapaddr + offset;
                cs->len = min(PAGE_SIZE - offset, cs->seglen);
                cs->seglen -= cs->len;
                cs->addr += cs->len;
        }

        return lock_request(cs->fc, cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
        unsigned ncpy = min(*size, cs->len);
        if (val) {
                if (cs->write)
                        memcpy(cs->buf, *val, ncpy);
                else
                        memcpy(*val, cs->buf, ncpy);
                *val += ncpy;
        }
        *size -= ncpy;
        cs->len -= ncpy;
        cs->buf += ncpy;
        return ncpy;
}

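/*
 * Sanity checks before a page from the pipe is stolen and spliced into
 * the page cache: it must be unmapped, unreferenced except by us, and
 * carry no unexpected flags, otherwise moving it would corrupt state.
 */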
static int fuse_check_page(struct page *page)
{
        if (page_mapcount(page) ||
            page->mapping != NULL ||
            page_count(page) != 1 ||
            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
             ~(1 << PG_locked |
               1 << PG_referenced |
               1 << PG_uptodate |
               1 << PG_lru |
               1 << PG_active |
               1 << PG_reclaim))) {
                printk(KERN_WARNING "fuse: trying to steal weird page\n");
                printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
                return 1;
        }
        return 0;
}

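/*
 * Try to replace the request's page with the page sitting in the pipe
 * buffer (zero-copy splice).  On any obstacle fall back to mapping the
 * pipe buffer and copying: a return of 1 tells fuse_copy_page() to do
 * the ordinary copy, 0 means the page was moved, negative is an error.
 */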
static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
{
        int err;
        struct page *oldpage = *pagep;
        struct page *newpage;
        struct pipe_buffer *buf = cs->pipebufs;

        unlock_request(cs->fc, cs->req);
        fuse_copy_finish(cs);

        err = buf->ops->confirm(cs->pipe, buf);
        if (err)
                return err;

        BUG_ON(!cs->nr_segs);
        cs->currbuf = buf;
        cs->len = buf->len;
        cs->pipebufs++;
        cs->nr_segs--;

        if (cs->len != PAGE_SIZE)
                goto out_fallback;

        if (buf->ops->steal(cs->pipe, buf) != 0)
                goto out_fallback;

        newpage = buf->page;

        if (WARN_ON(!PageUptodate(newpage)))
                return -EIO;

        ClearPageMappedToDisk(newpage);

        if (fuse_check_page(newpage) != 0)
                goto out_fallback_unlock;

        /*
         * This is a new and locked page, it shouldn't be mapped or
         * have any special flags on it
         */
        if (WARN_ON(page_mapped(oldpage)))
                goto out_fallback_unlock;
        if (WARN_ON(page_has_private(oldpage)))
                goto out_fallback_unlock;
        if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
                goto out_fallback_unlock;
        if (WARN_ON(PageMlocked(oldpage)))
                goto out_fallback_unlock;

        err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
        if (err) {
                unlock_page(newpage);
                return err;
        }

        page_cache_get(newpage);

        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
                lru_cache_add_file(newpage);

        err = 0;
        spin_lock(&cs->fc->lock);
        if (cs->req->aborted)
                err = -ENOENT;
        else
                *pagep = newpage;
        spin_unlock(&cs->fc->lock);

        if (err) {
                unlock_page(newpage);
                page_cache_release(newpage);
                return err;
        }

        unlock_page(oldpage);
        page_cache_release(oldpage);
        cs->len = 0;

        return 0;

out_fallback_unlock:
        unlock_page(newpage);
out_fallback:
        cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
        cs->buf = cs->mapaddr + buf->offset;

        err = lock_request(cs->fc, cs->req);
        if (err)
                return err;

        return 1;
}

static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
                         unsigned offset, unsigned count)
{
        struct pipe_buffer *buf;

        if (cs->nr_segs == cs->pipe->buffers)
                return -EIO;

        unlock_request(cs->fc, cs->req);
        fuse_copy_finish(cs);

        buf = cs->pipebufs;
        page_cache_get(page);
        buf->page = page;
        buf->offset = offset;
        buf->len = count;

        cs->pipebufs++;
        cs->nr_segs++;
        cs->len = 0;

        return 0;
}

/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
                          unsigned offset, unsigned count, int zeroing)
{
        int err;
        struct page *page = *pagep;

        if (page && zeroing && count < PAGE_SIZE)
                clear_highpage(page);

        while (count) {
                if (cs->write && cs->pipebufs && page) {
                        return fuse_ref_page(cs, page, offset, count);
                } else if (!cs->len) {
                        if (cs->move_pages && page &&
                            offset == 0 && count == PAGE_SIZE) {
                                err = fuse_try_move_page(cs, pagep);
                                if (err <= 0)
                                        return err;
                        } else {
                                err = fuse_copy_fill(cs);
                                if (err)
                                        return err;
                        }
                }
                if (page) {
                        void *mapaddr = kmap_atomic(page);
                        void *buf = mapaddr + offset;
                        offset += fuse_copy_do(cs, &buf, &count);
                        kunmap_atomic(mapaddr);
                } else
                        offset += fuse_copy_do(cs, NULL, &count);
        }
        if (page && !cs->write)
                flush_dcache_page(page);
        return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
                           int zeroing)
{
        unsigned i;
        struct fuse_req *req = cs->req;

        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
                int err;
                unsigned offset = req->page_descs[i].offset;
                unsigned count = min(nbytes, req->page_descs[i].length);

                err = fuse_copy_page(cs, &req->pages[i], offset, count,
                                     zeroing);
                if (err)
                        return err;

                nbytes -= count;
        }
        return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
        while (size) {
                if (!cs->len) {
                        int err = fuse_copy_fill(cs);
                        if (err)
                                return err;
                }
                fuse_copy_do(cs, &val, &size);
        }
        return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
                          unsigned argpages, struct fuse_arg *args,
                          int zeroing)
{
        int err = 0;
        unsigned i;

        for (i = 0; !err && i < numargs; i++)  {
                struct fuse_arg *arg = &args[i];
                if (i == numargs - 1 && argpages)
                        err = fuse_copy_pages(cs, arg->size, zeroing);
                else
                        err = fuse_copy_one(cs, arg->value, arg->size);
        }
        return err;
}

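/*
 * A request is available for reading if there is something on the
 * pending list, a queued interrupt, or a queued FORGET.  Called under
 * fc->lock.
 */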
static int forget_pending(struct fuse_conn *fc)
{
        return fc->forget_list_head.next != NULL;
}

static int request_pending(struct fuse_conn *fc)
{
        return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
                forget_pending(fc);
}

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
__releases(fc->lock)
__acquires(fc->lock)
{
        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue_exclusive(&fc->waitq, &wait);
        while (fc->connected && !request_pending(fc)) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (signal_pending(current))
                        break;

                spin_unlock(&fc->lock);
                schedule();
                spin_lock(&fc->lock);
        }
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&fc->waitq, &wait);
}

/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
 *
 * Called with fc->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
                               size_t nbytes, struct fuse_req *req)
__releases(fc->lock)
{
        struct fuse_in_header ih;
        struct fuse_interrupt_in arg;
        unsigned reqsize = sizeof(ih) + sizeof(arg);
        int err;

        list_del_init(&req->intr_entry);
        req->intr_unique = fuse_get_unique(fc);
        memset(&ih, 0, sizeof(ih));
        memset(&arg, 0, sizeof(arg));
        ih.len = reqsize;
        ih.opcode = FUSE_INTERRUPT;
        ih.unique = req->intr_unique;
        arg.unique = req->in.h.unique;

        spin_unlock(&fc->lock);
        if (nbytes < reqsize)
                return -EINVAL;

        err = fuse_copy_one(cs, &ih, sizeof(ih));
        if (!err)
                err = fuse_copy_one(cs, &arg, sizeof(arg));
        fuse_copy_finish(cs);

        return err ? err : reqsize;
}

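/*
 * Unlink up to 'max' entries from the head of the forget list (a
 * singly-linked list with a tail pointer) and return them; the number
 * actually dequeued is stored in *countp if it is non-NULL.
 */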
static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
                                               unsigned max,
                                               unsigned *countp)
{
        struct fuse_forget_link *head = fc->forget_list_head.next;
        struct fuse_forget_link **newhead = &head;
        unsigned count;

        for (count = 0; *newhead != NULL && count < max; count++)
                newhead = &(*newhead)->next;

        fc->forget_list_head.next = *newhead;
        *newhead = NULL;
        if (fc->forget_list_head.next == NULL)
                fc->forget_list_tail = &fc->forget_list_head;

        if (countp != NULL)
                *countp = count;

        return head;
}

static int fuse_read_single_forget(struct fuse_conn *fc,
                                   struct fuse_copy_state *cs,
                                   size_t nbytes)
__releases(fc->lock)
{
        int err;
        struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
        struct fuse_forget_in arg = {
                .nlookup = forget->forget_one.nlookup,
        };
        struct fuse_in_header ih = {
                .opcode = FUSE_FORGET,
                .nodeid = forget->forget_one.nodeid,
                .unique = fuse_get_unique(fc),
                .len = sizeof(ih) + sizeof(arg),
        };

        spin_unlock(&fc->lock);
        kfree(forget);
        if (nbytes < ih.len)
                return -EINVAL;

        err = fuse_copy_one(cs, &ih, sizeof(ih));
        if (!err)
                err = fuse_copy_one(cs, &arg, sizeof(arg));
        fuse_copy_finish(cs);

        if (err)
                return err;

        return ih.len;
}

static int fuse_read_batch_forget(struct fuse_conn *fc,
                                   struct fuse_copy_state *cs, size_t nbytes)
__releases(fc->lock)
{
        int err;
        unsigned max_forgets;
        unsigned count;
        struct fuse_forget_link *head;
        struct fuse_batch_forget_in arg = { .count = 0 };
        struct fuse_in_header ih = {
                .opcode = FUSE_BATCH_FORGET,
                .unique = fuse_get_unique(fc),
                .len = sizeof(ih) + sizeof(arg),
        };

        if (nbytes < ih.len) {
                spin_unlock(&fc->lock);
                return -EINVAL;
        }

        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
        head = dequeue_forget(fc, max_forgets, &count);
        spin_unlock(&fc->lock);

        arg.count = count;
        ih.len += count * sizeof(struct fuse_forget_one);
        err = fuse_copy_one(cs, &ih, sizeof(ih));
        if (!err)
                err = fuse_copy_one(cs, &arg, sizeof(arg));

        while (head) {
                struct fuse_forget_link *forget = head;

                if (!err) {
                        err = fuse_copy_one(cs, &forget->forget_one,
                                            sizeof(forget->forget_one));
                }
                head = forget->next;
                kfree(forget);
        }

        fuse_copy_finish(cs);

        if (err)
                return err;

        return ih.len;
}

static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
                            size_t nbytes)
__releases(fc->lock)
{
        if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
                return fuse_read_single_forget(fc, cs, nbytes);
        else
                return fuse_read_batch_forget(fc, cs, nbytes);
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies the request data to the userspace
 * buffer.  If no reply is needed (FORGET), or the request has been
 * aborted, or there was an error during the copying, then it's
 * finished by calling request_end().  Otherwise add it to the
 * processing list, and set the 'sent' flag.
 */
static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
                                struct fuse_copy_state *cs, size_t nbytes)
{
        int err;
        struct fuse_req *req;
        struct fuse_in *in;
        unsigned reqsize;

 restart:
        spin_lock(&fc->lock);
        err = -EAGAIN;
        if ((file->f_flags & O_NONBLOCK) && fc->connected &&
            !request_pending(fc))
                goto err_unlock;

        request_wait(fc);
        err = -ENODEV;
        if (!fc->connected)
                goto err_unlock;
        err = -ERESTARTSYS;
        if (!request_pending(fc))
                goto err_unlock;

        if (!list_empty(&fc->interrupts)) {
                req = list_entry(fc->interrupts.next, struct fuse_req,
                                 intr_entry);
                return fuse_read_interrupt(fc, cs, nbytes, req);
        }

        if (forget_pending(fc)) {
                if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
                        return fuse_read_forget(fc, cs, nbytes);

                if (fc->forget_batch <= -8)
                        fc->forget_batch = 16;
        }

        req = list_entry(fc->pending.next, struct fuse_req, list);
        req->state = FUSE_REQ_READING;
        list_move(&req->list, &fc->io);

        in = &req->in;
        reqsize = in->h.len;
        /* If request is too large, reply with an error and restart the read */
        if (nbytes < reqsize) {
                req->out.h.error = -EIO;
                /* SETXATTR is special, since it may contain too large data */
                if (in->h.opcode == FUSE_SETXATTR)
                        req->out.h.error = -E2BIG;
                request_end(fc, req);
                goto restart;
        }
        spin_unlock(&fc->lock);
        cs->req = req;
        err = fuse_copy_one(cs, &in->h, sizeof(in->h));
        if (!err)
                err = fuse_copy_args(cs, in->numargs, in->argpages,
                                     (struct fuse_arg *) in->args, 0);
        fuse_copy_finish(cs);
        spin_lock(&fc->lock);
        req->locked = 0;
        if (req->aborted) {
                request_end(fc, req);
                return -ENODEV;
        }
        if (err) {
                req->out.h.error = -EIO;
                request_end(fc, req);
                return err;
        }
        if (!req->isreply)
                request_end(fc, req);
        else {
                req->state = FUSE_REQ_SENT;
                list_move_tail(&req->list, &fc->processing);
                if (req->interrupted)
                        queue_interrupt(fc, req);
                spin_unlock(&fc->lock);
        }
        return reqsize;

 err_unlock:
        spin_unlock(&fc->lock);
        return err;
}

static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
                              unsigned long nr_segs, loff_t pos)
{
        struct fuse_copy_state cs;
        struct file *file = iocb->ki_filp;
        struct fuse_conn *fc = fuse_get_conn(file);
        if (!fc)
                return -EPERM;

        fuse_copy_init(&cs, fc, 1, iov, nr_segs);

        return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
}

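/*
 * Pages that fuse has inserted into the pipe must not be stolen by the
 * reader: returning nonzero from ->steal makes generic pipe code copy
 * the data instead of taking ownership of the page.
 */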
static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
                                   struct pipe_buffer *buf)
{
        return 1;
}

static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = generic_pipe_buf_release,
        .steal = fuse_dev_pipe_buf_steal,
        .get = generic_pipe_buf_get,
};

static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
                                    struct pipe_inode_info *pipe,
                                    size_t len, unsigned int flags)
{
        int ret;
        int page_nr = 0;
        int do_wakeup = 0;
        struct pipe_buffer *bufs;
        struct fuse_copy_state cs;
        struct fuse_conn *fc = fuse_get_conn(in);
        if (!fc)
                return -EPERM;

        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
        if (!bufs)
                return -ENOMEM;

        fuse_copy_init(&cs, fc, 1, NULL, 0);
        cs.pipebufs = bufs;
        cs.pipe = pipe;
        ret = fuse_dev_do_read(fc, in, &cs, len);
        if (ret < 0)
                goto out;

        ret = 0;
        pipe_lock(pipe);

        if (!pipe->readers) {
                send_sig(SIGPIPE, current, 0);
                if (!ret)
                        ret = -EPIPE;
                goto out_unlock;
        }

        if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
                ret = -EIO;
                goto out_unlock;
        }

        while (page_nr < cs.nr_segs) {
                int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
                struct pipe_buffer *buf = pipe->bufs + newbuf;

                buf->page = bufs[page_nr].page;
                buf->offset = bufs[page_nr].offset;
                buf->len = bufs[page_nr].len;
                buf->ops = &fuse_dev_pipe_buf_ops;

                pipe->nrbufs++;
                page_nr++;
                ret += buf->len;

                if (pipe->inode)
                        do_wakeup = 1;
        }

out_unlock:
        pipe_unlock(pipe);

        if (do_wakeup) {
                smp_mb();
                if (waitqueue_active(&pipe->wait))
                        wake_up_interruptible(&pipe->wait);
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
        }

out:
        for (; page_nr < cs.nr_segs; page_nr++)
                page_cache_release(bufs[page_nr].page);

        kfree(bufs);
        return ret;
}

static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
                            struct fuse_copy_state *cs)
{
        struct fuse_notify_poll_wakeup_out outarg;
        int err = -EINVAL;

        if (size != sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        fuse_copy_finish(cs);
        return fuse_notify_poll_wakeup(fc, &outarg);

err:
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
                                   struct fuse_copy_state *cs)
{
        struct fuse_notify_inval_inode_out outarg;
        int err = -EINVAL;

        if (size != sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;
        fuse_copy_finish(cs);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (fc->sb) {
                err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
                                               outarg.off, outarg.len);
        }
        up_read(&fc->killsb);
        return err;

err:
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
                                   struct fuse_copy_state *cs)
{
        struct fuse_notify_inval_entry_out outarg;
        int err = -ENOMEM;
        char *buf;
        struct qstr name;

        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
        if (!buf)
                goto err;

        err = -EINVAL;
        if (size < sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        err = -ENAMETOOLONG;
        if (outarg.namelen > FUSE_NAME_MAX)
                goto err;

        err = -EINVAL;
        if (size != sizeof(outarg) + outarg.namelen + 1)
                goto err;

        name.name = buf;
        name.len = outarg.namelen;
        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
        if (err)
                goto err;
        fuse_copy_finish(cs);
        buf[outarg.namelen] = 0;
        name.hash = full_name_hash(name.name, name.len);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (fc->sb)
                err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
        up_read(&fc->killsb);
        kfree(buf);
        return err;

err:
        kfree(buf);
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
                              struct fuse_copy_state *cs)
{
        struct fuse_notify_delete_out outarg;
        int err = -ENOMEM;
        char *buf;
        struct qstr name;

        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
        if (!buf)
                goto err;

        err = -EINVAL;
        if (size < sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        err = -ENAMETOOLONG;
        if (outarg.namelen > FUSE_NAME_MAX)
                goto err;

        err = -EINVAL;
        if (size != sizeof(outarg) + outarg.namelen + 1)
                goto err;

        name.name = buf;
        name.len = outarg.namelen;
        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
        if (err)
                goto err;
        fuse_copy_finish(cs);
        buf[outarg.namelen] = 0;
        name.hash = full_name_hash(name.name, name.len);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (fc->sb)
                err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
                                               outarg.child, &name);
        up_read(&fc->killsb);
        kfree(buf);
        return err;

err:
        kfree(buf);
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
                             struct fuse_copy_state *cs)
{
        struct fuse_notify_store_out outarg;
        struct inode *inode;
        struct address_space *mapping;
        u64 nodeid;
        int err;
        pgoff_t index;
        unsigned int offset;
        unsigned int num;
        loff_t file_size;
        loff_t end;

        err = -EINVAL;
        if (size < sizeof(outarg))
                goto out_finish;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto out_finish;

        err = -EINVAL;
        if (size - sizeof(outarg) != outarg.size)
                goto out_finish;

        nodeid = outarg.nodeid;

        down_read(&fc->killsb);

        err = -ENOENT;
        if (!fc->sb)
                goto out_up_killsb;

        inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
        if (!inode)
                goto out_up_killsb;

        mapping = inode->i_mapping;
        index = outarg.offset >> PAGE_CACHE_SHIFT;
        offset = outarg.offset & ~PAGE_CACHE_MASK;
        file_size = i_size_read(inode);
        end = outarg.offset + outarg.size;
        if (end > file_size) {
                file_size = end;
                fuse_write_update_size(inode, file_size);
        }

        num = outarg.size;
        while (num) {
                struct page *page;
                unsigned int this_num;

                err = -ENOMEM;
                page = find_or_create_page(mapping, index,
                                           mapping_gfp_mask(mapping));
                if (!page)
                        goto out_iput;

                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
                err = fuse_copy_page(cs, &page, offset, this_num, 0);
                if (!err && offset == 0 && (num != 0 || file_size == end))
                        SetPageUptodate(page);
                unlock_page(page);
                page_cache_release(page);

                if (err)
                        goto out_iput;

                num -= this_num;
                offset = 0;
                index++;
        }

        err = 0;

out_iput:
        iput(inode);
out_up_killsb:
        up_read(&fc->killsb);
out_finish:
        fuse_copy_finish(cs);
        return err;
}

static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
{
        release_pages(req->pages, req->num_pages, 0);
}

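/*
 * Collect the cached pages in the requested range and send them back
 * to userspace as a FUSE_NOTIFY_REPLY request.  A page missing from
 * the page cache simply ends the transfer early; the actual length
 * sent is reported in retrieve_in.size.
 */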
1586 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1587                          struct fuse_notify_retrieve_out *outarg)
1588 {
1589         int err;
1590         struct address_space *mapping = inode->i_mapping;
1591         struct fuse_req *req;
1592         pgoff_t index;
1593         loff_t file_size;
1594         unsigned int num;
1595         unsigned int offset;
1596         size_t total_len = 0;
1597         int num_pages;
1598
1599         offset = outarg->offset & ~PAGE_CACHE_MASK;
1600         file_size = i_size_read(inode);
1601
1602         num = outarg->size;
1603         if (outarg->offset > file_size)
1604                 num = 0;
1605         else if (outarg->offset + num > file_size)
1606                 num = file_size - outarg->offset;
1607
1608         num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1609         num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1610
1611         req = fuse_get_req(fc, num_pages);
1612         if (IS_ERR(req))
1613                 return PTR_ERR(req);
1614
1615         req->in.h.opcode = FUSE_NOTIFY_REPLY;
1616         req->in.h.nodeid = outarg->nodeid;
1617         req->in.numargs = 2;
1618         req->in.argpages = 1;
1619         req->page_descs[0].offset = offset;
1620         req->end = fuse_retrieve_end;
1621
1622         index = outarg->offset >> PAGE_CACHE_SHIFT;
1623
1624         while (num && req->num_pages < num_pages) {
1625                 struct page *page;
1626                 unsigned int this_num;
1627
1628                 page = find_get_page(mapping, index);
1629                 if (!page)
1630                         break;
1631
1632                 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1633                 req->pages[req->num_pages] = page;
1634                 req->page_descs[req->num_pages].length = this_num;
1635                 req->num_pages++;
1636
1637                 offset = 0;
1638                 num -= this_num;
1639                 total_len += this_num;
1640                 index++;
1641         }
1642         req->misc.retrieve_in.offset = outarg->offset;
1643         req->misc.retrieve_in.size = total_len;
1644         req->in.args[0].size = sizeof(req->misc.retrieve_in);
1645         req->in.args[0].value = &req->misc.retrieve_in;
1646         req->in.args[1].size = total_len;
1647
1648         err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1649         if (err)
1650                 fuse_retrieve_end(fc, req);
1651
1652         return err;
1653 }
1654
1655 static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1656                                 struct fuse_copy_state *cs)
1657 {
1658         struct fuse_notify_retrieve_out outarg;
1659         struct inode *inode;
1660         int err;
1661
1662         err = -EINVAL;
1663         if (size != sizeof(outarg))
1664                 goto copy_finish;
1665
1666         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1667         if (err)
1668                 goto copy_finish;
1669
1670         fuse_copy_finish(cs);
1671
1672         down_read(&fc->killsb);
1673         err = -ENOENT;
1674         if (fc->sb) {
1675                 u64 nodeid = outarg.nodeid;
1676
1677                 inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1678                 if (inode) {
1679                         err = fuse_retrieve(fc, inode, &outarg);
1680                         iput(inode);
1681                 }
1682         }
1683         up_read(&fc->killsb);
1684
1685         return err;
1686
1687 copy_finish:
1688         fuse_copy_finish(cs);
1689         return err;
1690 }
1691
1692 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1693                        unsigned int size, struct fuse_copy_state *cs)
1694 {
1695         switch (code) {
1696         case FUSE_NOTIFY_POLL:
1697                 return fuse_notify_poll(fc, size, cs);
1698
1699         case FUSE_NOTIFY_INVAL_INODE:
1700                 return fuse_notify_inval_inode(fc, size, cs);
1701
1702         case FUSE_NOTIFY_INVAL_ENTRY:
1703                 return fuse_notify_inval_entry(fc, size, cs);
1704
1705         case FUSE_NOTIFY_STORE:
1706                 return fuse_notify_store(fc, size, cs);
1707
1708         case FUSE_NOTIFY_RETRIEVE:
1709                 return fuse_notify_retrieve(fc, size, cs);
1710
1711         case FUSE_NOTIFY_DELETE:
1712                 return fuse_notify_delete(fc, size, cs);
1713
1714         default:
1715                 fuse_copy_finish(cs);
1716                 return -EINVAL;
1717         }
1718 }
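
/*
 * Userspace-side sketch (not part of this file): a notification is just
 * a write to /dev/fuse whose fuse_out_header has unique == 0 and the
 * notification code in the error field (see fuse_dev_do_write() below).
 * The helper name and 'fd' are assumptions of this sketch.
 *
 *	#include <linux/fuse.h>
 *	#include <sys/uio.h>
 *	#include <unistd.h>
 *
 *	static int notify_inval_inode(int fd, uint64_t ino)
 *	{
 *		struct fuse_notify_inval_inode_out arg = {
 *			.ino = ino,
 *			.off = 0,
 *			.len = 0,	// <= 0: invalidate up to EOF
 *		};
 *		struct fuse_out_header oh = {
 *			.len    = sizeof(oh) + sizeof(arg),
 *			.error  = FUSE_NOTIFY_INVAL_INODE, // code, not errno
 *			.unique = 0,	// zero marks a notification
 *		};
 *		struct iovec iov[2] = {
 *			{ &oh,  sizeof(oh)  },
 *			{ &arg, sizeof(arg) },
 *		};
 *
 *		return writev(fd, iov, 2) < 0 ? -1 : 0;
 *	}
 */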
1719
1720 /* Look up request on processing list by unique ID */
1721 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1722 {
1723         struct list_head *entry;
1724
1725         list_for_each(entry, &fc->processing) {
1726                 struct fuse_req *req;
1727                 req = list_entry(entry, struct fuse_req, list);
1728                 if (req->in.h.unique == unique || req->intr_unique == unique)
1729                         return req;
1730         }
1731         return NULL;
1732 }
1733
1734 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1735                          unsigned nbytes)
1736 {
1737         unsigned reqsize = sizeof(struct fuse_out_header);
1738
1739         if (out->h.error)
1740                 return nbytes != reqsize ? -EINVAL : 0;
1741
1742         reqsize += len_args(out->numargs, out->args);
1743
1744         if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1745                 return -EINVAL;
1746         else if (reqsize > nbytes) {
1747                 struct fuse_arg *lastarg = &out->args[out->numargs-1];
1748                 unsigned diffsize = reqsize - nbytes;
1749                 if (diffsize > lastarg->size)
1750                         return -EINVAL;
1751                 lastarg->size -= diffsize;
1752         }
1753         return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1754                               out->page_zeroing);
1755 }
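
/*
 * Worked example: for a READ reply the last (and only) out argument is
 * the data buffer and out->argvar is set.  If the request asked for
 * 4096 bytes, reqsize is sizeof(struct fuse_out_header) + 4096; a
 * daemon that found only 1000 bytes writes nbytes = sizeof(struct
 * fuse_out_header) + 1000, and copy_out_args() shrinks the last
 * argument by diffsize = 3096 instead of rejecting the short reply.
 */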
1756
1757 /*
1758  * Write a single reply to a request.  First the header is copied from
1759  * the write buffer.  The request is then looked up on the processing
1760  * list by the unique ID found in the header.  If found, the request
1761  * is removed from the list and the rest of the buffer is copied to
1762  * it.  The request is finished by calling request_end().
1763  */
1764 static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1765                                  struct fuse_copy_state *cs, size_t nbytes)
1766 {
1767         int err;
1768         struct fuse_req *req;
1769         struct fuse_out_header oh;
1770
1771         if (nbytes < sizeof(struct fuse_out_header))
1772                 return -EINVAL;
1773
1774         err = fuse_copy_one(cs, &oh, sizeof(oh));
1775         if (err)
1776                 goto err_finish;
1777
1778         err = -EINVAL;
1779         if (oh.len != nbytes)
1780                 goto err_finish;
1781
1782         /*
1783          * A zero oh.unique indicates an unsolicited notification
1784          * message; in that case oh.error carries the notification code.
1785          */
1786         if (!oh.unique) {
1787                 err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1788                 return err ? err : nbytes;
1789         }
1790
1791         err = -EINVAL;
1792         if (oh.error <= -1000 || oh.error > 0)
1793                 goto err_finish;
1794
1795         spin_lock(&fc->lock);
1796         err = -ENOENT;
1797         if (!fc->connected)
1798                 goto err_unlock;
1799
1800         req = request_find(fc, oh.unique);
1801         if (!req)
1802                 goto err_unlock;
1803
1804         if (req->aborted) {
1805                 spin_unlock(&fc->lock);
1806                 fuse_copy_finish(cs);
1807                 spin_lock(&fc->lock);
1808                 request_end(fc, req);
1809                 return -ENOENT;
1810         }
1811         /* Is it an interrupt reply? */
1812         if (req->intr_unique == oh.unique) {
1813                 err = -EINVAL;
1814                 if (nbytes != sizeof(struct fuse_out_header))
1815                         goto err_unlock;
1816
1817                 if (oh.error == -ENOSYS)
1818                         fc->no_interrupt = 1;
1819                 else if (oh.error == -EAGAIN)
1820                         queue_interrupt(fc, req);
1821
1822                 spin_unlock(&fc->lock);
1823                 fuse_copy_finish(cs);
1824                 return nbytes;
1825         }
1826
1827         req->state = FUSE_REQ_WRITING;
1828         list_move(&req->list, &fc->io);
1829         req->out.h = oh;
1830         req->locked = 1;
1831         cs->req = req;
1832         if (!req->out.page_replace)
1833                 cs->move_pages = 0;
1834         spin_unlock(&fc->lock);
1835
1836         err = copy_out_args(cs, &req->out, nbytes);
1837         fuse_copy_finish(cs);
1838
1839         spin_lock(&fc->lock);
1840         req->locked = 0;
1841         if (!err) {
1842                 if (req->aborted)
1843                         err = -ENOENT;
1844         } else if (!req->aborted)
1845                 req->out.h.error = -EIO;
1846         request_end(fc, req);
1847
1848         return err ? err : nbytes;
1849
1850  err_unlock:
1851         spin_unlock(&fc->lock);
1852  err_finish:
1853         fuse_copy_finish(cs);
1854         return err;
1855 }
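
/*
 * Userspace-side sketch (not part of this file): a reply is a single
 * gathered write of the header followed by the out arguments, and
 * oh.len must equal the total number of bytes written (checked above).
 * The helper name, 'fd' and 'unique' handling are assumptions.
 *
 *	#include <linux/fuse.h>
 *	#include <sys/uio.h>
 *
 *	static int reply_read(int fd, uint64_t unique,
 *			      const void *data, size_t size)
 *	{
 *		struct fuse_out_header oh = {
 *			.len    = sizeof(oh) + size, // must match writev total
 *			.error  = 0,
 *			.unique = unique,	// copied from the request
 *		};
 *		struct iovec iov[2] = {
 *			{ &oh, sizeof(oh) },
 *			{ (void *)data, size },
 *		};
 *
 *		return writev(fd, iov, 2) < 0 ? -1 : 0;
 *	}
 */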
1856
1857 static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1858                               unsigned long nr_segs, loff_t pos)
1859 {
1860         struct fuse_copy_state cs;
1861         struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1862         if (!fc)
1863                 return -EPERM;
1864
1865         fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1866
1867         return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1868 }
1869
1870 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1871                                      struct file *out, loff_t *ppos,
1872                                      size_t len, unsigned int flags)
1873 {
1874         unsigned nbuf;
1875         unsigned idx;
1876         struct pipe_buffer *bufs;
1877         struct fuse_copy_state cs;
1878         struct fuse_conn *fc;
1879         size_t rem;
1880         ssize_t ret;
1881
1882         fc = fuse_get_conn(out);
1883         if (!fc)
1884                 return -EPERM;
1885
1886         bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1887         if (!bufs)
1888                 return -ENOMEM;
1889
1890         pipe_lock(pipe);
1891         nbuf = 0;
1892         rem = 0;
1893         for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1894                 rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1895
1896         ret = -EINVAL;
1897         if (rem < len) {
1898                 pipe_unlock(pipe);
1899                 goto out;
1900         }
1901
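	/*
	 * Collect exactly 'len' bytes worth of pipe buffers in bufs[]:
	 * whole buffers are taken over from the pipe (their ops pointer
	 * is cleared), while a final partly-needed buffer gets an extra
	 * reference and is split.
	 */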
1902         rem = len;
1903         while (rem) {
1904                 struct pipe_buffer *ibuf;
1905                 struct pipe_buffer *obuf;
1906
1907                 BUG_ON(nbuf >= pipe->buffers);
1908                 BUG_ON(!pipe->nrbufs);
1909                 ibuf = &pipe->bufs[pipe->curbuf];
1910                 obuf = &bufs[nbuf];
1911
1912                 if (rem >= ibuf->len) {
1913                         *obuf = *ibuf;
1914                         ibuf->ops = NULL;
1915                         pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1916                         pipe->nrbufs--;
1917                 } else {
1918                         ibuf->ops->get(pipe, ibuf);
1919                         *obuf = *ibuf;
1920                         obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1921                         obuf->len = rem;
1922                         ibuf->offset += obuf->len;
1923                         ibuf->len -= obuf->len;
1924                 }
1925                 nbuf++;
1926                 rem -= obuf->len;
1927         }
1928         pipe_unlock(pipe);
1929
1930         fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1931         cs.pipebufs = bufs;
1932         cs.pipe = pipe;
1933
1934         if (flags & SPLICE_F_MOVE)
1935                 cs.move_pages = 1;
1936
1937         ret = fuse_dev_do_write(fc, &cs, len);
1938
1939         for (idx = 0; idx < nbuf; idx++) {
1940                 struct pipe_buffer *buf = &bufs[idx];
1941                 buf->ops->release(pipe, buf);
1942         }
1943 out:
1944         kfree(bufs);
1945         return ret;
1946 }
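
/*
 * Userspace-side sketch (not part of this file): a daemon can feed a
 * reply through a pipe instead of write(), letting SPLICE_F_MOVE ask
 * the kernel to steal the pages (cs.move_pages above) rather than copy
 * them.  The helper name and parameters are assumptions of this sketch.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <sys/uio.h>
 *
 *	// iov[] holds the complete reply: fuse_out_header plus payload.
 *	static int splice_reply(int devfd, int pipefd[2],
 *				const struct iovec *iov, int cnt,
 *				size_t total)
 *	{
 *		if (vmsplice(pipefd[1], iov, cnt, 0) != (ssize_t)total)
 *			return -1;
 *		// the whole message must be in the pipe: shorter input
 *		// makes fuse_dev_splice_write() fail with -EINVAL
 *		return splice(pipefd[0], NULL, devfd, NULL, total,
 *			      SPLICE_F_MOVE) == (ssize_t)total ? 0 : -1;
 *	}
 */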
1947
1948 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1949 {
1950         unsigned mask = POLLOUT | POLLWRNORM;
1951         struct fuse_conn *fc = fuse_get_conn(file);
1952         if (!fc)
1953                 return POLLERR;
1954
1955         poll_wait(file, &fc->waitq, wait);
1956
1957         spin_lock(&fc->lock);
1958         if (!fc->connected)
1959                 mask = POLLERR;
1960         else if (request_pending(fc))
1961                 mask |= POLLIN | POLLRDNORM;
1962         spin_unlock(&fc->lock);
1963
1964         return mask;
1965 }
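
/*
 * Userspace-side sketch (not part of this file): a typical daemon loop
 * sleeps in poll() until a request is pending.  'fd' is the open
 * /dev/fuse descriptor and 'buf'/'bufsize' a request-sized buffer;
 * all three are assumptions of this sketch.
 *
 *	#include <poll.h>
 *	#include <unistd.h>
 *
 *	for (;;) {
 *		struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *		if (poll(&pfd, 1, -1) < 0)
 *			break;
 *		if (pfd.revents & POLLERR)	// aborted or unmounted
 *			break;
 *		if (pfd.revents & POLLIN) {
 *			ssize_t n = read(fd, buf, bufsize);
 *			// decode the fuse_in_header at buf, handle the
 *			// request, then write the reply back to fd
 *		}
 *	}
 */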
1966
1967 /*
1968  * Abort all requests on the given list (pending or processing)
1969  *
1970  * This function releases and reacquires fc->lock
1971  */
1972 static void end_requests(struct fuse_conn *fc, struct list_head *head)
1973 __releases(fc->lock)
1974 __acquires(fc->lock)
1975 {
1976         while (!list_empty(head)) {
1977                 struct fuse_req *req;
1978                 req = list_entry(head->next, struct fuse_req, list);
1979                 req->out.h.error = -ECONNABORTED;
1980                 request_end(fc, req);
1981                 spin_lock(&fc->lock);
1982         }
1983 }
1984
1985 /*
1986  * Abort requests under I/O
1987  *
1988  * The requests are set to aborted and finished, and the request
1989  * waiter is woken up.  This will make request_wait_answer() wait
1990  * until the request is unlocked and then return.
1991  *
1992  * If the request is asynchronous, then the end function needs to be
1993  * called after waiting for the request to be unlocked (if it was
1994  * locked).
1995  */
1996 static void end_io_requests(struct fuse_conn *fc)
1997 __releases(fc->lock)
1998 __acquires(fc->lock)
1999 {
2000         while (!list_empty(&fc->io)) {
2001                 struct fuse_req *req =
2002                         list_entry(fc->io.next, struct fuse_req, list);
2003                 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
2004
2005                 req->aborted = 1;
2006                 req->out.h.error = -ECONNABORTED;
2007                 req->state = FUSE_REQ_FINISHED;
2008                 list_del_init(&req->list);
2009                 wake_up(&req->waitq);
2010                 if (end) {
2011                         req->end = NULL;
2012                         __fuse_get_request(req);
2013                         spin_unlock(&fc->lock);
2014                         wait_event(req->waitq, !req->locked);
2015                         end(fc, req);
2016                         fuse_put_request(fc, req);
2017                         spin_lock(&fc->lock);
2018                 }
2019         }
2020 }
2021
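/*
 * Flush the background queue and fail every request still sitting on
 * the pending and processing lists, then free any queued forget
 * requests.  Called with fc->lock held; end_requests() may drop and
 * reacquire it.
 */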
2022 static void end_queued_requests(struct fuse_conn *fc)
2023 __releases(fc->lock)
2024 __acquires(fc->lock)
2025 {
2026         fc->max_background = UINT_MAX;
2027         flush_bg_queue(fc);
2028         end_requests(fc, &fc->pending);
2029         end_requests(fc, &fc->processing);
2030         while (forget_pending(fc))
2031                 kfree(dequeue_forget(fc, 1, NULL));
2032 }
2033
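/*
 * Wake up everybody sleeping in poll() on a file of this connection,
 * so that they notice the connection is gone.
 */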
2034 static void end_polls(struct fuse_conn *fc)
2035 {
2036         struct rb_node *p;
2037
2038         p = rb_first(&fc->polled_files);
2039
2040         while (p) {
2041                 struct fuse_file *ff;
2042                 ff = rb_entry(p, struct fuse_file, polled_node);
2043                 wake_up_interruptible_all(&ff->poll_wait);
2044
2045                 p = rb_next(p);
2046         }
2047 }
2048
2049 /*
2050  * Abort all requests.
2051  *
2052  * Emergency exit in case of a malicious or accidental deadlock, or
2053  * just a hung filesystem.
2054  *
2055  * The same effect is usually achievable through killing the
2056  * filesystem daemon and all users of the filesystem.  The exception
2057  * is the combination of an asynchronous request and the tricky
2058  * deadlock (see Documentation/filesystems/fuse.txt).
2059  *
2060  * During the aborting, progression of requests from the pending and
2061  * processing lists onto the io list, and progression of new requests
2062  * onto the pending list is prevented by fc->connected being false.
2063  *
2064  * Progression of requests under I/O to the processing list is
2065  * prevented by the req->aborted flag being true for these requests.
2066  * For this reason requests on the io list must be aborted first.
2067  */
2068 void fuse_abort_conn(struct fuse_conn *fc)
2069 {
2070         spin_lock(&fc->lock);
2071         if (fc->connected) {
2072                 fc->connected = 0;
2073                 fc->blocked = 0;
2074                 end_io_requests(fc);
2075                 end_queued_requests(fc);
2076                 end_polls(fc);
2077                 wake_up_all(&fc->waitq);
2078                 wake_up_all(&fc->blocked_waitq);
2079                 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
2080         }
2081         spin_unlock(&fc->lock);
2082 }
2083 EXPORT_SYMBOL_GPL(fuse_abort_conn);
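
/*
 * From userspace this is normally reached through the fusectl
 * filesystem: writing to a connection's "abort" file ends up in
 * fuse_abort_conn().  A sketch, assuming fusectl is mounted at
 * /sys/fs/fuse/connections and <ID> stands for the connection number:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/fs/fuse/connections/<ID>/abort", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// any write triggers the abort
 *		close(fd);
 *	}
 */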
2084
2085 int fuse_dev_release(struct inode *inode, struct file *file)
2086 {
2087         struct fuse_conn *fc = fuse_get_conn(file);
2088         if (fc) {
2089                 spin_lock(&fc->lock);
2090                 fc->connected = 0;
2091                 fc->blocked = 0;
2092                 end_queued_requests(fc);
2093                 end_polls(fc);
2094                 wake_up_all(&fc->blocked_waitq);
2095                 spin_unlock(&fc->lock);
2096                 fuse_conn_put(fc);
2097         }
2098
2099         return 0;
2100 }
2101 EXPORT_SYMBOL_GPL(fuse_dev_release);
2102
2103 static int fuse_dev_fasync(int fd, struct file *file, int on)
2104 {
2105         struct fuse_conn *fc = fuse_get_conn(file);
2106         if (!fc)
2107                 return -EPERM;
2108
2109         /* No locking - fasync_helper does its own locking */
2110         return fasync_helper(fd, file, on, &fc->fasync);
2111 }
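
/*
 * Userspace-side sketch (not part of this file): a daemon that prefers
 * SIGIO to blocking in read()/poll() arms async notification with
 * fcntl(); request arrival and fuse_abort_conn() then signal it via
 * kill_fasync().  'fd' is the open /dev/fuse descriptor (assumption).
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	fcntl(fd, F_SETOWN, getpid());	// route SIGIO to this process
 *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
 */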
2112
2113 const struct file_operations fuse_dev_operations = {
2114         .owner          = THIS_MODULE,
2115         .llseek         = no_llseek,
2116         .read           = do_sync_read,
2117         .aio_read       = fuse_dev_read,
2118         .splice_read    = fuse_dev_splice_read,
2119         .write          = do_sync_write,
2120         .aio_write      = fuse_dev_write,
2121         .splice_write   = fuse_dev_splice_write,
2122         .poll           = fuse_dev_poll,
2123         .release        = fuse_dev_release,
2124         .fasync         = fuse_dev_fasync,
2125 };
2126 EXPORT_SYMBOL_GPL(fuse_dev_operations);
2127
2128 static struct miscdevice fuse_miscdevice = {
2129         .minor = FUSE_MINOR,
2130         .name  = "fuse",
2131         .fops = &fuse_dev_operations,
2132 };
2133
2134 int __init fuse_dev_init(void)
2135 {
2136         int err = -ENOMEM;
2137         fuse_req_cachep = kmem_cache_create("fuse_request",
2138                                             sizeof(struct fuse_req),
2139                                             0, 0, NULL);
2140         if (!fuse_req_cachep)
2141                 goto out;
2142
2143         err = misc_register(&fuse_miscdevice);
2144         if (err)
2145                 goto out_cache_clean;
2146
2147         return 0;
2148
2149  out_cache_clean:
2150         kmem_cache_destroy(fuse_req_cachep);
2151  out:
2152         return err;
2153 }
2154
2155 void fuse_dev_cleanup(void)
2156 {
2157         misc_deregister(&fuse_miscdevice);
2158         kmem_cache_destroy(fuse_req_cachep);
2159 }