1 /* Storage object read/write
2  *
3  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public Licence
8  * as published by the Free Software Foundation; either version
9  * 2 of the Licence, or (at your option) any later version.
10  */
11
12 #include <linux/mount.h>
13 #include <linux/slab.h>
14 #include <linux/file.h>
15 #include "internal.h"
16
17 /*
18  * detect wake-up events generated by the unlocking of pages in which we're
19  * interested
20  * - we use this to detect read completion of backing pages
21  * - the caller holds the waitqueue lock
22  */
23 static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
24                                   int sync, void *_key)
25 {
26         struct cachefiles_one_read *monitor =
27                 container_of(wait, struct cachefiles_one_read, monitor);
28         struct cachefiles_object *object;
29         struct wait_bit_key *key = _key;
30         struct page *page = wait->private;
31
32         ASSERT(key);
33
34         _enter("{%lu},%u,%d,{%p,%u}",
35                monitor->netfs_page->index, mode, sync,
36                key->flags, key->bit_nr);
37
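        /* only respond to a wake-up that is for the unlocking of the specific
         * backing page this monitor was installed on; other events on the
         * shared waitqueue are ignored */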
38         if (key->flags != &page->flags ||
39             key->bit_nr != PG_locked)
40                 return 0;
41
42         _debug("--- monitor %p %lx ---", page, page->flags);
43
44         if (!PageUptodate(page) && !PageError(page)) {
45                 /* unlocked, not uptodate and not erroneous? */
46                 _debug("page probably truncated");
47         }
48
49         /* remove from the waitqueue */
50         list_del(&wait->task_list);
51
52         /* move onto the action list and queue for FS-Cache thread pool */
53         ASSERT(monitor->op);
54
55         object = container_of(monitor->op->op.object,
56                               struct cachefiles_object, fscache);
57
58         spin_lock(&object->work_lock);
59         list_add_tail(&monitor->op_link, &monitor->op->to_do);
60         spin_unlock(&object->work_lock);
61
62         fscache_enqueue_retrieval(monitor->op);
63         return 0;
64 }
65
66 /*
67  * handle a probably truncated page
68  * - check to see if the page is still relevant and reissue the read if
69  *   possible
70  * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we
71  *   must wait again and 0 if successful
72  */
73 static int cachefiles_read_reissue(struct cachefiles_object *object,
74                                    struct cachefiles_one_read *monitor)
75 {
76         struct address_space *bmapping = object->backer->d_inode->i_mapping;
77         struct page *backpage = monitor->back_page, *backpage2;
78         int ret;
79
80         _enter("{ino=%lx},{%lx,%lx}",
81                object->backer->d_inode->i_ino,
82                backpage->index, backpage->flags);
83
84         /* skip if the page was truncated away completely */
85         if (backpage->mapping != bmapping) {
86                 _leave(" = -ENODATA [mapping]");
87                 return -ENODATA;
88         }
89
90         backpage2 = find_get_page(bmapping, backpage->index);
91         if (!backpage2) {
92                 _leave(" = -ENODATA [gone]");
93                 return -ENODATA;
94         }
95
96         if (backpage != backpage2) {
97                 put_page(backpage2);
98                 _leave(" = -ENODATA [different]");
99                 return -ENODATA;
100         }
101
102         /* the page is still there and we already have a ref on it, so we don't
103          * need a second */
104         put_page(backpage2);
105
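        /* re-arm the monitor on the backing page before reissuing the read so
         * that the unlock event cannot be missed */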
106         INIT_LIST_HEAD(&monitor->op_link);
107         add_page_wait_queue(backpage, &monitor->monitor);
108
109         if (trylock_page(backpage)) {
110                 ret = -EIO;
111                 if (PageError(backpage))
112                         goto unlock_discard;
113                 ret = 0;
114                 if (PageUptodate(backpage))
115                         goto unlock_discard;
116
117                 _debug("reissue read");
118                 ret = bmapping->a_ops->readpage(NULL, backpage);
119                 if (ret < 0)
120                         goto unlock_discard;
121         }
122
123         /* but the page may have been read before the monitor was installed, so
124          * the monitor may miss the event - so we have to ensure that we do get
125          * one in such a case */
126         if (trylock_page(backpage)) {
127                 _debug("jumpstart %p {%lx}", backpage, backpage->flags);
128                 unlock_page(backpage);
129         }
130
131         /* it'll reappear on the todo list */
132         _leave(" = -EINPROGRESS");
133         return -EINPROGRESS;
134
135 unlock_discard:
136         unlock_page(backpage);
137         spin_lock_irq(&object->work_lock);
138         list_del(&monitor->op_link);
139         spin_unlock_irq(&object->work_lock);
140         _leave(" = %d", ret);
141         return ret;
142 }
143
144 /*
145  * copy data from backing pages to netfs pages to complete a read operation
146  * - driven by FS-Cache's thread pool
147  */
148 static void cachefiles_read_copier(struct fscache_operation *_op)
149 {
150         struct cachefiles_one_read *monitor;
151         struct cachefiles_object *object;
152         struct fscache_retrieval *op;
153         struct pagevec pagevec;
154         int error, max;
155
156         op = container_of(_op, struct fscache_retrieval, op);
157         object = container_of(op->op.object,
158                               struct cachefiles_object, fscache);
159
160         _enter("{ino=%lu}", object->backer->d_inode->i_ino);
161
162         pagevec_init(&pagevec, 0);
163
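        /* copy at most a small batch of pages per pass so that other work
         * queued on the FS-Cache thread pool isn't starved (see the
         * need_resched() check below) */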
164         max = 8;
165         spin_lock_irq(&object->work_lock);
166
167         while (!list_empty(&op->to_do)) {
168                 monitor = list_entry(op->to_do.next,
169                                      struct cachefiles_one_read, op_link);
170                 list_del(&monitor->op_link);
171
172                 spin_unlock_irq(&object->work_lock);
173
174                 _debug("- copy {%lu}", monitor->back_page->index);
175
176         recheck:
177                 if (test_bit(FSCACHE_COOKIE_INVALIDATING,
178                              &object->fscache.cookie->flags)) {
179                         error = -ESTALE;
180                 } else if (PageUptodate(monitor->back_page)) {
181                         copy_highpage(monitor->netfs_page, monitor->back_page);
182                         fscache_mark_page_cached(monitor->op,
183                                                  monitor->netfs_page);
184                         error = 0;
185                 } else if (!PageError(monitor->back_page)) {
186                         /* the page has probably been truncated */
187                         error = cachefiles_read_reissue(object, monitor);
188                         if (error == -EINPROGRESS)
189                                 goto next;
190                         goto recheck;
191                 } else {
192                         cachefiles_io_error_obj(
193                                 object,
194                                 "Readpage failed on backing file %lx",
195                                 (unsigned long) monitor->back_page->flags);
196                         error = -EIO;
197                 }
198
199                 page_cache_release(monitor->back_page);
200
201                 fscache_end_io(op, monitor->netfs_page, error);
202                 page_cache_release(monitor->netfs_page);
203                 fscache_retrieval_complete(op, 1);
204                 fscache_put_retrieval(op);
205                 kfree(monitor);
206
207         next:
208                 /* let the thread pool have some air occasionally */
209                 max--;
210                 if (max < 0 || need_resched()) {
211                         if (!list_empty(&op->to_do))
212                                 fscache_enqueue_retrieval(op);
213                         _leave(" [maxed out]");
214                         return;
215                 }
216
217                 spin_lock_irq(&object->work_lock);
218         }
219
220         spin_unlock_irq(&object->work_lock);
221         _leave("");
222 }
223
224 /*
225  * read the page corresponding to the given netfs page from the backing file
226  * - an uncertain page is simply discarded, to be tried again another time
227  */
228 static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
229                                             struct fscache_retrieval *op,
230                                             struct page *netpage,
231                                             struct pagevec *pagevec)
232 {
233         struct cachefiles_one_read *monitor;
234         struct address_space *bmapping;
235         struct page *newpage, *backpage;
236         int ret;
237
238         _enter("");
239
240         pagevec_reinit(pagevec);
241
242         _debug("read back %p{%lu,%d}",
243                netpage, netpage->index, page_count(netpage));
244
245         monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
246         if (!monitor)
247                 goto nomem;
248
249         monitor->netfs_page = netpage;
250         monitor->op = fscache_get_retrieval(op);
251
252         init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);
253
254         /* attempt to get hold of the backing page */
255         bmapping = object->backer->d_inode->i_mapping;
256         newpage = NULL;
257
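        /* look up the backing page; if it isn't present, allocate one and try
         * to install it, going back round the loop if someone else installed
         * a page first (add_to_page_cache() returns -EEXIST) */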
258         for (;;) {
259                 backpage = find_get_page(bmapping, netpage->index);
260                 if (backpage)
261                         goto backing_page_already_present;
262
263                 if (!newpage) {
264                         newpage = __page_cache_alloc(cachefiles_gfp |
265                                                      __GFP_COLD);
266                         if (!newpage)
267                                 goto nomem_monitor;
268                 }
269
270                 ret = add_to_page_cache(newpage, bmapping,
271                                         netpage->index, cachefiles_gfp);
272                 if (ret == 0)
273                         goto installed_new_backing_page;
274                 if (ret != -EEXIST)
275                         goto nomem_page;
276         }
277
278         /* we've installed a new backing page, so now we need to add it
279          * to the LRU list and start it reading */
280 installed_new_backing_page:
281         _debug("- new %p", newpage);
282
283         backpage = newpage;
284         newpage = NULL;
285
286         page_cache_get(backpage);
287         pagevec_add(pagevec, backpage);
288         __pagevec_lru_add_file(pagevec);
289
290 read_backing_page:
291         ret = bmapping->a_ops->readpage(NULL, backpage);
292         if (ret < 0)
293                 goto read_error;
294
295         /* set the monitor to transfer the data across */
296 monitor_backing_page:
297         _debug("- monitor add");
298
299         /* install the monitor */
300         page_cache_get(monitor->netfs_page);
301         page_cache_get(backpage);
302         monitor->back_page = backpage;
303         monitor->monitor.private = backpage;
304         add_page_wait_queue(backpage, &monitor->monitor);
305         monitor = NULL;
306
307         /* but the page may have been read before the monitor was installed, so
308          * the monitor may miss the event - so we have to ensure that we do get
309          * one in such a case */
310         if (trylock_page(backpage)) {
311                 _debug("jumpstart %p {%lx}", backpage, backpage->flags);
312                 unlock_page(backpage);
313         }
314         goto success;
315
316         /* if the backing page is already present, it can be in one of
317          * three states: read in progress, read failed or read okay */
318 backing_page_already_present:
319         _debug("- present");
320
321         if (newpage) {
322                 page_cache_release(newpage);
323                 newpage = NULL;
324         }
325
326         if (PageError(backpage))
327                 goto io_error;
328
329         if (PageUptodate(backpage))
330                 goto backing_page_already_uptodate;
331
332         if (!trylock_page(backpage))
333                 goto monitor_backing_page;
334         _debug("read %p {%lx}", backpage, backpage->flags);
335         goto read_backing_page;
336
337         /* the backing page is already up to date - mark the netfs page
338          * as being cached and copy the data across to it */
339 backing_page_already_uptodate:
340         _debug("- uptodate");
341
342         fscache_mark_page_cached(op, netpage);
343
344         copy_highpage(netpage, backpage);
345         fscache_end_io(op, netpage, 0);
346         fscache_retrieval_complete(op, 1);
347
348 success:
349         _debug("success");
350         ret = 0;
351
352 out:
353         if (backpage)
354                 page_cache_release(backpage);
355         if (monitor) {
356                 fscache_put_retrieval(monitor->op);
357                 kfree(monitor);
358         }
359         _leave(" = %d", ret);
360         return ret;
361
362 read_error:
363         _debug("read error %d", ret);
364         if (ret == -ENOMEM) {
365                 fscache_retrieval_complete(op, 1);
366                 goto out;
367         }
368 io_error:
369         cachefiles_io_error_obj(object, "Page read error on backing file");
370         fscache_retrieval_complete(op, 1);
371         ret = -ENOBUFS;
372         goto out;
373
374 nomem_page:
375         page_cache_release(newpage);
376 nomem_monitor:
377         fscache_put_retrieval(monitor->op);
378         kfree(monitor);
379 nomem:
380         fscache_retrieval_complete(op, 1);
381         _leave(" = -ENOMEM");
382         return -ENOMEM;
383 }
384
385 /*
386  * read a page from the cache or allocate a block in which to store it
387  * - cache withdrawal is prevented by the caller
388  * - returns -EINTR if interrupted
389  * - returns -ENOMEM if we ran out of memory
390  * - returns -ENOBUFS if no buffers can be made available
391  * - returns -ENOBUFS if page is beyond EOF
392  * - if the page is backed by a block in the cache:
393  *   - a read will be started which will call the callback on completion
394  *   - 0 will be returned
395  * - else if the page is unbacked:
396  *   - the metadata will be retained
397  *   - -ENODATA will be returned
398  */
399 int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
400                                   struct page *page,
401                                   gfp_t gfp)
402 {
403         struct cachefiles_object *object;
404         struct cachefiles_cache *cache;
405         struct pagevec pagevec;
406         struct inode *inode;
407         sector_t block0, block;
408         unsigned shift;
409         int ret;
410
411         object = container_of(op->op.object,
412                               struct cachefiles_object, fscache);
413         cache = container_of(object->fscache.cache,
414                              struct cachefiles_cache, cache);
415
416         _enter("{%p},{%lx},,,", object, page->index);
417
418         if (!object->backer)
419                 goto enobufs;
420
421         inode = object->backer->d_inode;
422         ASSERT(S_ISREG(inode->i_mode));
423         ASSERT(inode->i_mapping->a_ops->bmap);
424         ASSERT(inode->i_mapping->a_ops->readpages);
425
426         /* calculate the shift required to use bmap */
427         if (inode->i_sb->s_blocksize > PAGE_SIZE)
428                 goto enobufs;
429
430         shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
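        /* e.g. 4KB pages over a filesystem with 1KB blocks gives
         * shift = 12 - 10 = 2, so netfs page index N starts at backing
         * block N << 2 */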
431
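        /* the data is copied from the backing page to the netfs page
         * asynchronously by cachefiles_read_copier() on the FS-Cache thread
         * pool */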
432         op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
433         op->op.flags |= FSCACHE_OP_ASYNC;
434         op->op.processor = cachefiles_read_copier;
435
436         pagevec_init(&pagevec, 0);
437
438         /* we assume the absence or presence of the first block is a good
439          * enough indication for the page as a whole
440          * - TODO: don't use bmap() for this as it is _not_ actually good
441          *   enough: it doesn't indicate errors, but it's all we've
442          *   got for the moment
443          */
444         block0 = page->index;
445         block0 <<= shift;
446
447         block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
448         _debug("%llx -> %llx",
449                (unsigned long long) block0,
450                (unsigned long long) block);
451
452         if (block) {
453                 /* submit the apparently valid page to the backing fs to be
454                  * read from disk */
455                 ret = cachefiles_read_backing_file_one(object, op, page,
456                                                        &pagevec);
457         } else if (cachefiles_has_space(cache, 0, 1) == 0) {
458                 /* there's space in the cache we can use */
459                 fscache_mark_page_cached(op, page);
460                 fscache_retrieval_complete(op, 1);
461                 ret = -ENODATA;
462         } else {
463                 goto enobufs;
464         }
465
466         _leave(" = %d", ret);
467         return ret;
468
469 enobufs:
470         fscache_retrieval_complete(op, 1);
471         _leave(" = -ENOBUFS");
472         return -ENOBUFS;
473 }
474
475 /*
476  * read the pages corresponding to the given set from the backing file
477  * - any uncertain pages are simply discarded, to be tried again another time
478  */
479 static int cachefiles_read_backing_file(struct cachefiles_object *object,
480                                         struct fscache_retrieval *op,
481                                         struct list_head *list)
482 {
483         struct cachefiles_one_read *monitor = NULL;
484         struct address_space *bmapping = object->backer->d_inode->i_mapping;
485         struct pagevec lru_pvec;
486         struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
487         int ret = 0;
488
489         _enter("");
490
491         pagevec_init(&lru_pvec, 0);
492
493         list_for_each_entry_safe(netpage, _n, list, lru) {
494                 list_del(&netpage->lru);
495
496                 _debug("read back %p{%lu,%d}",
497                        netpage, netpage->index, page_count(netpage));
498
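                /* allocate a monitor for this page unless one is left over,
                 * unused, from the previous iteration */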
499                 if (!monitor) {
500                         monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
501                         if (!monitor)
502                                 goto nomem;
503
504                         monitor->op = fscache_get_retrieval(op);
505                         init_waitqueue_func_entry(&monitor->monitor,
506                                                   cachefiles_read_waiter);
507                 }
508
509                 for (;;) {
510                         backpage = find_get_page(bmapping, netpage->index);
511                         if (backpage)
512                                 goto backing_page_already_present;
513
514                         if (!newpage) {
515                                 newpage = __page_cache_alloc(cachefiles_gfp |
516                                                              __GFP_COLD);
517                                 if (!newpage)
518                                         goto nomem;
519                         }
520
521                         ret = add_to_page_cache(newpage, bmapping,
522                                                 netpage->index, cachefiles_gfp);
523                         if (ret == 0)
524                                 goto installed_new_backing_page;
525                         if (ret != -EEXIST)
526                                 goto nomem;
527                 }
528
529                 /* we've installed a new backing page, so now we need to add it
530                  * to the LRU list and start it reading */
531         installed_new_backing_page:
532                 _debug("- new %p", newpage);
533
534                 backpage = newpage;
535                 newpage = NULL;
536
537                 page_cache_get(backpage);
538                 if (!pagevec_add(&lru_pvec, backpage))
539                         __pagevec_lru_add_file(&lru_pvec);
540
541         reread_backing_page:
542                 ret = bmapping->a_ops->readpage(NULL, backpage);
543                 if (ret < 0)
544                         goto read_error;
545
546                 /* add the netfs page to the pagecache and LRU, and set the
547                  * monitor to transfer the data across */
548         monitor_backing_page:
549                 _debug("- monitor add");
550
551                 ret = add_to_page_cache(netpage, op->mapping, netpage->index,
552                                         cachefiles_gfp);
553                 if (ret < 0) {
554                         if (ret == -EEXIST) {
555                                 page_cache_release(netpage);
556                                 fscache_retrieval_complete(op, 1);
557                                 continue;
558                         }
559                         goto nomem;
560                 }
561
562                 page_cache_get(netpage);
563                 if (!pagevec_add(&lru_pvec, netpage))
564                         __pagevec_lru_add_file(&lru_pvec);
565
566                 /* install a monitor */
567                 page_cache_get(netpage);
568                 monitor->netfs_page = netpage;
569
570                 page_cache_get(backpage);
571                 monitor->back_page = backpage;
572                 monitor->monitor.private = backpage;
573                 add_page_wait_queue(backpage, &monitor->monitor);
574                 monitor = NULL;
575
576                 /* but the page may have been read before the monitor was
577                  * installed, so the monitor may miss the event - so we have to
578                  * ensure that we do get one in such a case */
579                 if (trylock_page(backpage)) {
580                         _debug("2unlock %p {%lx}", backpage, backpage->flags);
581                         unlock_page(backpage);
582                 }
583
584                 page_cache_release(backpage);
585                 backpage = NULL;
586
587                 page_cache_release(netpage);
588                 netpage = NULL;
589                 continue;
590
591                 /* if the backing page is already present, it can be in one of
592                  * three states: read in progress, read failed or read okay */
593         backing_page_already_present:
594                 _debug("- present %p", backpage);
595
596                 if (PageError(backpage))
597                         goto io_error;
598
599                 if (PageUptodate(backpage))
600                         goto backing_page_already_uptodate;
601
602                 _debug("- not ready %p{%lx}", backpage, backpage->flags);
603
604                 if (!trylock_page(backpage))
605                         goto monitor_backing_page;
606
607                 if (PageError(backpage)) {
608                         _debug("error %lx", backpage->flags);
609                         unlock_page(backpage);
610                         goto io_error;
611                 }
612
613                 if (PageUptodate(backpage))
614                         goto backing_page_already_uptodate_unlock;
615
616                 /* we've locked a page that's neither up to date nor erroneous,
617                  * so we need to attempt to read it again */
618                 goto reread_backing_page;
619
620                 /* the backing page is already up to date, attach the netfs
621                  * page to the pagecache and LRU and copy the data across */
622         backing_page_already_uptodate_unlock:
623                 _debug("uptodate %lx", backpage->flags);
624                 unlock_page(backpage);
625         backing_page_already_uptodate:
626                 _debug("- uptodate");
627
628                 ret = add_to_page_cache(netpage, op->mapping, netpage->index,
629                                         cachefiles_gfp);
630                 if (ret < 0) {
631                         if (ret == -EEXIST) {
632                                 page_cache_release(netpage);
633                                 fscache_retrieval_complete(op, 1);
634                                 continue;
635                         }
636                         goto nomem;
637                 }
638
639                 copy_highpage(netpage, backpage);
640
641                 page_cache_release(backpage);
642                 backpage = NULL;
643
644                 fscache_mark_page_cached(op, netpage);
645
646                 page_cache_get(netpage);
647                 if (!pagevec_add(&lru_pvec, netpage))
648                         __pagevec_lru_add_file(&lru_pvec);
649
650                 /* the netpage is unlocked and marked up to date here */
651                 fscache_end_io(op, netpage, 0);
652                 page_cache_release(netpage);
653                 netpage = NULL;
654                 fscache_retrieval_complete(op, 1);
655                 continue;
656         }
657
658         netpage = NULL;
659
660         _debug("out");
661
662 out:
663         /* tidy up */
664         pagevec_lru_add_file(&lru_pvec);
665
666         if (newpage)
667                 page_cache_release(newpage);
668         if (netpage)
669                 page_cache_release(netpage);
670         if (backpage)
671                 page_cache_release(backpage);
672         if (monitor) {
673                 fscache_put_retrieval(op);
674                 kfree(monitor);
675         }
676
677         list_for_each_entry_safe(netpage, _n, list, lru) {
678                 list_del(&netpage->lru);
679                 page_cache_release(netpage);
680                 fscache_retrieval_complete(op, 1);
681         }
682
683         _leave(" = %d", ret);
684         return ret;
685
686 nomem:
687         _debug("nomem");
688         ret = -ENOMEM;
689         goto record_page_complete;
690
691 read_error:
692         _debug("read error %d", ret);
693         if (ret == -ENOMEM)
694                 goto record_page_complete;
695 io_error:
696         cachefiles_io_error_obj(object, "Page read error on backing file");
697         ret = -ENOBUFS;
698 record_page_complete:
699         fscache_retrieval_complete(op, 1);
700         goto out;
701 }
702
703 /*
704  * read a list of pages from the cache or allocate blocks in which to store
705  * them
706  */
707 int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
708                                    struct list_head *pages,
709                                    unsigned *nr_pages,
710                                    gfp_t gfp)
711 {
712         struct cachefiles_object *object;
713         struct cachefiles_cache *cache;
714         struct list_head backpages;
715         struct pagevec pagevec;
716         struct inode *inode;
717         struct page *page, *_n;
718         unsigned shift, nrbackpages;
719         int ret, ret2, space;
720
721         object = container_of(op->op.object,
722                               struct cachefiles_object, fscache);
723         cache = container_of(object->fscache.cache,
724                              struct cachefiles_cache, cache);
725
726         _enter("{OBJ%x,%d},,%d,,",
727                object->fscache.debug_id, atomic_read(&op->op.usage),
728                *nr_pages);
729
730         if (!object->backer)
731                 goto all_enobufs;
732
733         space = 1;
734         if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
735                 space = 0;
736
737         inode = object->backer->d_inode;
738         ASSERT(S_ISREG(inode->i_mode));
739         ASSERT(inode->i_mapping->a_ops->bmap);
740         ASSERT(inode->i_mapping->a_ops->readpages);
741
742         /* calculate the shift required to use bmap */
743         if (inode->i_sb->s_blocksize > PAGE_SIZE)
744                 goto all_enobufs;
745
746         shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
747
748         pagevec_init(&pagevec, 0);
749
750         op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
751         op->op.flags |= FSCACHE_OP_ASYNC;
752         op->op.processor = cachefiles_read_copier;
753
754         INIT_LIST_HEAD(&backpages);
755         nrbackpages = 0;
756
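        /* pages that have no backing block are reported as -ENODATA if
         * there's space in the cache to store them later, or -ENOBUFS if
         * there isn't */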
757         ret = space ? -ENODATA : -ENOBUFS;
758         list_for_each_entry_safe(page, _n, pages, lru) {
759                 sector_t block0, block;
760
761                 /* we assume the absence or presence of the first block is a
762                  * good enough indication for the page as a whole
763                  * - TODO: don't use bmap() for this as it is _not_ actually
764                  *   good enough: it doesn't indicate errors, but
765                  *   it's all we've got for the moment
766                  */
767                 block0 = page->index;
768                 block0 <<= shift;
769
770                 block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
771                                                       block0);
772                 _debug("%llx -> %llx",
773                        (unsigned long long) block0,
774                        (unsigned long long) block);
775
776                 if (block) {
777                         /* we have data - add it to the list to give to the
778                          * backing fs */
779                         list_move(&page->lru, &backpages);
780                         (*nr_pages)--;
781                         nrbackpages++;
782                 } else if (space && pagevec_add(&pagevec, page) == 0) {
783                         fscache_mark_pages_cached(op, &pagevec);
784                         fscache_retrieval_complete(op, 1);
785                         ret = -ENODATA;
786                 } else {
787                         fscache_retrieval_complete(op, 1);
788                 }
789         }
790
791         if (pagevec_count(&pagevec) > 0)
792                 fscache_mark_pages_cached(op, &pagevec);
793
794         if (list_empty(pages))
795                 ret = 0;
796
797         /* submit the apparently valid pages to the backing fs to be read from
798          * disk */
799         if (nrbackpages > 0) {
800                 ret2 = cachefiles_read_backing_file(object, op, &backpages);
801                 if (ret2 == -ENOMEM || ret2 == -EINTR)
802                         ret = ret2;
803         }
804
805         _leave(" = %d [nr=%u%s]",
806                ret, *nr_pages, list_empty(pages) ? " empty" : "");
807         return ret;
808
809 all_enobufs:
810         fscache_retrieval_complete(op, *nr_pages);
811         return -ENOBUFS;
812 }
813
814 /*
815  * allocate a block in the cache in which to store a page
816  * - cache withdrawal is prevented by the caller
817  * - returns -EINTR if interrupted
818  * - returns -ENOMEM if we ran out of memory
819  * - returns -ENOBUFS if no buffers can be made available
820  * - returns -ENOBUFS if page is beyond EOF
821  * - otherwise:
822  *   - the metadata will be retained
823  *   - 0 will be returned
824  */
825 int cachefiles_allocate_page(struct fscache_retrieval *op,
826                              struct page *page,
827                              gfp_t gfp)
828 {
829         struct cachefiles_object *object;
830         struct cachefiles_cache *cache;
831         int ret;
832
833         object = container_of(op->op.object,
834                               struct cachefiles_object, fscache);
835         cache = container_of(object->fscache.cache,
836                              struct cachefiles_cache, cache);
837
838         _enter("%p,{%lx},", object, page->index);
839
840         ret = cachefiles_has_space(cache, 0, 1);
841         if (ret == 0)
842                 fscache_mark_page_cached(op, page);
843         else
844                 ret = -ENOBUFS;
845
846         fscache_retrieval_complete(op, 1);
847         _leave(" = %d", ret);
848         return ret;
849 }
850
851 /*
852  * allocate blocks in the cache in which to store a set of pages
853  * - cache withdrawal is prevented by the caller
854  * - returns -EINTR if interrupted
855  * - returns -ENOMEM if we ran out of memory
856  * - returns -ENOBUFS if some buffers couldn't be made available
857  * - returns -ENOBUFS if some pages are beyond EOF
858  * - otherwise:
859  *   - -ENODATA will be returned
860  * - metadata will be retained for any page marked as being cached
861  */
862 int cachefiles_allocate_pages(struct fscache_retrieval *op,
863                               struct list_head *pages,
864                               unsigned *nr_pages,
865                               gfp_t gfp)
866 {
867         struct cachefiles_object *object;
868         struct cachefiles_cache *cache;
869         struct pagevec pagevec;
870         struct page *page;
871         int ret;
872
873         object = container_of(op->op.object,
874                               struct cachefiles_object, fscache);
875         cache = container_of(object->fscache.cache,
876                              struct cachefiles_cache, cache);
877
878         _enter("%p,,,%d,", object, *nr_pages);
879
880         ret = cachefiles_has_space(cache, 0, *nr_pages);
881         if (ret == 0) {
882                 pagevec_init(&pagevec, 0);
883
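                /* mark the netfs pages as being cached, flushing the pagevec
                 * to fscache each time it fills up */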
884                 list_for_each_entry(page, pages, lru) {
885                         if (pagevec_add(&pagevec, page) == 0)
886                                 fscache_mark_pages_cached(op, &pagevec);
887                 }
888
889                 if (pagevec_count(&pagevec) > 0)
890                         fscache_mark_pages_cached(op, &pagevec);
891                 ret = -ENODATA;
892         } else {
893                 ret = -ENOBUFS;
894         }
895
896         fscache_retrieval_complete(op, *nr_pages);
897         _leave(" = %d", ret);
898         return ret;
899 }
900
901 /*
902  * request a page be stored in the cache
903  * - cache withdrawal is prevented by the caller
904  * - this request may be ignored if there's no cache block available, in which
905  *   case -ENOBUFS will be returned
906  * - if the op is in progress, 0 will be returned
907  */
908 int cachefiles_write_page(struct fscache_storage *op, struct page *page)
909 {
910         struct cachefiles_object *object;
911         struct cachefiles_cache *cache;
912         mm_segment_t old_fs;
913         struct file *file;
914         struct path path;
915         loff_t pos, eof;
916         size_t len;
917         void *data;
918         int ret;
919
920         ASSERT(op != NULL);
921         ASSERT(page != NULL);
922
923         object = container_of(op->op.object,
924                               struct cachefiles_object, fscache);
925
926         _enter("%p,%p{%lx},,,", object, page, page->index);
927
928         if (!object->backer) {
929                 _leave(" = -ENOBUFS");
930                 return -ENOBUFS;
931         }
932
933         ASSERT(S_ISREG(object->backer->d_inode->i_mode));
934
935         cache = container_of(object->fscache.cache,
936                              struct cachefiles_cache, cache);
937
938         /* write the page to the backing filesystem and let it store it in its
939          * own time */
940         path.mnt = cache->mnt;
941         path.dentry = object->backer;
942         file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred);
943         if (IS_ERR(file)) {
944                 ret = PTR_ERR(file);
945         } else {
946                 ret = -EIO;
947                 if (file->f_op->write) {
948                         pos = (loff_t) page->index << PAGE_SHIFT;
949
950                         /* we mustn't write more data than we have, so we have
951                          * to beware of a partial page at EOF */
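                        /* e.g. if the store limit (eof) is 0x1234 and pos is
                         * 0x1000, only 0x234 bytes should be written for this
                         * page */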
952                         eof = object->fscache.store_limit_l;
953                         len = PAGE_SIZE;
954                         if (eof & ~PAGE_MASK) {
955                                 ASSERTCMP(pos, <, eof);
956                                 if (eof - pos < PAGE_SIZE) {
957                                         _debug("cut short %llx to %llx",
958                                                pos, eof);
959                                         len = eof - pos;
960                                         ASSERTCMP(pos + len, ==, eof);
961                                 }
962                         }
963
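                        /* map the page and temporarily lift the usercopy
                         * address limit so that ->write() will accept a
                         * kernel buffer */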
964                         data = kmap(page);
965                         file_start_write(file);
966                         old_fs = get_fs();
967                         set_fs(KERNEL_DS);
968                         ret = file->f_op->write(
969                                 file, (const void __user *) data, len, &pos);
970                         set_fs(old_fs);
971                         kunmap(page);
972                         file_end_write(file);
973                         if (ret != len)
974                                 ret = -EIO;
975                 }
976                 fput(file);
977         }
978
979         if (ret < 0) {
980                 if (ret == -EIO)
981                         cachefiles_io_error_obj(
982                                 object, "Write page to backing file failed");
983                 ret = -ENOBUFS;
984         }
985
986         _leave(" = %d", ret);
987         return ret;
988 }
989
990 /*
991  * detach a backing block from a page
992  * - cache withdrawal is prevented by the caller
993  */
994 void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
995 {
996         struct cachefiles_object *object;
997         struct cachefiles_cache *cache;
998
999         object = container_of(_object, struct cachefiles_object, fscache);
1000         cache = container_of(object->fscache.cache,
1001                              struct cachefiles_cache, cache);
1002
1003         _enter("%p,{%lu}", object, page->index);
1004
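        /* note: the caller (__fscache_uncache_page()) is assumed to hold the
         * cookie lock and to expect the backend to release it */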
1005         spin_unlock(&object->fscache.cookie->lock);
1006 }