// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/module.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/netfs.h>

MODULE_DESCRIPTION("Network fs support");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	struct iov_iter iter;

	iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len   - subreq->transferred);
	iov_iter_zero(iov_iter_count(&iter), &iter);
}

static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;

	netfs_subreq_terminated(subreq, transferred_or_error, was_async);
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_from_cache(struct netfs_io_request *rreq,
				  struct netfs_io_subrequest *subreq,
				  enum netfs_read_from_hole read_hole)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct iov_iter iter;

	netfs_stat(&netfs_n_rh_read);
	iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len   - subreq->transferred);

	cres->ops->read(cres, subreq->start, &iter, read_hole,
			netfs_cache_read_terminated, subreq);
}

/*
 * Fill a subrequest region with zeroes.
 */
static void netfs_fill_with_zeroes(struct netfs_io_request *rreq,
				   struct netfs_io_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_zero);
	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
	netfs_subreq_terminated(subreq, 0, false);
}

/*
 * Ask the netfs to issue a read request to the server for us.
 *
 * The netfs is expected to read from subreq->pos + subreq->transferred to
 * subreq->pos + subreq->len - 1.  It may not backtrack and write data into the
 * buffer prior to the transferred point as it might clobber dirty data
 * obtained from the cache.
 *
 * Alternatively, the netfs is allowed to indicate one of two things:
 *
 * - NETFS_SREQ_SHORT_READ: A short read - it will get called again to try and
 *   make progress.
 *
 * - NETFS_SREQ_CLEAR_TAIL: A short read - the rest of the buffer will be
 *   cleared.
 */
static void netfs_read_from_server(struct netfs_io_request *rreq,
				   struct netfs_io_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_download);
	rreq->netfs_ops->issue_read(subreq);
}

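/*
 * Illustrative sketch only (not taken from any in-tree filesystem): an
 * ->issue_read() implementation is expected to hand its outcome back via
 * netfs_subreq_terminated(), supplying either a byte count or a negative
 * error, e.g.:
 *
 *	static void my_netfs_issue_read(struct netfs_io_subrequest *subreq)
 *	{
 *		ssize_t ret = my_netfs_fetch_data(subreq); // hypothetical transport call
 *
 *		netfs_subreq_terminated(subreq, ret, false);
 *	}
 */
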
/*
 * Release those waiting.
 */
static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq, was_async);
	netfs_put_request(rreq, was_async, netfs_rreq_trace_put_complete);
}

/*
 * Deal with the completion of writing the data to the cache.  We have to clear
 * the PG_fscache bits on the folios involved and release the caller's ref.
 *
 * May be called in softirq mode and we inherit a ref from the caller.
 */
static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
					  bool was_async)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	pgoff_t unlocked = 0;
	bool have_unlocked = false;

	rcu_read_lock();

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);

		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
			/* We might have multiple writes from the same huge
			 * folio, but we mustn't unlock a folio more than once.
			 */
			if (have_unlocked && folio_index(folio) <= unlocked)
				continue;
			unlocked = folio_index(folio);
			folio_end_fscache(folio);
			have_unlocked = true;
		}
	}

	rcu_read_unlock();
	netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;
	struct netfs_io_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error)) {
		netfs_stat(&netfs_n_rh_write_failed);
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_copy_to_cache);
	} else {
		netfs_stat(&netfs_n_rh_write_done);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);

	/* If we decrement nr_copy_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_copy_ops))
		netfs_rreq_unmark_after_write(rreq, was_async);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
}

/*
 * Perform any outstanding writes to the cache.  We inherit a ref from the
 * caller.
 */
static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct netfs_io_subrequest *subreq, *next, *p;
	struct iov_iter iter;
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_copy);

	/* We don't want terminating writes trying to wake us up whilst we're
	 * still going through the list.
	 */
	atomic_inc(&rreq->nr_copy_ops);

	list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
		if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
			list_del_init(&subreq->rreq_link);
			netfs_put_subrequest(subreq, false,
					     netfs_sreq_trace_put_no_copy);
		}
	}

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		/* Amalgamate adjacent writes */
		while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
			next = list_next_entry(subreq, rreq_link);
			if (next->start != subreq->start + subreq->len)
				break;
			subreq->len += next->len;
			list_del_init(&next->rreq_link);
			netfs_put_subrequest(next, false,
					     netfs_sreq_trace_put_merged);
		}

		ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
					       rreq->i_size, true);
		if (ret < 0) {
			trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
			trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
			continue;
		}

		iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages,
				subreq->start, subreq->len);

		atomic_inc(&rreq->nr_copy_ops);
		netfs_stat(&netfs_n_rh_write);
		netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache);
		trace_netfs_sreq(subreq, netfs_sreq_trace_write);
		cres->ops->write(cres, subreq->start, &iter,
				 netfs_rreq_copy_terminated, subreq);
	}

	/* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_copy_ops))
		netfs_rreq_unmark_after_write(rreq, false);
}

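/*
 * Copying to the cache is punted to the unbound system workqueue so that it
 * runs in process context: the read may have been completed from softirq
 * context, but the cache's ->prepare_write() and ->write() operations may
 * need to sleep.
 */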
static void netfs_rreq_write_to_cache_work(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);

	netfs_rreq_do_write_to_cache(rreq);
}

static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq)
{
	rreq->work.func = netfs_rreq_write_to_cache_work;
	if (!queue_work(system_unbound_wq, &rreq->work))
		BUG();
}

/*
 * Unlock the folios in a read operation.  We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	unsigned int iopos, account = 0;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
		__clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
			__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
		}
	}

	/* Walk through the pagecache and the I/O request lists simultaneously.
	 * We may have a mixture of cached and uncached sections and we only
	 * really want to write out the uncached sections.  This is slightly
	 * complicated by the possibility that we might have huge pages with a
	 * mixture inside.
	 */
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_io_subrequest, rreq_link);
	iopos = 0;
	subreq_failed = (subreq->error < 0);

	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
		unsigned int pgend = pgpos + folio_size(folio);
		bool pg_failed = false;

		for (;;) {
			if (!subreq) {
				pg_failed = true;
				break;
			}
			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
				folio_start_fscache(folio);
			pg_failed |= subreq_failed;
			if (pgend < iopos + subreq->len)
				break;

			account += subreq->transferred;
			iopos += subreq->len;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}
			if (pgend == iopos)
				break;
		}

		if (!pg_failed) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			if (folio_index(folio) == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
				_debug("no unlock");
			else
				folio_unlock(folio);
		}
	}
	rcu_read_unlock();

	task_io_account_read(account);
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Handle a short read.
 */
static void netfs_rreq_short_read(struct netfs_io_request *rreq,
				  struct netfs_io_subrequest *subreq)
{
	__clear_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	__set_bit(NETFS_SREQ_SEEK_DATA_READ, &subreq->flags);

	netfs_stat(&netfs_n_rh_short_read);
	trace_netfs_sreq(subreq, netfs_sreq_trace_resubmit_short);

	netfs_get_subrequest(subreq, netfs_sreq_trace_get_short_read);
	atomic_inc(&rreq->nr_outstanding);
	if (subreq->source == NETFS_READ_FROM_CACHE)
		netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_CLEAR);
	else
		netfs_read_from_server(rreq, subreq);
}

/*
 * Resubmit any short or failed operations.  Returns true if we got the rreq
 * ref back.
 */
static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;

	WARN_ON(in_interrupt());

	trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

	/* We don't want terminating submissions trying to wake us up whilst
	 * we're still going through the list.
	 */
	atomic_inc(&rreq->nr_outstanding);

	__clear_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->error) {
			if (subreq->source != NETFS_READ_FROM_CACHE)
				break;
			subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
			subreq->error = 0;
			netfs_stat(&netfs_n_rh_download_instead);
			trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
			atomic_inc(&rreq->nr_outstanding);
			netfs_read_from_server(rreq, subreq);
		} else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
			netfs_rreq_short_read(rreq, subreq);
		}
	}

	/* If we decrement nr_outstanding to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		return true;

	wake_up_var(&rreq->nr_outstanding);
	return false;
}

/*
 * Check to see if the data read is still valid.
 */
static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;

	if (!rreq->netfs_ops->is_still_valid ||
	    rreq->netfs_ops->is_still_valid(rreq))
		return;

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			subreq->error = -ESTALE;
			__set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
		}
	}
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we could be in an ordinary kernel thread, on a workqueue or in
 * softirq context at this point.  We inherit a ref from the caller.
 */
static void netfs_rreq_assess(struct netfs_io_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

again:
	netfs_rreq_is_still_valid(rreq);

	if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) &&
	    test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) {
		if (netfs_rreq_perform_resubmissions(rreq))
			goto again;
		return;
	}

	netfs_rreq_unlock_folios(rreq);

	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags))
		return netfs_rreq_write_to_cache(rreq);

	netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_work(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);
	netfs_rreq_assess(rreq, false);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
static void netfs_rreq_terminated(struct netfs_io_request *rreq,
				  bool was_async)
{
	if (test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags) &&
	    was_async) {
		if (!queue_work(system_unbound_wq, &rreq->work))
			BUG();
	} else {
		netfs_rreq_assess(rreq, was_async);
	}
}

/**
 * netfs_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred, 0 to
 * indicate a failure to transfer anything that should be retried or a negative
 * error code.  The helper will look after reissuing I/O operations as
 * appropriate and writing downloaded data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
			     ssize_t transferred_or_error,
			     bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;
	int u;

	_enter("[%u]{%llx,%lx},%zd",
	       subreq->debug_index, subreq->start, subreq->flags,
	       transferred_or_error);

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_read);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq overread: R%x[%x] %zd > %zu - %zu",
		 rreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;
	if (subreq->transferred < subreq->len)
		goto incomplete;

complete:
	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
		set_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);

out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	u = atomic_dec_return(&rreq->nr_outstanding);
	if (u == 0)
		netfs_rreq_terminated(rreq, was_async);
	else if (u == 1)
		wake_up_var(&rreq->nr_outstanding);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
	return;

incomplete:
	if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
		netfs_clear_unread(subreq);
		subreq->transferred = subreq->len;
		goto complete;
	}

	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	goto out;

failed:
	if (subreq->source == NETFS_READ_FROM_CACHE) {
		netfs_stat(&netfs_n_rh_read_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	} else {
		netfs_stat(&netfs_n_rh_download_failed);
		set_bit(NETFS_RREQ_FAILED, &rreq->flags);
		rreq->error = subreq->error;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_subreq_terminated);

static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops)
		return cres->ops->prepare_read(subreq, i_size);
	if (subreq->start >= rreq->i_size)
		return NETFS_FILL_WITH_ZEROES;
	return NETFS_DOWNLOAD_FROM_SERVER;
}

/*
 * Work out what sort of subrequest the next one will be.
 */
static enum netfs_io_source
netfs_rreq_prepare_read(struct netfs_io_request *rreq,
			struct netfs_io_subrequest *subreq)
{
	enum netfs_io_source source;

	_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);

	source = netfs_cache_prepare_read(subreq, rreq->i_size);
	if (source == NETFS_INVALID_READ)
		goto out;

	if (source == NETFS_DOWNLOAD_FROM_SERVER) {
		/* Call out to the netfs to let it shrink the request to fit
		 * its own I/O sizes and boundaries.  If it shrinks it here, it
		 * will be called again to make simultaneous calls; if it wants
		 * to make serial calls, it can indicate a short read and then
		 * we will call it again.
		 */
		if (subreq->len > rreq->i_size - subreq->start)
			subreq->len = rreq->i_size - subreq->start;

		if (rreq->netfs_ops->clamp_length &&
		    !rreq->netfs_ops->clamp_length(subreq)) {
			source = NETFS_INVALID_READ;
			goto out;
		}
	}

	if (WARN_ON(subreq->len == 0))
		source = NETFS_INVALID_READ;

out:
	subreq->source = source;
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}

/*
 * Slice off a piece of a read request and submit an I/O request for it.
 */
static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
				    unsigned int *_debug_index)
{
	struct netfs_io_subrequest *subreq;
	enum netfs_io_source source;

	subreq = netfs_alloc_subrequest(rreq);
	if (!subreq)
		return false;

	subreq->debug_index	= (*_debug_index)++;
	subreq->start		= rreq->start + rreq->submitted;
	subreq->len		= rreq->len   - rreq->submitted;

	_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
	list_add_tail(&subreq->rreq_link, &rreq->subrequests);

	/* Call out to the cache to find out what it can do with the remaining
	 * subset.  It tells us in subreq->flags what it decided should be done
	 * and adjusts subreq->len down if the subset crosses a cache boundary.
	 *
	 * Then when we hand the subset, it can choose to take a subset of that
	 * (the starts must coincide), in which case, we go around the loop
	 * again and ask it to download the next piece.
	 */
	source = netfs_rreq_prepare_read(rreq, subreq);
	if (source == NETFS_INVALID_READ)
		goto subreq_failed;

	atomic_inc(&rreq->nr_outstanding);

	rreq->submitted += subreq->len;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
	switch (source) {
	case NETFS_FILL_WITH_ZEROES:
		netfs_fill_with_zeroes(rreq, subreq);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_read_from_server(rreq, subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_IGNORE);
		break;
	default:
		BUG();
	}

	return true;

subreq_failed:
	rreq->error = subreq->error;
	netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_failed);
	return false;
}

/*
 * Begin the process of reading in a chunk of data, where that data may be
 * stitched together from multiple sources, including multiple servers and the
 * local cache.
 */
int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
{
	unsigned int debug_index = 0;
	int ret;

	_enter("R=%x %llx-%llx",
	       rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);

	if (rreq->len == 0) {
		pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
		netfs_put_request(rreq, false, netfs_rreq_trace_put_zero_len);
		return -EIO;
	}

	INIT_WORK(&rreq->work, netfs_rreq_work);

	if (sync)
		netfs_get_request(rreq, netfs_rreq_trace_get_hold);

	/* Chop the read into slices according to what the cache and the netfs
	 * want and submit each one.
	 */
	atomic_set(&rreq->nr_outstanding, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	if (sync) {
		/* Keep nr_outstanding incremented so that the ref always belongs to
		 * us, and the service code isn't punted off to a random thread pool to
		 * process.
		 */
		for (;;) {
			wait_var_event(&rreq->nr_outstanding,
				       atomic_read(&rreq->nr_outstanding) == 1);
			netfs_rreq_assess(rreq, false);
			if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
				break;
			cond_resched();
		}

		ret = rreq->error;
		if (ret == 0 && rreq->submitted < rreq->len) {
			trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
			ret = -EIO;
		}
		netfs_put_request(rreq, false, netfs_rreq_trace_put_hold);
	} else {
		/* If we decrement nr_outstanding to 0, the ref belongs to us. */
		if (atomic_dec_and_test(&rreq->nr_outstanding))
			netfs_rreq_assess(rreq, false);
		ret = 0;
	}
	return ret;
}

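/*
 * Note that the helpers below use this in both modes: netfs_readpage() and
 * netfs_write_begin() pass sync=true and get the final error back directly,
 * whereas netfs_readahead() passes sync=false and lets the request complete
 * (and clean itself up) asynchronously.
 */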
static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 loff_t *_start, size_t *_len, loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_i_context *ctx = netfs_i_context(ractl->mapping->host);
	int ret;

	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

	if (readahead_count(ractl) == 0)
		return;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file,
				   readahead_pos(ractl),
				   readahead_length(ractl),
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	if (ctx->ops->begin_cache_operation) {
		ret = ctx->ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto cleanup_free;
	}

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	/* Drop the refs on the folios here rather than in the cache or
	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
	 */
	while (readahead_folio(ractl))
		;

	netfs_begin_read(rreq, false);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);

/**
 * netfs_readpage - Helper to manage a readpage request
 * @file: The file to read from
 * @subpage: A subpage of the folio to read
 *
 * Fulfil a readpage request by drawing data from the cache if possible, or the
 * netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O requests
 * from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_readpage(struct file *file, struct page *subpage)
{
	struct folio *folio = page_folio(subpage);
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_i_context *ctx = netfs_i_context(mapping->host);
	int ret;

	_enter("%lx", folio_index(folio));

	rreq = netfs_alloc_request(mapping, file,
				   folio_file_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	if (ctx->ops->begin_cache_operation) {
		ret = ctx->ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto discard;
	}

	netfs_stat(&netfs_n_rh_readpage);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
	return netfs_begin_read(rreq, true);

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_readpage);

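/*
 * For reference (typical usage, not mandated here): a filesystem using these
 * helpers generally points its address_space_operations straight at them,
 * e.g. (my_netfs_aops being a hypothetical example):
 *
 *	const struct address_space_operations my_netfs_aops = {
 *		.readpage	= netfs_readpage,
 *		.readahead	= netfs_readahead,
 *		...
 *	};
 */
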
/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		zero_user_segment(&folio->page, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	zero_user_segments(&folio->page, 0, offset, offset + len, plen);
	return true;
}

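/*
 * For example, netfs_skip_folio_read() lets a write that covers all 4096
 * bytes of a 4KiB folio, or any write into a folio lying wholly beyond EOF,
 * skip the preparatory read; the parts of the folio not covered by the write
 * are zeroed instead.
 */
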
/**
 * netfs_write_begin - Helper to prepare for writing
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @aop_flags: AOP_* flags
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.  If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
 * be regot; or return an error.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, unsigned int aop_flags,
		      struct folio **_folio, void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct netfs_i_context *ctx = netfs_i_context(file_inode(file));
	struct folio *folio;
	unsigned int fgp_flags;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
	fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
	if (aop_flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;
	folio = __filemap_get_folio(mapping, index, fgp_flags,
				    mapping_gfp_mask(mapping));
	if (!folio)
		return -ENOMEM;

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			if (ret == -EAGAIN)
				goto retry;
			goto error;
		}
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the page is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_file_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio	= folio_index(folio);
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	if (ctx->ops->begin_cache_operation) {
		ret = ctx->ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto error_put;
	}

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Expand the request to meet caching requirements and download
	 * preferences.
	 */
	ractl._nr_pages = folio_nr_pages(folio);
	netfs_rreq_expand(rreq, &ractl);

	/* We hold the folio locks, so we can drop the references */
	folio_get(folio);
	while (readahead_folio(&ractl))
		;

	ret = netfs_begin_read(rreq, true);
	if (ret < 0)
		goto error;

have_folio:
	ret = folio_wait_fscache_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	folio_unlock(folio);
	folio_put(folio);
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
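
/*
 * Purely illustrative sketch (hypothetical filesystem, not taken from this
 * file): a netfs would typically call netfs_write_begin() from its own
 * ->write_begin() and hand the folio's head page back to the VM:
 *
 *	static int my_netfs_write_begin(struct file *file,
 *					struct address_space *mapping,
 *					loff_t pos, unsigned int len,
 *					unsigned int aop_flags,
 *					struct page **pagep, void **fsdata)
 *	{
 *		struct folio *folio;
 *		int ret = netfs_write_begin(file, mapping, pos, len, aop_flags,
 *					    &folio, fsdata);
 *		if (ret == 0)
 *			*pagep = &folio->page;
 *		return ret;
 *	}
 */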