1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2022, Alibaba Cloud
4 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
6 #include <linux/fscache.h>
/*
 * File-scope state for shared domains.
 *
 * In this file erofs_domain_list_lock is held while erofs_domain_list is
 * searched or modified and while erofs_pseudo_mnt is mounted or unmounted.
 * erofs_domain_cookies_lock is held around the lookup/creation of a domain
 * cookie and around dropping its anon inode.
 */
9 static DEFINE_MUTEX(erofs_domain_list_lock);
10 static DEFINE_MUTEX(erofs_domain_cookies_lock);
11 static LIST_HEAD(erofs_domain_list);
/* Pseudo mount whose superblock provides the anon inodes of domain cookies. */
12 static struct vfsmount *erofs_pseudo_mnt;
/*
 * Allocate a zeroed netfs_io_request (GFP_KERNEL) for @mapping, record the
 * mapping and its host inode, initialise the subrequest list and set the
 * reference count to 1.  ERR_PTR(-ENOMEM) is a possible return value.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering; the
 * allocation-failure check, any use of @start/@len and the success return
 * are not visible here - confirm against the full file.
 */
14 static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
15 loff_t start, size_t len)
17 struct netfs_io_request *rreq;
19 rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
21 return ERR_PTR(-ENOMEM);
25 rreq->mapping = mapping;
26 rreq->inode = mapping->host;
27 INIT_LIST_HEAD(&rreq->subrequests);
/* The creator owns the initial reference. */
28 refcount_set(&rreq->ref, 1);
/*
 * Drop one reference on @rreq.  The visible teardown step ends the cache
 * operation through cache_resources.ops->end_operation() when ops is set.
 *
 * NOTE(review): the statement guarded by the refcount test (presumably an
 * early return) and the release of @rreq itself are not in the visible
 * lines - confirm against the full file.
 */
32 static void erofs_fscache_put_request(struct netfs_io_request *rreq)
34 if (!refcount_dec_and_test(&rreq->ref))
36 if (rreq->cache_resources.ops)
37 rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
/*
 * Drop one reference on @subreq and release the reference the subrequest
 * holds on its parent request via erofs_fscache_put_request().
 *
 * NOTE(review): the statement guarded by the refcount test (presumably an
 * early return) and the release of @subreq are not in the visible lines.
 */
41 static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
43 if (!refcount_dec_and_test(&subreq->ref))
45 erofs_fscache_put_request(subreq->rreq);
/*
 * Empty @rreq->subrequests: repeatedly take the first entry, unlink it from
 * the list and drop one reference on it.
 */
49 static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
51 struct netfs_io_subrequest *subreq;
53 while (!list_empty(&rreq->subrequests)) {
54 subreq = list_first_entry(&rreq->subrequests,
55 struct netfs_io_subrequest, rreq_link);
56 list_del(&subreq->rreq_link);
57 erofs_fscache_put_subrequest(subreq);
/*
 * Walk the folios of @rreq->mapping that lie in the page range covered by
 * the request and match them against the request's subrequests in list
 * order.  A subrequest counts as failed when its ->error is negative, and
 * that state is folded into the per-folio pg_failed flag.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering.  The
 * inner loop header, the advancing of iopos, the condition guarding
 * folio_mark_uptodate() and the folio unlock implied by the function name
 * are not visible here - confirm against the full file.
 */
61 static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
63 struct netfs_io_subrequest *subreq;
65 unsigned int iopos = 0;
/* First and last page index covered by [start, start + len). */
66 pgoff_t start_page = rreq->start / PAGE_SIZE;
67 pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
68 bool subreq_failed = false;
70 XA_STATE(xas, &rreq->mapping->i_pages, start_page);
72 subreq = list_first_entry(&rreq->subrequests,
73 struct netfs_io_subrequest, rreq_link);
74 subreq_failed = (subreq->error < 0);
77 xas_for_each(&xas, folio, last_page) {
78 unsigned int pgpos, pgend;
79 bool pg_failed = false;
81 if (xas_retry(&xas, folio))
/* Byte offsets of this folio relative to the start of the request. */
84 pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
85 pgend = pgpos + folio_size(folio);
93 pg_failed |= subreq_failed;
94 if (pgend < iopos + subreq->len)
/* Move on to the next subrequest if there is one. */
98 if (!list_is_last(&subreq->rreq_link,
99 &rreq->subrequests)) {
100 subreq = list_next_entry(subreq, rreq_link);
101 subreq_failed = (subreq->error < 0);
104 subreq_failed = false;
111 folio_mark_uptodate(folio);
/*
 * Finish a read request: process its folios, drop all of its subrequests,
 * then drop one reference on the request itself.
 */
118 static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
120 erofs_fscache_rreq_unlock_folios(rreq);
121 erofs_fscache_clear_subrequests(rreq);
122 erofs_fscache_put_request(rreq);
/*
 * Completion callback handed to fscache_read(); @priv is the subrequest.
 * An error value in @transferred_or_error is stored in subreq->error.  The
 * request's nr_outstanding count is decremented and the request is
 * completed when it reaches zero; finally one reference on the subrequest
 * is dropped.
 *
 * NOTE(review): the "erofc_" prefix looks like a typo for "erofs_".  The
 * name is also used where the callback is passed to fscache_read(), so both
 * places must change together.
 */
125 static void erofc_fscache_subreq_complete(void *priv,
126 ssize_t transferred_or_error, bool was_async)
128 struct netfs_io_subrequest *subreq = priv;
129 struct netfs_io_request *rreq = subreq->rreq;
131 if (IS_ERR_VALUE(transferred_or_error))
132 subreq->error = transferred_or_error;
134 if (atomic_dec_and_test(&rreq->nr_outstanding))
135 erofs_fscache_rreq_complete(rreq);
137 erofs_fscache_put_subrequest(subreq);
141 * Read data from fscache and fill the read data into page cache described by
142 * @rreq, which shall be both aligned with PAGE_SIZE. @pstart describes
143 * the start physical address in the cache file.
/*
 * Issue the cache reads for @rreq.  Each subrequest is allocated, linked
 * into rreq->subrequests, sized by the cache's prepare_read() hook and then
 * submitted with fscache_read() into the page cache of rreq->mapping.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering.  The
 * loop construct, the declarations of ret/done, the error paths and the
 * return statement are not visible here - confirm against the full file.
 */
145 static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
146 struct netfs_io_request *rreq, loff_t pstart)
148 enum netfs_io_source source;
149 struct super_block *sb = rreq->mapping->host->i_sb;
150 struct netfs_io_subrequest *subreq;
151 struct netfs_cache_resources *cres = &rreq->cache_resources;
152 struct iov_iter iter;
153 loff_t start = rreq->start;
154 size_t len = rreq->len;
/*
 * Start the outstanding count at 1; it is dropped again at the end of this
 * function.  Presumably this keeps the request from completing while
 * subrequests are still being issued - TODO confirm.
 */
158 atomic_set(&rreq->nr_outstanding, 1);
160 ret = fscache_begin_read_operation(cres, cookie);
165 subreq = kzalloc(sizeof(struct netfs_io_subrequest),
168 INIT_LIST_HEAD(&subreq->rreq_link);
/* Two references; presumably one for the list and one for the I/O. */
169 refcount_set(&subreq->ref, 2);
/* Each subrequest holds a reference on its parent request. */
171 refcount_inc(&rreq->ref);
177 subreq->start = pstart + done;
178 subreq->len = len - done;
179 subreq->flags = 1 << NETFS_SREQ_ONDEMAND;
181 list_add_tail(&subreq->rreq_link, &rreq->subrequests);
/* prepare_read() may adjust subreq->len; a zero length is rejected. */
183 source = cres->ops->prepare_read(subreq, LLONG_MAX);
184 if (WARN_ON(subreq->len == 0))
185 source = NETFS_INVALID_READ;
186 if (source != NETFS_READ_FROM_CACHE) {
187 erofs_err(sb, "failed to fscache prepare_read (source %d)",
191 erofs_fscache_put_subrequest(subreq);
195 atomic_inc(&rreq->nr_outstanding);
/* Destination: the page cache range matching this subrequest. */
197 iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
198 start + done, subreq->len);
200 ret = fscache_read(cres, subreq->start, &iter,
201 NETFS_READ_HOLE_FAIL,
202 erofc_fscache_subreq_complete, subreq);
203 if (ret == -EIOCBQUEUED)
206 erofs_err(sb, "failed to fscache_read (ret %d)", ret);
/* Drop the initial count; complete here if nothing is still in flight. */
213 if (atomic_dec_and_test(&rreq->nr_outstanding))
214 erofs_fscache_rreq_complete(rreq);
/*
 * .read_folio handler for metadata folios.  The folio position is used as
 * the address passed to erofs_map_dev(); a request covering exactly this
 * folio is allocated and read asynchronously from the cookie of the mapped
 * device's fscache context.
 *
 * NOTE(review): the error handling after erofs_map_dev() and after the
 * request allocation is not in the visible lines.
 */
219 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
222 struct super_block *sb = folio_mapping(folio)->host->i_sb;
223 struct netfs_io_request *rreq;
224 struct erofs_map_dev mdev = {
226 .m_pa = folio_pos(folio),
229 ret = erofs_map_dev(sb, &mdev);
233 rreq = erofs_fscache_alloc_request(folio_mapping(folio),
234 folio_pos(folio), folio_size(folio));
240 return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
248 * Read into page cache in the range described by (@pos, @len).
250 * On return, the caller is responsible for page unlocking if the output @unlock
251 * is true, or the callee will take this responsibility through netfs_io_request
254 * The return value is the number of bytes successfully handled, or negative
255 * error code on failure. The only exception is that, the length of the range
256 * instead of the error code is returned on failure after netfs_io_request is
257 * allocated, so that .readahead() could advance rac accordingly.
/*
 * Three cases are visible below, selected by the flags that
 * erofs_map_blocks() returns:
 *  - EROFS_MAP_META: data is copied from a metadata buffer into the page
 *    cache and the rest of the page is zero-filled;
 *  - not EROFS_MAP_MAPPED: the range is zero-filled;
 *  - otherwise: a request is allocated and read asynchronously from the
 *    fscache cookie of the mapped device.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering.  The
 * setup of @map, the handling of @unlock, the error checks and the return
 * statements are not visible here - confirm against the full file.
 */
259 static int erofs_fscache_data_read(struct address_space *mapping,
260 loff_t pos, size_t len, bool *unlock)
262 struct inode *inode = mapping->host;
263 struct super_block *sb = inode->i_sb;
264 struct netfs_io_request *rreq;
265 struct erofs_map_blocks map;
266 struct erofs_map_dev mdev;
267 struct iov_iter iter;
274 ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
278 if (map.m_flags & EROFS_MAP_META) {
279 struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
284 /* For tail packing layout, the offset may be non-zero. */
285 offset = erofs_blkoff(map.m_pa);
286 blknr = erofs_blknr(map.m_pa);
289 src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
/* Copy @size bytes into the page at @pos, then zero the remainder. */
293 iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
294 if (copy_to_iter(src + offset, size, &iter) != size) {
295 erofs_put_metabuf(&buf);
298 iov_iter_zero(PAGE_SIZE - size, &iter);
299 erofs_put_metabuf(&buf);
303 if (!(map.m_flags & EROFS_MAP_MAPPED)) {
305 iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
306 iov_iter_zero(count, &iter);
/* Clamp to what is left of the mapped extent starting at @pos. */
310 count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
311 DBG_BUGON(!count || count % PAGE_SIZE);
313 mdev = (struct erofs_map_dev) {
314 .m_deviceid = map.m_deviceid,
317 ret = erofs_map_dev(sb, &mdev);
321 rreq = erofs_fscache_alloc_request(mapping, pos, count);
323 return PTR_ERR(rreq);
/* The physical start is offset by the same distance as @pos into the extent. */
326 erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
327 rreq, mdev.m_pa + (pos - map.m_la));
/*
 * .read_folio handler for data folios: reads the single folio through
 * erofs_fscache_data_read() and maps its result to 0 or a negative error
 * code.
 *
 * NOTE(review): the conditions guarding folio_mark_uptodate() and any
 * folio unlock are not in the visible lines - confirm against the full
 * file.
 */
331 static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
336 DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);
338 ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
339 folio_size(folio), &unlock);
342 folio_mark_uptodate(folio);
/* A non-negative byte count from data_read is reported as success. */
345 return ret < 0 ? ret : 0;
/*
 * .readahead handler: calls erofs_fscache_data_read() on the part of the
 * readahead window not yet handled (len - done) and advances by the byte
 * count it returns until the whole window is covered.  Folios are taken
 * from @rac with readahead_folio().
 *
 * NOTE(review): this listing has gaps in its embedded line numbering.  How
 * pos and size are derived, the error handling and the conditions guarding
 * folio_mark_uptodate() are not visible here.
 */
348 static void erofs_fscache_readahead(struct readahead_control *rac)
351 size_t len, done = 0;
356 if (!readahead_count(rac))
359 start = readahead_pos(rac);
360 len = readahead_length(rac);
364 ret = erofs_fscache_data_read(rac->mapping, pos,
365 len - done, &unlock);
371 folio = readahead_folio(rac);
372 size -= folio_size(folio);
374 folio_mark_uptodate(folio);
378 } while ((done += ret) < len);
/* Address space operations installed on the anon inode of a cookie. */
381 static const struct address_space_operations erofs_fscache_meta_aops = {
382 .read_folio = erofs_fscache_meta_read_folio,
/*
 * Non-static address space operations providing the data read paths
 * (.read_folio and .readahead) defined in this file.
 */
385 const struct address_space_operations erofs_fscache_access_aops = {
386 .read_folio = erofs_fscache_read_folio,
387 .readahead = erofs_fscache_readahead,
/*
 * Drop one reference on @domain under erofs_domain_list_lock.  On the last
 * reference the domain is unlinked from erofs_domain_list; if the list is
 * then empty the pseudo mount is unmounted and its pointer cleared.  The
 * volume is relinquished and domain_id freed after the lock is released.
 *
 * NOTE(review): any guard on @domain, the freeing of @domain itself and
 * the return after the teardown path are not in the visible lines.
 */
390 static void erofs_fscache_domain_put(struct erofs_domain *domain)
394 mutex_lock(&erofs_domain_list_lock);
395 if (refcount_dec_and_test(&domain->ref)) {
396 list_del(&domain->list);
/* The last domain is gone, so the pseudo mount is no longer needed. */
397 if (list_empty(&erofs_domain_list)) {
398 kern_unmount(erofs_pseudo_mnt);
399 erofs_pseudo_mnt = NULL;
401 mutex_unlock(&erofs_domain_list_lock);
402 fscache_relinquish_volume(domain->volume, NULL, false);
403 kfree(domain->domain_id);
407 mutex_unlock(&erofs_domain_list_lock);
/*
 * Acquire the fscache volume for @sb and store it in sbi->volume.  The
 * volume name is "erofs,<domain_id>" when a domain id is set, otherwise
 * "erofs,<fsid>".
 *
 * NOTE(review): the check of the kasprintf() result, the freeing of @name
 * and the return statement are not in the visible lines.
 */
410 static int erofs_fscache_register_volume(struct super_block *sb)
412 struct erofs_sb_info *sbi = EROFS_SB(sb);
413 char *domain_id = sbi->domain_id;
414 struct fscache_volume *volume;
418 name = kasprintf(GFP_KERNEL, "erofs,%s",
419 domain_id ? domain_id : sbi->fsid);
423 volume = fscache_acquire_volume(name, NULL, NULL, 0);
424 if (IS_ERR_OR_NULL(volume)) {
425 erofs_err(sb, "failed to register volume for %s", name);
/* A NULL volume is reported as -EOPNOTSUPP. */
426 ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
430 sbi->volume = volume;
435 static int erofs_fscache_init_domain(struct super_block *sb)
438 struct erofs_domain *domain;
439 struct erofs_sb_info *sbi = EROFS_SB(sb);
441 domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
445 domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
446 if (!domain->domain_id) {
451 err = erofs_fscache_register_volume(sb);
455 if (!erofs_pseudo_mnt) {
456 erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
457 if (IS_ERR(erofs_pseudo_mnt)) {
458 err = PTR_ERR(erofs_pseudo_mnt);
463 domain->volume = sbi->volume;
464 refcount_set(&domain->ref, 1);
465 list_add(&domain->list, &erofs_domain_list);
466 sbi->domain = domain;
469 kfree(domain->domain_id);
/*
 * Attach @sb to the shared domain named by sbi->domain_id.  Under
 * erofs_domain_list_lock the list is searched for a domain with the same
 * id; on a match @sb takes a reference and reuses the domain's volume.
 * Otherwise a new domain is created with erofs_fscache_init_domain(),
 * still under the lock.
 *
 * NOTE(review): the return statements are not in the visible lines.
 */
474 static int erofs_fscache_register_domain(struct super_block *sb)
477 struct erofs_domain *domain;
478 struct erofs_sb_info *sbi = EROFS_SB(sb);
480 mutex_lock(&erofs_domain_list_lock);
481 list_for_each_entry(domain, &erofs_domain_list, list) {
482 if (!strcmp(domain->domain_id, sbi->domain_id)) {
483 sbi->domain = domain;
484 sbi->volume = domain->volume;
485 refcount_inc(&domain->ref);
486 mutex_unlock(&erofs_domain_list_lock);
/* No existing domain matched: create one. */
490 err = erofs_fscache_init_domain(sb);
491 mutex_unlock(&erofs_domain_list_lock);
/*
 * Allocate an erofs_fscache context for @name and acquire and start using
 * an fscache cookie in the volume of @sb.  With
 * EROFS_REG_COOKIE_NEED_INODE set, an inode is also created on @sb and
 * given the erofs_fscache_meta_aops operations.
 *
 * ERR_PTR(-ENOMEM) is a possible return value.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering.  The
 * remaining parameters, the failure checks, the storing of the inode in
 * the context and the success return are not visible here.
 */
496 struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
500 struct fscache_volume *volume = EROFS_SB(sb)->volume;
501 struct erofs_fscache *ctx;
502 struct fscache_cookie *cookie;
505 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
507 return ERR_PTR(-ENOMEM);
509 cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
510 name, strlen(name), NULL, 0, 0);
512 erofs_err(sb, "failed to get cookie for %s", name);
517 fscache_use_cookie(cookie, false);
518 ctx->cookie = cookie;
520 if (flags & EROFS_REG_COOKIE_NEED_INODE) {
521 struct inode *const inode = new_inode(sb);
524 erofs_err(sb, "failed to get anon inode for %s", name);
530 inode->i_size = OFFSET_MAX;
531 inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
532 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
/* Undo fscache_use_cookie() and fscache_acquire_cookie(). */
540 fscache_unuse_cookie(ctx->cookie, NULL, NULL);
541 fscache_relinquish_cookie(ctx->cookie, false);
/*
 * Stop using and relinquish the fscache cookie of @ctx.
 *
 * NOTE(review): further cleanup of @ctx (inode, name, the context itself)
 * is not in the visible lines - confirm against the full file.
 */
547 static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
549 fscache_unuse_cookie(ctx->cookie, NULL, NULL);
550 fscache_relinquish_cookie(ctx->cookie, false);
/*
 * Create a cookie context that belongs to the shared domain of @sb.  The
 * context gets a copy of @name and an inode allocated on the pseudo
 * mount's superblock.  That inode points back to the context through
 * i_private, and the domain gains one reference.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering.  The
 * remaining parameters, the failure checks and the return statements are
 * not visible here.
 */
557 struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
563 struct erofs_fscache *ctx;
564 struct erofs_domain *domain = EROFS_SB(sb)->domain;
566 ctx = erofs_fscache_acquire_cookie(sb, name, flags);
570 ctx->name = kstrdup(name, GFP_KERNEL);
576 inode = new_inode(erofs_pseudo_mnt->mnt_sb);
582 ctx->domain = domain;
583 ctx->anon_inode = inode;
/* Lets a later lookup find the context from the inode. */
584 inode->i_private = ctx;
585 refcount_inc(&domain->ref);
588 erofs_fscache_relinquish_cookie(ctx);
/*
 * Look up or create the cookie @name in the shared domain of @sb.  With
 * erofs_domain_cookies_lock held, the inodes on the pseudo mount's
 * superblock are scanned under s_inode_list_lock for a context with the
 * same domain and name.  If the scan completes without returning, a new
 * cookie is created with erofs_fscache_domain_init_cookie().
 *
 * NOTE(review): this listing has gaps in its embedded line numbering.  The
 * remaining parameters, the statement that skips non-matching inodes, the
 * handling of an existing match when EROFS_REG_COOKIE_NEED_NOEXIST is
 * clear and the return statements are not visible here.
 */
593 struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
598 struct erofs_fscache *ctx;
599 struct erofs_domain *domain = EROFS_SB(sb)->domain;
600 struct super_block *psb = erofs_pseudo_mnt->mnt_sb;
602 mutex_lock(&erofs_domain_cookies_lock);
603 spin_lock(&psb->s_inode_list_lock);
604 list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
605 ctx = inode->i_private;
606 if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
608 if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
611 erofs_err(sb, "%s already exists in domain %s", name,
613 ctx = ERR_PTR(-EEXIST);
615 spin_unlock(&psb->s_inode_list_lock);
616 mutex_unlock(&erofs_domain_cookies_lock);
/* The spinlock is dropped before the new cookie is created. */
619 spin_unlock(&psb->s_inode_list_lock);
620 ctx = erofs_fscache_domain_init_cookie(sb, name, flags);
621 mutex_unlock(&erofs_domain_cookies_lock);
/*
 * Register the cookie @name for @sb: through the shared-domain path when
 * the superblock has a domain id, otherwise directly in its own volume.
 */
625 struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
629 if (EROFS_SB(sb)->domain_id)
630 return erofs_domain_register_cookie(sb, name, flags);
631 return erofs_fscache_acquire_cookie(sb, name, flags);
/*
 * Release a cookie context.  Under erofs_domain_cookies_lock the anon
 * inode's reference is dropped, after recording in @drop whether it was
 * the only one (i_count == 1).  The cookie is then relinquished and the
 * domain reference dropped.
 *
 * NOTE(review): the guards on @ctx and @domain and the early return that
 * presumably depends on @drop are not in the visible lines.
 */
634 void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
637 struct erofs_domain *domain;
641 domain = ctx->domain;
643 mutex_lock(&erofs_domain_cookies_lock);
/* Sample the count before iput() so the last user can be detected. */
644 drop = atomic_read(&ctx->anon_inode->i_count) == 1;
645 iput(ctx->anon_inode);
646 mutex_unlock(&erofs_domain_cookies_lock);
651 erofs_fscache_relinquish_cookie(ctx);
652 erofs_fscache_domain_put(domain);
/*
 * Set up fscache for @sb: register either a shared domain or a volume,
 * then register the cookie named by sbi->fsid with an anon inode and store
 * it in sbi->s_fscache.
 *
 * NOTE(review): the conditions selecting between the domain and volume
 * paths and guarding EROFS_REG_COOKIE_NEED_NOEXIST, the error checks and
 * the success return are not in the visible lines.
 */
655 int erofs_fscache_register_fs(struct super_block *sb)
658 struct erofs_sb_info *sbi = EROFS_SB(sb);
659 struct erofs_fscache *fscache;
663 ret = erofs_fscache_register_domain(sb);
665 ret = erofs_fscache_register_volume(sb);
670 * When shared domain is enabled, using NEED_NOEXIST to guarantee
671 * the primary data blob (aka fsid) is unique in the shared domain.
673 * For non-shared-domain case, fscache_acquire_volume() invoked by
674 * erofs_fscache_register_volume() has already guaranteed
675 * the uniqueness of primary data blob.
677 * Acquired domain/volume will be relinquished in kill_sb() on error.
679 flags = EROFS_REG_COOKIE_NEED_INODE;
681 flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
682 fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
684 return PTR_ERR(fscache);
686 sbi->s_fscache = fscache;
/*
 * Tear down the fscache state of @sb: unregister the primary cookie,
 * release the domain or the volume, and clear sbi->s_fscache.
 *
 * NOTE(review): the condition choosing between the domain put and the
 * volume relinquish, and anything after the last visible line, is not
 * shown in this listing - confirm against the full file.
 */
690 void erofs_fscache_unregister_fs(struct super_block *sb)
692 struct erofs_sb_info *sbi = EROFS_SB(sb);
694 erofs_fscache_unregister_cookie(sbi->s_fscache);
697 erofs_fscache_domain_put(sbi->domain);
699 fscache_relinquish_volume(sbi->volume, NULL, false);
701 sbi->s_fscache = NULL;