1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2022, Alibaba Cloud
4 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
6 #include <linux/fscache.h>
/*
 * File-scope state shared by the domain and cookie helpers in this file.
 * erofs_domain_list is walked and modified while erofs_domain_list_lock is
 * held, and erofs_domain_cookies_list while erofs_domain_cookies_lock is
 * held.  erofs_pseudo_mnt is assigned from kern_mount() and reset to NULL
 * after kern_unmount() by the domain code.
 */
9 static DEFINE_MUTEX(erofs_domain_list_lock);
10 static DEFINE_MUTEX(erofs_domain_cookies_lock);
11 static LIST_HEAD(erofs_domain_list);
12 static LIST_HEAD(erofs_domain_cookies_list);
13 static struct vfsmount *erofs_pseudo_mnt;
/*
 * Bookkeeping for one read request against the cache.  A request can point
 * at another request through ->primary; a completion error is stored in
 * ->error and may also be written to the primary's ->error.
 * NOTE(review): the helpers in this file also use a ->ref refcount member
 * whose declaration is not visible in this listing.
 */
15 struct erofs_fscache_request {
16 struct erofs_fscache_request *primary;
17 struct netfs_cache_resources cache_resources;
18 struct address_space *mapping; /* The mapping being accessed */
19 loff_t start; /* Start position */
20 size_t len; /* Length of the request */
21 size_t submitted; /* Length of submitted */
22 short error; /* 0 or error that occurred */
/*
 * Allocate a zero-initialised request with kzalloc(GFP_KERNEL), record
 * @mapping in it and set its reference count to 1.  The visible failure
 * return is ERR_PTR(-ENOMEM).
 * NOTE(review): the allocation check and the stores of @start and @len are
 * not visible in this listing - confirm against the full source.
 */
26 static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
27 loff_t start, size_t len)
29 struct erofs_fscache_request *req;
31 req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
33 return ERR_PTR(-ENOMEM);
35 req->mapping = mapping;
38 refcount_set(&req->ref, 1);
/*
 * Pick the request to use for the next submission against @primary.
 *
 * While nothing has been submitted on @primary yet, @primary itself is
 * reused and gains one reference.  Otherwise a new request is allocated on
 * the same mapping, starting at primary->start + primary->submitted, linked
 * to @primary through ->primary, and @primary again gains one reference.
 * NOTE(review): the return statements and the check of the allocation result
 * are not visible in this listing.
 */
43 static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
46 struct erofs_fscache_request *req;
48 /* use primary request for the first submission */
49 if (!primary->submitted) {
50 refcount_inc(&primary->ref);
54 req = erofs_fscache_req_alloc(primary->mapping,
55 primary->start + primary->submitted, len);
57 req->primary = primary;
58 refcount_inc(&primary->ref);
/*
 * Walk the folios of req->mapping between start_page and last_page and mark
 * them up to date.
 * NOTE(review): how 'failed' gates folio_mark_uptodate(), and any locking or
 * unlocking of the folios, is not visible in this listing.
 */
63 static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
/* Non-zero req->error means the request failed. */
66 bool failed = req->error;
67 pgoff_t start_page = req->start / PAGE_SIZE;
/* Index of the last page covered by [start, start + len). */
68 pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
70 XA_STATE(xas, &req->mapping->i_pages, start_page);
73 xas_for_each(&xas, folio, last_page) {
74 if (xas_retry(&xas, folio))
77 folio_mark_uptodate(folio);
/*
 * Drop one reference on @req.  When the count reaches zero, the cache
 * operation is ended if cache_resources.ops is set; the request is then
 * completed and/or the reference held on req->primary is dropped.
 * NOTE(review): the condition choosing between completing @req and putting
 * its primary, and the freeing of @req, are not visible in this listing.
 */
83 static void erofs_fscache_req_put(struct erofs_fscache_request *req)
85 if (refcount_dec_and_test(&req->ref)) {
86 if (req->cache_resources.ops)
87 req->cache_resources.ops->end_operation(&req->cache_resources);
89 erofs_fscache_req_complete(req);
91 erofs_fscache_req_put(req->primary);
/*
 * Completion callback handed to fscache_read(); @priv is the request.
 * An error value in @transferred_or_error is recorded in the request's
 * ->error and/or its primary's ->error, then one reference on the request is
 * dropped.
 * NOTE(review): the branch selecting between req->primary->error and
 * req->error is not visible in this listing.
 */
96 static void erofs_fscache_subreq_complete(void *priv,
97 ssize_t transferred_or_error, bool was_async)
99 struct erofs_fscache_request *req = priv;
101 if (IS_ERR_VALUE(transferred_or_error)) {
103 req->primary->error = transferred_or_error;
105 req->error = transferred_or_error;
107 erofs_fscache_req_put(req);
111 * Read data from fscache (cookie, pstart, len), and fill the read data into
112 * page cache described by (req->mapping, lstart, len). @pstart describes the
113 * start physical address in the cache file.
/*
 * Submit reads of @len bytes from @cookie, starting at cache offset @pstart,
 * into the page cache of req->mapping at req->start + req->submitted.
 * The range is handled in slices whose length is chosen by
 * prepare_ondemand_read(); 'done' tracks the bytes handled so far.
 * NOTE(review): the loop construct, the updates of 'done' and the return
 * statements are not visible in this listing.
 */
115 static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
116 struct erofs_fscache_request *req, loff_t pstart, size_t len)
118 enum netfs_io_source source;
119 struct super_block *sb = req->mapping->host->i_sb;
120 struct netfs_cache_resources *cres = &req->cache_resources;
121 struct iov_iter iter;
/* Logical position in the mapping where this submission starts. */
122 loff_t lstart = req->start + req->submitted;
/* Debug check: the submission must fit in what is left of the request. */
126 DBG_BUGON(len > req->len - req->submitted);
128 ret = fscache_begin_read_operation(cres, cookie);
/* Cache offset and remaining length of the current slice. */
133 loff_t sstart = pstart + done;
134 size_t slen = len - done;
135 unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
/* slen is passed by pointer, so the callee can adjust the slice length. */
137 source = cres->ops->prepare_ondemand_read(cres,
138 sstart, &slen, LLONG_MAX, &flags, 0);
/* A zero-length slice is treated as an invalid read. */
139 if (WARN_ON(slen == 0))
140 source = NETFS_INVALID_READ;
141 if (source != NETFS_READ_FROM_CACHE) {
142 erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
/* Reference taken before the read is issued; the completion callback puts one. */
146 refcount_inc(&req->ref);
147 iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
148 lstart + done, slen);
150 ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
151 erofs_fscache_subreq_complete, req);
/* NOTE(review): the handling of -EIOCBQUEUED is not visible in this listing. */
152 if (ret == -EIOCBQUEUED)
155 erofs_err(sb, "failed to fscache_read (ret %d)", ret);
/* Debug check: the whole range must have been handled. */
161 DBG_BUGON(done != len);
/*
 * ->read_folio handler installed in erofs_fscache_meta_aops.
 * Allocates a request covering @folio and reads it from the cookie of the
 * erofs_fscache context stored in the inode's i_private.  The folio position
 * is used both as the request start and as the cache offset.  The local
 * reference on the request is dropped afterwards.
 * NOTE(review): error handling of the allocation and of the read is not
 * visible in this listing.
 */
165 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
168 struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
169 struct erofs_fscache_request *req;
171 req = erofs_fscache_req_alloc(folio_mapping(folio),
172 folio_pos(folio), folio_size(folio));
178 ret = erofs_fscache_read_folios_async(ctx->cookie, req,
179 folio_pos(folio), folio_size(folio));
183 erofs_fscache_req_put(req);
/*
 * Handle the next slice of @primary, starting at
 * primary->start + primary->submitted, and advance primary->submitted.
 *
 * Three cases are visible after the block mapping lookup:
 *  - EROFS_MAP_META: data is copied from a metadata buffer into one page and
 *    the rest of that page is zeroed;
 *  - not EROFS_MAP_MAPPED: the remainder of the request is zero-filled;
 *  - otherwise: the device is resolved and an asynchronous cache read is
 *    submitted through a chained request.
 * NOTE(review): the setup of 'map', the assignment of 'size', error checks
 * and return statements are not visible in this listing.
 */
187 static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
189 struct address_space *mapping = primary->mapping;
190 struct inode *inode = mapping->host;
191 struct super_block *sb = inode->i_sb;
192 struct erofs_fscache_request *req;
193 struct erofs_map_blocks map;
194 struct erofs_map_dev mdev;
195 struct iov_iter iter;
/* Logical position of the slice about to be handled. */
196 loff_t pos = primary->start + primary->submitted;
201 ret = erofs_map_blocks(inode, &map);
205 if (map.m_flags & EROFS_MAP_META) {
206 struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
211 /* For tail packing layout, the offset may be non-zero. */
/* Split the physical address into an in-block offset and a block number. */
212 offset = erofs_blkoff(sb, map.m_pa);
213 blknr = erofs_blknr(sb, map.m_pa);
216 src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
/* Destination is exactly one page of the mapping at pos. */
220 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
/* A short copy releases the metabuf before leaving this path. */
221 if (copy_to_iter(src + offset, size, &iter) != size) {
222 erofs_put_metabuf(&buf);
/* Zero whatever the copied data did not fill in the page. */
225 iov_iter_zero(PAGE_SIZE - size, &iter);
226 erofs_put_metabuf(&buf);
227 primary->submitted += PAGE_SIZE;
/* Bytes of the request not yet submitted. */
231 count = primary->len - primary->submitted;
232 if (!(map.m_flags & EROFS_MAP_MAPPED)) {
/* Unmapped range: fill the rest of the request with zeroes. */
233 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
234 iov_iter_zero(count, &iter);
235 primary->submitted += count;
/* Clip to what remains of the mapped extent after pos. */
239 count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
/* Debug check: the slice must be non-empty and a multiple of PAGE_SIZE. */
240 DBG_BUGON(!count || count % PAGE_SIZE);
242 mdev = (struct erofs_map_dev) {
243 .m_deviceid = map.m_deviceid,
246 ret = erofs_map_dev(sb, &mdev);
250 req = erofs_fscache_req_chain(primary, count);
/* Cache offset is the device address plus the offset of pos in the extent. */
254 ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
255 req, mdev.m_pa + (pos - map.m_la), count);
256 erofs_fscache_req_put(req);
257 primary->submitted += count;
/*
 * Call erofs_fscache_data_read_slice() repeatedly, stopping when it returns
 * non-zero or once req->submitted has reached req->len.
 * NOTE(review): the statements between the slice call and the loop condition
 * are not visible in this listing.
 */
261 static int erofs_fscache_data_read(struct erofs_fscache_request *req)
266 ret = erofs_fscache_data_read_slice(req);
269 } while (!ret && req->submitted < req->len);
/*
 * ->read_folio handler installed in erofs_fscache_access_aops.
 * Allocates a request covering @folio, runs erofs_fscache_data_read() on it
 * and drops the local reference.
 * NOTE(review): handling of an allocation failure and the return statement
 * are not visible in this listing.
 */
274 static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
276 struct erofs_fscache_request *req;
279 req = erofs_fscache_req_alloc(folio_mapping(folio),
280 folio_pos(folio), folio_size(folio));
286 ret = erofs_fscache_data_read(req);
287 erofs_fscache_req_put(req);
/*
 * ->readahead handler installed in erofs_fscache_access_aops.
 * A request spanning the readahead window (readahead_pos/readahead_length)
 * is allocated, every folio of the window is consumed with
 * readahead_folio(), then the data read is started and the local reference
 * on the request is dropped.  The result of erofs_fscache_data_read() is not
 * used here.
 * NOTE(review): the statements taken when readahead_count() is zero or when
 * the allocation fails are not visible in this listing.
 */
291 static void erofs_fscache_readahead(struct readahead_control *rac)
293 struct erofs_fscache_request *req;
295 if (!readahead_count(rac))
298 req = erofs_fscache_req_alloc(rac->mapping,
299 readahead_pos(rac), readahead_length(rac));
303 /* The request completion will drop refs on the folios. */
304 while (readahead_folio(rac))
307 erofs_fscache_data_read(req);
308 erofs_fscache_req_put(req);
/*
 * Address space operations assigned to the anonymous inode created in
 * erofs_fscache_acquire_cookie(); only ->read_folio is visible here.
 */
311 static const struct address_space_operations erofs_fscache_meta_aops = {
312 .read_folio = erofs_fscache_meta_read_folio,
/*
 * Non-static address space operations providing ->read_folio and ->readahead
 * through the data read path of this file.
 */
315 const struct address_space_operations erofs_fscache_access_aops = {
316 .read_folio = erofs_fscache_read_folio,
317 .readahead = erofs_fscache_readahead,
/*
 * Drop one reference on @domain while holding erofs_domain_list_lock.
 * When the count reaches zero the domain is unlinked from erofs_domain_list,
 * the pseudo mount is unmounted and its pointer cleared if the list became
 * empty, the domain's volume is relinquished, the lock is released and
 * domain_id is freed.  The trailing mutex_unlock() is a second, separate
 * unlock.
 * NOTE(review): the freeing of @domain itself and the return that separates
 * the two unlocks are not visible in this listing.
 */
320 static void erofs_fscache_domain_put(struct erofs_domain *domain)
322 mutex_lock(&erofs_domain_list_lock);
323 if (refcount_dec_and_test(&domain->ref)) {
324 list_del(&domain->list);
/* Last domain gone: tear down the shared pseudo mount. */
325 if (list_empty(&erofs_domain_list)) {
326 kern_unmount(erofs_pseudo_mnt);
327 erofs_pseudo_mnt = NULL;
329 fscache_relinquish_volume(domain->volume, NULL, false);
330 mutex_unlock(&erofs_domain_list_lock);
331 kfree(domain->domain_id);
335 mutex_unlock(&erofs_domain_list_lock);
/*
 * Acquire the fscache volume for @sb and store it in sbi->volume.
 * The volume name is "erofs," followed by sbi->domain_id when that is set,
 * otherwise by sbi->fsid.  If fscache_acquire_volume() returns an error
 * pointer or NULL, the failure is logged and ret becomes the pointer's error
 * code, or -EOPNOTSUPP for NULL.
 * NOTE(review): the kasprintf() failure check, the freeing of 'name' and the
 * return statement are not visible in this listing.
 */
338 static int erofs_fscache_register_volume(struct super_block *sb)
340 struct erofs_sb_info *sbi = EROFS_SB(sb);
341 char *domain_id = sbi->domain_id;
342 struct fscache_volume *volume;
346 name = kasprintf(GFP_KERNEL, "erofs,%s",
347 domain_id ? domain_id : sbi->fsid);
351 volume = fscache_acquire_volume(name, NULL, NULL, 0);
352 if (IS_ERR_OR_NULL(volume)) {
353 erofs_err(sb, "failed to register volume for %s", name);
354 ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
358 sbi->volume = volume;
/*
 * Create a new domain for @sb: allocate it, duplicate sbi->domain_id,
 * register the volume, mount the pseudo filesystem if it is not mounted yet,
 * then initialise the domain (volume, reference count 1), add it to
 * erofs_domain_list and publish it in sbi->domain.
 * The visible caller invokes this between mutex_lock() and mutex_unlock() of
 * erofs_domain_list_lock.
 * NOTE(review): the error-path labels, the allocation checks and the return
 * statements are not visible in this listing; the trailing kfree() looks
 * like part of the error path - confirm against the full source.
 */
363 static int erofs_fscache_init_domain(struct super_block *sb)
366 struct erofs_domain *domain;
367 struct erofs_sb_info *sbi = EROFS_SB(sb);
369 domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
373 domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
374 if (!domain->domain_id) {
379 err = erofs_fscache_register_volume(sb);
/* The pseudo mount is created when no mount pointer is set yet. */
383 if (!erofs_pseudo_mnt) {
384 erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
385 if (IS_ERR(erofs_pseudo_mnt)) {
386 err = PTR_ERR(erofs_pseudo_mnt);
391 domain->volume = sbi->volume;
392 refcount_set(&domain->ref, 1);
393 list_add(&domain->list, &erofs_domain_list);
394 sbi->domain = domain;
397 kfree(domain->domain_id);
/*
 * Attach @sb to the domain named by sbi->domain_id.
 * Under erofs_domain_list_lock the domain list is searched for an entry with
 * the same domain_id; on a match the superblock shares that domain and its
 * volume and the domain gains a reference.  Otherwise a new domain is
 * created with erofs_fscache_init_domain().
 * NOTE(review): the return statements are not visible in this listing.
 */
402 static int erofs_fscache_register_domain(struct super_block *sb)
405 struct erofs_domain *domain;
406 struct erofs_sb_info *sbi = EROFS_SB(sb);
408 mutex_lock(&erofs_domain_list_lock);
409 list_for_each_entry(domain, &erofs_domain_list, list) {
410 if (!strcmp(domain->domain_id, sbi->domain_id)) {
/* Existing domain: share it and its volume. */
411 sbi->domain = domain;
412 sbi->volume = domain->volume;
413 refcount_inc(&domain->ref);
414 mutex_unlock(&erofs_domain_list_lock);
418 err = erofs_fscache_init_domain(sb);
419 mutex_unlock(&erofs_domain_list_lock);
/*
 * Create an erofs_fscache context for the blob called @name.
 * A context is allocated (list node initialised, reference count 1), a
 * cookie is acquired from the superblock's volume and marked in use, and an
 * anonymous inode is created whose mapping uses erofs_fscache_meta_aops and
 * whose i_private points back at the context.
 * The visible failure return is ERR_PTR(-ENOMEM); the trailing
 * unuse/relinquish pair undoes the cookie acquisition.
 * NOTE(review): the failure checks, the error labels, the storing of the
 * inode in the context and the final return are not visible in this listing.
 */
423 static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
424 char *name, unsigned int flags)
426 struct fscache_volume *volume = EROFS_SB(sb)->volume;
427 struct erofs_fscache *ctx;
428 struct fscache_cookie *cookie;
429 struct super_block *isb;
433 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
435 return ERR_PTR(-ENOMEM);
436 INIT_LIST_HEAD(&ctx->node);
437 refcount_set(&ctx->ref, 1);
439 cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
440 name, strlen(name), NULL, 0, 0);
442 erofs_err(sb, "failed to get cookie for %s", name);
446 fscache_use_cookie(cookie, false);
449 * Allocate anonymous inode in global pseudo mount for shareable blobs,
450 * so that they are accessible among erofs fs instances.
/* With EROFS_REG_COOKIE_SHARE the inode lives on the pseudo mount's superblock. */
452 isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
453 inode = new_inode(isb);
455 erofs_err(sb, "failed to get anon inode for %s", name);
460 inode->i_size = OFFSET_MAX;
461 inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
462 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
463 inode->i_blkbits = EROFS_SB(sb)->blkszbits;
464 inode->i_private = ctx;
466 ctx->cookie = cookie;
471 fscache_unuse_cookie(cookie, NULL, NULL);
472 fscache_relinquish_cookie(cookie, false);
/*
 * Release the cookie held by @ctx: mark it unused, then relinquish it.
 * NOTE(review): any further teardown of @ctx is not visible in this listing.
 */
478 static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
480 fscache_unuse_cookie(ctx->cookie, NULL, NULL);
481 fscache_relinquish_cookie(ctx->cookie, false);
/*
 * Create a cookie context that belongs to the superblock's domain.
 * The context is acquired, @name is duplicated into ctx->name, the domain
 * gains a reference and is recorded in ctx->domain, and the context is added
 * to erofs_domain_cookies_list.  The visible failure path relinquishes the
 * cookie and returns ERR_PTR(-ENOMEM).
 * The visible caller invokes this between mutex_lock() and mutex_unlock() of
 * erofs_domain_cookies_lock.
 * NOTE(review): the checks on the acquire and kstrdup() results and the
 * final return are not visible in this listing.
 */
487 static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
488 char *name, unsigned int flags)
490 struct erofs_fscache *ctx;
491 struct erofs_domain *domain = EROFS_SB(sb)->domain;
493 ctx = erofs_fscache_acquire_cookie(sb, name, flags);
497 ctx->name = kstrdup(name, GFP_KERNEL);
499 erofs_fscache_relinquish_cookie(ctx);
500 return ERR_PTR(-ENOMEM);
503 refcount_inc(&domain->ref);
504 ctx->domain = domain;
505 list_add(&ctx->node, &erofs_domain_cookies_list);
/*
 * Look up or create the cookie context for @name within the superblock's
 * domain.  EROFS_REG_COOKIE_SHARE is always added to @flags.
 * Under erofs_domain_cookies_lock the cookie list is searched for an entry
 * with the same domain and name.  A match gains a reference unless
 * EROFS_REG_COOKIE_NEED_NOEXIST is set, in which case the clash is logged
 * and ctx becomes ERR_PTR(-EEXIST).  Without a match a new context is
 * created through erofs_domain_init_cookie().
 * NOTE(review): the 'continue', 'else' and return statements are not visible
 * in this listing.
 */
509 static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
510 char *name, unsigned int flags)
512 struct erofs_fscache *ctx;
513 struct erofs_domain *domain = EROFS_SB(sb)->domain;
515 flags |= EROFS_REG_COOKIE_SHARE;
516 mutex_lock(&erofs_domain_cookies_lock);
517 list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
/* Entries of another domain or with another name do not match. */
518 if (ctx->domain != domain || strcmp(ctx->name, name))
520 if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
521 refcount_inc(&ctx->ref);
523 erofs_err(sb, "%s already exists in domain %s", name,
525 ctx = ERR_PTR(-EEXIST);
527 mutex_unlock(&erofs_domain_cookies_lock);
530 ctx = erofs_domain_init_cookie(sb, name, flags);
531 mutex_unlock(&erofs_domain_cookies_lock);
/*
 * Register a cookie for the blob @name: through the domain-aware path when
 * the superblock has a domain_id, otherwise by acquiring a cookie directly.
 */
535 struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
539 if (EROFS_SB(sb)->domain_id)
540 return erofs_domain_register_cookie(sb, name, flags);
541 return erofs_fscache_acquire_cookie(sb, name, flags);
/*
 * Release a cookie context obtained from erofs_fscache_register_cookie().
 * One visible path relinquishes the cookie immediately.  The other drops a
 * reference under erofs_domain_cookies_lock; when the count reaches zero the
 * context's domain is remembered, the context is unlinked from the cookie
 * list and its cookie relinquished.  After unlocking,
 * erofs_fscache_domain_put() is called on the remembered domain.
 * NOTE(review): the conditions guarding the early return and the final
 * domain put are not visible in this listing.
 */
544 void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
546 struct erofs_domain *domain = NULL;
551 return erofs_fscache_relinquish_cookie(ctx);
553 mutex_lock(&erofs_domain_cookies_lock);
554 if (refcount_dec_and_test(&ctx->ref)) {
/* Save the domain before the context is relinquished. */
555 domain = ctx->domain;
556 list_del(&ctx->node);
557 erofs_fscache_relinquish_cookie(ctx);
559 mutex_unlock(&erofs_domain_cookies_lock);
561 erofs_fscache_domain_put(domain);
/*
 * Set up fscache state for @sb: register either a domain or a plain volume,
 * then register the cookie for the primary data blob (sbi->fsid) and store
 * it in sbi->s_fscache.  The visible failure return is PTR_ERR(fscache).
 * NOTE(review): the condition choosing between domain and volume
 * registration, the condition under which EROFS_REG_COOKIE_NEED_NOEXIST is
 * set, and the remaining returns are not visible in this listing.
 */
564 int erofs_fscache_register_fs(struct super_block *sb)
567 struct erofs_sb_info *sbi = EROFS_SB(sb);
568 struct erofs_fscache *fscache;
569 unsigned int flags = 0;
572 ret = erofs_fscache_register_domain(sb);
574 ret = erofs_fscache_register_volume(sb);
579 * When shared domain is enabled, using NEED_NOEXIST to guarantee
580 * the primary data blob (aka fsid) is unique in the shared domain.
582 * For non-shared-domain case, fscache_acquire_volume() invoked by
583 * erofs_fscache_register_volume() has already guaranteed
584 * the uniqueness of primary data blob.
586 * Acquired domain/volume will be relinquished in kill_sb() on error.
589 flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
590 fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
592 return PTR_ERR(fscache);
594 sbi->s_fscache = fscache;
/*
 * Tear down the fscache state of @sb: unregister the primary cookie, release
 * the domain reference or the volume, and clear sbi->s_fscache.
 * NOTE(review): the condition choosing between the domain put and the volume
 * relinquish is not visible in this listing, and the function may continue
 * past the last visible line.
 */
598 void erofs_fscache_unregister_fs(struct super_block *sb)
600 struct erofs_sb_info *sbi = EROFS_SB(sb);
602 erofs_fscache_unregister_cookie(sbi->s_fscache);
605 erofs_fscache_domain_put(sbi->domain);
607 fscache_relinquish_volume(sbi->volume, NULL, false);
609 sbi->s_fscache = NULL;