Merge tag 'mm-nonmm-stable-2023-06-24-19-23' of git://git.kernel.org/pub/scm/linux...
[platform/kernel/linux-starfive.git] / fs / erofs / zmap.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2018-2019 HUAWEI, Inc.
4  *             https://www.huawei.com/
5  */
6 #include "internal.h"
7 #include <asm/unaligned.h>
8 #include <trace/events/erofs.h>
9
10 struct z_erofs_maprecorder {
11         struct inode *inode;
12         struct erofs_map_blocks *map;
13         void *kaddr;
14
15         unsigned long lcn;
16         /* compression extent information gathered */
17         u8  type, headtype;
18         u16 clusterofs;
19         u16 delta[2];
20         erofs_blk_t pblk, compressedblks;
21         erofs_off_t nextpackoff;
22         bool partialref;
23 };
24
25 static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
26                                       unsigned long lcn)
27 {
28         struct inode *const inode = m->inode;
29         struct erofs_inode *const vi = EROFS_I(inode);
30         const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(erofs_iloc(inode) +
31                         vi->inode_isize + vi->xattr_isize) +
32                         lcn * sizeof(struct z_erofs_lcluster_index);
33         struct z_erofs_lcluster_index *di;
34         unsigned int advise, type;
35
36         m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
37                                       erofs_blknr(inode->i_sb, pos), EROFS_KMAP);
38         if (IS_ERR(m->kaddr))
39                 return PTR_ERR(m->kaddr);
40
41         m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index);
42         m->lcn = lcn;
43         di = m->kaddr + erofs_blkoff(inode->i_sb, pos);
44
45         advise = le16_to_cpu(di->di_advise);
46         type = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) &
47                 ((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1);
48         switch (type) {
49         case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
50                 m->clusterofs = 1 << vi->z_logical_clusterbits;
51                 m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
52                 if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
53                         if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
54                                         Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
55                                 DBG_BUGON(1);
56                                 return -EFSCORRUPTED;
57                         }
58                         m->compressedblks = m->delta[0] &
59                                 ~Z_EROFS_LI_D0_CBLKCNT;
60                         m->delta[0] = 1;
61                 }
62                 m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
63                 break;
64         case Z_EROFS_LCLUSTER_TYPE_PLAIN:
65         case Z_EROFS_LCLUSTER_TYPE_HEAD1:
66         case Z_EROFS_LCLUSTER_TYPE_HEAD2:
67                 if (advise & Z_EROFS_LI_PARTIAL_REF)
68                         m->partialref = true;
69                 m->clusterofs = le16_to_cpu(di->di_clusterofs);
70                 if (m->clusterofs >= 1 << vi->z_logical_clusterbits) {
71                         DBG_BUGON(1);
72                         return -EFSCORRUPTED;
73                 }
74                 m->pblk = le32_to_cpu(di->di_u.blkaddr);
75                 break;
76         default:
77                 DBG_BUGON(1);
78                 return -EOPNOTSUPP;
79         }
80         m->type = type;
81         return 0;
82 }
83
84 static unsigned int decode_compactedbits(unsigned int lobits,
85                                          unsigned int lomask,
86                                          u8 *in, unsigned int pos, u8 *type)
87 {
88         const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
89         const unsigned int lo = v & lomask;
90
91         *type = (v >> lobits) & 3;
92         return lo;
93 }
94
95 static int get_compacted_la_distance(unsigned int lclusterbits,
96                                      unsigned int encodebits,
97                                      unsigned int vcnt, u8 *in, int i)
98 {
99         const unsigned int lomask = (1 << lclusterbits) - 1;
100         unsigned int lo, d1 = 0;
101         u8 type;
102
103         DBG_BUGON(i >= vcnt);
104
105         do {
106                 lo = decode_compactedbits(lclusterbits, lomask,
107                                           in, encodebits * i, &type);
108
109                 if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
110                         return d1;
111                 ++d1;
112         } while (++i < vcnt);
113
114         /* vcnt - 1 (Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */
115         if (!(lo & Z_EROFS_LI_D0_CBLKCNT))
116                 d1 += lo - 1;
117         return d1;
118 }
119
120 static int unpack_compacted_index(struct z_erofs_maprecorder *m,
121                                   unsigned int amortizedshift,
122                                   erofs_off_t pos, bool lookahead)
123 {
124         struct erofs_inode *const vi = EROFS_I(m->inode);
125         const unsigned int lclusterbits = vi->z_logical_clusterbits;
126         const unsigned int lomask = (1 << lclusterbits) - 1;
127         unsigned int vcnt, base, lo, encodebits, nblk, eofs;
128         int i;
129         u8 *in, type;
130         bool big_pcluster;
131
132         if (1 << amortizedshift == 4 && lclusterbits <= 14)
133                 vcnt = 2;
134         else if (1 << amortizedshift == 2 && lclusterbits == 12)
135                 vcnt = 16;
136         else
137                 return -EOPNOTSUPP;
138
139         /* it doesn't equal to round_up(..) */
140         m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
141                          (vcnt << amortizedshift);
142         big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
143         encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
144         eofs = erofs_blkoff(m->inode->i_sb, pos);
145         base = round_down(eofs, vcnt << amortizedshift);
146         in = m->kaddr + base;
147
148         i = (eofs - base) >> amortizedshift;
149
150         lo = decode_compactedbits(lclusterbits, lomask,
151                                   in, encodebits * i, &type);
152         m->type = type;
153         if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
154                 m->clusterofs = 1 << lclusterbits;
155
156                 /* figure out lookahead_distance: delta[1] if needed */
157                 if (lookahead)
158                         m->delta[1] = get_compacted_la_distance(lclusterbits,
159                                                 encodebits, vcnt, in, i);
160                 if (lo & Z_EROFS_LI_D0_CBLKCNT) {
161                         if (!big_pcluster) {
162                                 DBG_BUGON(1);
163                                 return -EFSCORRUPTED;
164                         }
165                         m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT;
166                         m->delta[0] = 1;
167                         return 0;
168                 } else if (i + 1 != (int)vcnt) {
169                         m->delta[0] = lo;
170                         return 0;
171                 }
172                 /*
173                  * since the last lcluster in the pack is special,
174                  * of which lo saves delta[1] rather than delta[0].
175                  * Hence, get delta[0] by the previous lcluster indirectly.
176                  */
177                 lo = decode_compactedbits(lclusterbits, lomask,
178                                           in, encodebits * (i - 1), &type);
179                 if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
180                         lo = 0;
181                 else if (lo & Z_EROFS_LI_D0_CBLKCNT)
182                         lo = 1;
183                 m->delta[0] = lo + 1;
184                 return 0;
185         }
186         m->clusterofs = lo;
187         m->delta[0] = 0;
188         /* figout out blkaddr (pblk) for HEAD lclusters */
189         if (!big_pcluster) {
190                 nblk = 1;
191                 while (i > 0) {
192                         --i;
193                         lo = decode_compactedbits(lclusterbits, lomask,
194                                                   in, encodebits * i, &type);
195                         if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD)
196                                 i -= lo;
197
198                         if (i >= 0)
199                                 ++nblk;
200                 }
201         } else {
202                 nblk = 0;
203                 while (i > 0) {
204                         --i;
205                         lo = decode_compactedbits(lclusterbits, lomask,
206                                                   in, encodebits * i, &type);
207                         if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
208                                 if (lo & Z_EROFS_LI_D0_CBLKCNT) {
209                                         --i;
210                                         nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT;
211                                         continue;
212                                 }
213                                 /* bigpcluster shouldn't have plain d0 == 1 */
214                                 if (lo <= 1) {
215                                         DBG_BUGON(1);
216                                         return -EFSCORRUPTED;
217                                 }
218                                 i -= lo - 2;
219                                 continue;
220                         }
221                         ++nblk;
222                 }
223         }
224         in += (vcnt << amortizedshift) - sizeof(__le32);
225         m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
226         return 0;
227 }
228
229 static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
230                                          unsigned long lcn, bool lookahead)
231 {
232         struct inode *const inode = m->inode;
233         struct erofs_inode *const vi = EROFS_I(inode);
234         const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
235                 ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
236         unsigned int totalidx = erofs_iblks(inode);
237         unsigned int compacted_4b_initial, compacted_2b;
238         unsigned int amortizedshift;
239         erofs_off_t pos;
240
241         if (lcn >= totalidx)
242                 return -EINVAL;
243
244         m->lcn = lcn;
245         /* used to align to 32-byte (compacted_2b) alignment */
246         compacted_4b_initial = (32 - ebase % 32) / 4;
247         if (compacted_4b_initial == 32 / 4)
248                 compacted_4b_initial = 0;
249
250         if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
251             compacted_4b_initial < totalidx)
252                 compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
253         else
254                 compacted_2b = 0;
255
256         pos = ebase;
257         if (lcn < compacted_4b_initial) {
258                 amortizedshift = 2;
259                 goto out;
260         }
261         pos += compacted_4b_initial * 4;
262         lcn -= compacted_4b_initial;
263
264         if (lcn < compacted_2b) {
265                 amortizedshift = 1;
266                 goto out;
267         }
268         pos += compacted_2b * 2;
269         lcn -= compacted_2b;
270         amortizedshift = 2;
271 out:
272         pos += lcn * (1 << amortizedshift);
273         m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
274                                       erofs_blknr(inode->i_sb, pos), EROFS_KMAP);
275         if (IS_ERR(m->kaddr))
276                 return PTR_ERR(m->kaddr);
277         return unpack_compacted_index(m, amortizedshift, pos, lookahead);
278 }
279
280 static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
281                                            unsigned int lcn, bool lookahead)
282 {
283         switch (EROFS_I(m->inode)->datalayout) {
284         case EROFS_INODE_COMPRESSED_FULL:
285                 return z_erofs_load_full_lcluster(m, lcn);
286         case EROFS_INODE_COMPRESSED_COMPACT:
287                 return z_erofs_load_compact_lcluster(m, lcn, lookahead);
288         default:
289                 return -EINVAL;
290         }
291 }
292
293 static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
294                                    unsigned int lookback_distance)
295 {
296         struct super_block *sb = m->inode->i_sb;
297         struct erofs_inode *const vi = EROFS_I(m->inode);
298         const unsigned int lclusterbits = vi->z_logical_clusterbits;
299
300         while (m->lcn >= lookback_distance) {
301                 unsigned long lcn = m->lcn - lookback_distance;
302                 int err;
303
304                 err = z_erofs_load_lcluster_from_disk(m, lcn, false);
305                 if (err)
306                         return err;
307
308                 switch (m->type) {
309                 case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
310                         lookback_distance = m->delta[0];
311                         if (!lookback_distance)
312                                 goto err_bogus;
313                         continue;
314                 case Z_EROFS_LCLUSTER_TYPE_PLAIN:
315                 case Z_EROFS_LCLUSTER_TYPE_HEAD1:
316                 case Z_EROFS_LCLUSTER_TYPE_HEAD2:
317                         m->headtype = m->type;
318                         m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
319                         return 0;
320                 default:
321                         erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu",
322                                   m->type, lcn, vi->nid);
323                         DBG_BUGON(1);
324                         return -EOPNOTSUPP;
325                 }
326         }
327 err_bogus:
328         erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu",
329                   lookback_distance, m->lcn, vi->nid);
330         DBG_BUGON(1);
331         return -EFSCORRUPTED;
332 }
333
334 static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
335                                             unsigned int initial_lcn)
336 {
337         struct super_block *sb = m->inode->i_sb;
338         struct erofs_inode *const vi = EROFS_I(m->inode);
339         struct erofs_map_blocks *const map = m->map;
340         const unsigned int lclusterbits = vi->z_logical_clusterbits;
341         unsigned long lcn;
342         int err;
343
344         DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN &&
345                   m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 &&
346                   m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2);
347         DBG_BUGON(m->type != m->headtype);
348
349         if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
350             ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1) &&
351              !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
352             ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) &&
353              !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
354                 map->m_plen = 1ULL << lclusterbits;
355                 return 0;
356         }
357         lcn = m->lcn + 1;
358         if (m->compressedblks)
359                 goto out;
360
361         err = z_erofs_load_lcluster_from_disk(m, lcn, false);
362         if (err)
363                 return err;
364
365         /*
366          * If the 1st NONHEAD lcluster has already been handled initially w/o
367          * valid compressedblks, which means at least it mustn't be CBLKCNT, or
368          * an internal implemenatation error is detected.
369          *
370          * The following code can also handle it properly anyway, but let's
371          * BUG_ON in the debugging mode only for developers to notice that.
372          */
373         DBG_BUGON(lcn == initial_lcn &&
374                   m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
375
376         switch (m->type) {
377         case Z_EROFS_LCLUSTER_TYPE_PLAIN:
378         case Z_EROFS_LCLUSTER_TYPE_HEAD1:
379         case Z_EROFS_LCLUSTER_TYPE_HEAD2:
380                 /*
381                  * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
382                  * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
383                  */
384                 m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits);
385                 break;
386         case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
387                 if (m->delta[0] != 1)
388                         goto err_bonus_cblkcnt;
389                 if (m->compressedblks)
390                         break;
391                 fallthrough;
392         default:
393                 erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn,
394                           vi->nid);
395                 DBG_BUGON(1);
396                 return -EFSCORRUPTED;
397         }
398 out:
399         map->m_plen = erofs_pos(sb, m->compressedblks);
400         return 0;
401 err_bonus_cblkcnt:
402         erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
403         DBG_BUGON(1);
404         return -EFSCORRUPTED;
405 }
406
407 static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
408 {
409         struct inode *inode = m->inode;
410         struct erofs_inode *vi = EROFS_I(inode);
411         struct erofs_map_blocks *map = m->map;
412         unsigned int lclusterbits = vi->z_logical_clusterbits;
413         u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
414         int err;
415
416         do {
417                 /* handle the last EOF pcluster (no next HEAD lcluster) */
418                 if ((lcn << lclusterbits) >= inode->i_size) {
419                         map->m_llen = inode->i_size - map->m_la;
420                         return 0;
421                 }
422
423                 err = z_erofs_load_lcluster_from_disk(m, lcn, true);
424                 if (err)
425                         return err;
426
427                 if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
428                         DBG_BUGON(!m->delta[1] &&
429                                   m->clusterofs != 1 << lclusterbits);
430                 } else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
431                            m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1 ||
432                            m->type == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
433                         /* go on until the next HEAD lcluster */
434                         if (lcn != headlcn)
435                                 break;
436                         m->delta[1] = 1;
437                 } else {
438                         erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
439                                   m->type, lcn, vi->nid);
440                         DBG_BUGON(1);
441                         return -EOPNOTSUPP;
442                 }
443                 lcn += m->delta[1];
444         } while (m->delta[1]);
445
446         map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
447         return 0;
448 }
449
450 static int z_erofs_do_map_blocks(struct inode *inode,
451                                  struct erofs_map_blocks *map, int flags)
452 {
453         struct erofs_inode *const vi = EROFS_I(inode);
454         bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
455         bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
456         struct z_erofs_maprecorder m = {
457                 .inode = inode,
458                 .map = map,
459         };
460         int err = 0;
461         unsigned int lclusterbits, endoff;
462         unsigned long initial_lcn;
463         unsigned long long ofs, end;
464
465         lclusterbits = vi->z_logical_clusterbits;
466         ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
467         initial_lcn = ofs >> lclusterbits;
468         endoff = ofs & ((1 << lclusterbits) - 1);
469
470         err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false);
471         if (err)
472                 goto unmap_out;
473
474         if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
475                 vi->z_idataoff = m.nextpackoff;
476
477         map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
478         end = (m.lcn + 1ULL) << lclusterbits;
479
480         switch (m.type) {
481         case Z_EROFS_LCLUSTER_TYPE_PLAIN:
482         case Z_EROFS_LCLUSTER_TYPE_HEAD1:
483         case Z_EROFS_LCLUSTER_TYPE_HEAD2:
484                 if (endoff >= m.clusterofs) {
485                         m.headtype = m.type;
486                         map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
487                         /*
488                          * For ztailpacking files, in order to inline data more
489                          * effectively, special EOF lclusters are now supported
490                          * which can have three parts at most.
491                          */
492                         if (ztailpacking && end > inode->i_size)
493                                 end = inode->i_size;
494                         break;
495                 }
496                 /* m.lcn should be >= 1 if endoff < m.clusterofs */
497                 if (!m.lcn) {
498                         erofs_err(inode->i_sb,
499                                   "invalid logical cluster 0 at nid %llu",
500                                   vi->nid);
501                         err = -EFSCORRUPTED;
502                         goto unmap_out;
503                 }
504                 end = (m.lcn << lclusterbits) | m.clusterofs;
505                 map->m_flags |= EROFS_MAP_FULL_MAPPED;
506                 m.delta[0] = 1;
507                 fallthrough;
508         case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
509                 /* get the corresponding first chunk */
510                 err = z_erofs_extent_lookback(&m, m.delta[0]);
511                 if (err)
512                         goto unmap_out;
513                 break;
514         default:
515                 erofs_err(inode->i_sb,
516                           "unknown type %u @ offset %llu of nid %llu",
517                           m.type, ofs, vi->nid);
518                 err = -EOPNOTSUPP;
519                 goto unmap_out;
520         }
521         if (m.partialref)
522                 map->m_flags |= EROFS_MAP_PARTIAL_REF;
523         map->m_llen = end - map->m_la;
524
525         if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
526                 vi->z_tailextent_headlcn = m.lcn;
527                 /* for non-compact indexes, fragmentoff is 64 bits */
528                 if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
529                         vi->z_fragmentoff |= (u64)m.pblk << 32;
530         }
531         if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
532                 map->m_flags |= EROFS_MAP_META;
533                 map->m_pa = vi->z_idataoff;
534                 map->m_plen = vi->z_idata_size;
535         } else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
536                 map->m_flags |= EROFS_MAP_FRAGMENT;
537         } else {
538                 map->m_pa = erofs_pos(inode->i_sb, m.pblk);
539                 err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
540                 if (err)
541                         goto unmap_out;
542         }
543
544         if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) {
545                 if (map->m_llen > map->m_plen) {
546                         DBG_BUGON(1);
547                         err = -EFSCORRUPTED;
548                         goto unmap_out;
549                 }
550                 if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
551                         map->m_algorithmformat =
552                                 Z_EROFS_COMPRESSION_INTERLACED;
553                 else
554                         map->m_algorithmformat =
555                                 Z_EROFS_COMPRESSION_SHIFTED;
556         } else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
557                 map->m_algorithmformat = vi->z_algorithmtype[1];
558         } else {
559                 map->m_algorithmformat = vi->z_algorithmtype[0];
560         }
561
562         if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
563             ((flags & EROFS_GET_BLOCKS_READMORE) &&
564              map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA &&
565              map->m_llen >= i_blocksize(inode))) {
566                 err = z_erofs_get_extent_decompressedlen(&m);
567                 if (!err)
568                         map->m_flags |= EROFS_MAP_FULL_MAPPED;
569         }
570
571 unmap_out:
572         erofs_unmap_metabuf(&m.map->buf);
573         return err;
574 }
575
576 static int z_erofs_fill_inode_lazy(struct inode *inode)
577 {
578         struct erofs_inode *const vi = EROFS_I(inode);
579         struct super_block *const sb = inode->i_sb;
580         int err, headnr;
581         erofs_off_t pos;
582         struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
583         void *kaddr;
584         struct z_erofs_map_header *h;
585
586         if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
587                 /*
588                  * paired with smp_mb() at the end of the function to ensure
589                  * fields will only be observed after the bit is set.
590                  */
591                 smp_mb();
592                 return 0;
593         }
594
595         if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
596                 return -ERESTARTSYS;
597
598         err = 0;
599         if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
600                 goto out_unlock;
601
602         pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
603         kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(sb, pos), EROFS_KMAP);
604         if (IS_ERR(kaddr)) {
605                 err = PTR_ERR(kaddr);
606                 goto out_unlock;
607         }
608
609         h = kaddr + erofs_blkoff(sb, pos);
610         /*
611          * if the highest bit of the 8-byte map header is set, the whole file
612          * is stored in the packed inode. The rest bits keeps z_fragmentoff.
613          */
614         if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
615                 vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
616                 vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
617                 vi->z_tailextent_headlcn = 0;
618                 goto done;
619         }
620         vi->z_advise = le16_to_cpu(h->h_advise);
621         vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
622         vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
623
624         headnr = 0;
625         if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
626             vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) {
627                 erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
628                           headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
629                 err = -EOPNOTSUPP;
630                 goto out_put_metabuf;
631         }
632
633         vi->z_logical_clusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 7);
634         if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
635             vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
636                             Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
637                 erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
638                           vi->nid);
639                 err = -EFSCORRUPTED;
640                 goto out_put_metabuf;
641         }
642         if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
643             !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
644             !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
645                 erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
646                           vi->nid);
647                 err = -EFSCORRUPTED;
648                 goto out_put_metabuf;
649         }
650
651         if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
652                 struct erofs_map_blocks map = {
653                         .buf = __EROFS_BUF_INITIALIZER
654                 };
655
656                 vi->z_idata_size = le16_to_cpu(h->h_idata_size);
657                 err = z_erofs_do_map_blocks(inode, &map,
658                                             EROFS_GET_BLOCKS_FINDTAIL);
659                 erofs_put_metabuf(&map.buf);
660
661                 if (!map.m_plen ||
662                     erofs_blkoff(sb, map.m_pa) + map.m_plen > sb->s_blocksize) {
663                         erofs_err(sb, "invalid tail-packing pclustersize %llu",
664                                   map.m_plen);
665                         err = -EFSCORRUPTED;
666                 }
667                 if (err < 0)
668                         goto out_put_metabuf;
669         }
670
671         if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
672             !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
673                 struct erofs_map_blocks map = {
674                         .buf = __EROFS_BUF_INITIALIZER
675                 };
676
677                 vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
678                 err = z_erofs_do_map_blocks(inode, &map,
679                                             EROFS_GET_BLOCKS_FINDTAIL);
680                 erofs_put_metabuf(&map.buf);
681                 if (err < 0)
682                         goto out_put_metabuf;
683         }
684 done:
685         /* paired with smp_mb() at the beginning of the function */
686         smp_mb();
687         set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
688 out_put_metabuf:
689         erofs_put_metabuf(&buf);
690 out_unlock:
691         clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
692         return err;
693 }
694
695 int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
696                             int flags)
697 {
698         struct erofs_inode *const vi = EROFS_I(inode);
699         int err = 0;
700
701         trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
702
703         /* when trying to read beyond EOF, leave it unmapped */
704         if (map->m_la >= inode->i_size) {
705                 map->m_llen = map->m_la + 1 - inode->i_size;
706                 map->m_la = inode->i_size;
707                 map->m_flags = 0;
708                 goto out;
709         }
710
711         err = z_erofs_fill_inode_lazy(inode);
712         if (err)
713                 goto out;
714
715         if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
716             !vi->z_tailextent_headlcn) {
717                 map->m_la = 0;
718                 map->m_llen = inode->i_size;
719                 map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
720                                 EROFS_MAP_FRAGMENT;
721                 goto out;
722         }
723
724         err = z_erofs_do_map_blocks(inode, map, flags);
725 out:
726         trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
727         return err;
728 }
729
730 static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
731                                 loff_t length, unsigned int flags,
732                                 struct iomap *iomap, struct iomap *srcmap)
733 {
734         int ret;
735         struct erofs_map_blocks map = { .m_la = offset };
736
737         ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
738         erofs_put_metabuf(&map.buf);
739         if (ret < 0)
740                 return ret;
741
742         iomap->bdev = inode->i_sb->s_bdev;
743         iomap->offset = map.m_la;
744         iomap->length = map.m_llen;
745         if (map.m_flags & EROFS_MAP_MAPPED) {
746                 iomap->type = IOMAP_MAPPED;
747                 iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
748                               IOMAP_NULL_ADDR : map.m_pa;
749         } else {
750                 iomap->type = IOMAP_HOLE;
751                 iomap->addr = IOMAP_NULL_ADDR;
752                 /*
753                  * No strict rule on how to describe extents for post EOF, yet
754                  * we need to do like below. Otherwise, iomap itself will get
755                  * into an endless loop on post EOF.
756                  *
757                  * Calculate the effective offset by subtracting extent start
758                  * (map.m_la) from the requested offset, and add it to length.
759                  * (NB: offset >= map.m_la always)
760                  */
761                 if (iomap->offset >= inode->i_size)
762                         iomap->length = length + offset - map.m_la;
763         }
764         iomap->flags = 0;
765         return 0;
766 }
767
768 const struct iomap_ops z_erofs_iomap_report_ops = {
769         .iomap_begin = z_erofs_iomap_begin_report,
770 };