Merge tag 'xilinx-for-v2021.01' of https://gitlab.denx.de/u-boot/custodians/u-boot...
[platform/kernel/u-boot.git] / fs / btrfs / disk-io.c
1 // SPDX-License-Identifier: GPL-2.0+
2 #include <common.h>
3 #include <fs_internal.h>
4 #include <uuid.h>
5 #include <memalign.h>
6 #include "kernel-shared/btrfs_tree.h"
7 #include "common/rbtree-utils.h"
8 #include "disk-io.h"
9 #include "ctree.h"
10 #include "btrfs.h"
11 #include "volumes.h"
12 #include "extent-io.h"
13 #include "crypto/hash.h"
14
15 /* specified errno for check_tree_block */
16 #define BTRFS_BAD_BYTENR                (-1)
17 #define BTRFS_BAD_FSID                  (-2)
18 #define BTRFS_BAD_LEVEL                 (-3)
19 #define BTRFS_BAD_NRITEMS               (-4)
20
21 /* Calculate max possible nritems for a leaf/node */
22 static u32 max_nritems(u8 level, u32 nodesize)
23 {
24
25         if (level == 0)
26                 return ((nodesize - sizeof(struct btrfs_header)) /
27                         sizeof(struct btrfs_item));
28         return ((nodesize - sizeof(struct btrfs_header)) /
29                 sizeof(struct btrfs_key_ptr));
30 }
31
32 static int check_tree_block(struct btrfs_fs_info *fs_info,
33                             struct extent_buffer *buf)
34 {
35
36         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
37         u32 nodesize = fs_info->nodesize;
38         bool fsid_match = false;
39         int ret = BTRFS_BAD_FSID;
40
41         if (buf->start != btrfs_header_bytenr(buf))
42                 return BTRFS_BAD_BYTENR;
43         if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
44                 return BTRFS_BAD_LEVEL;
45         if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
46                                                     nodesize))
47                 return BTRFS_BAD_NRITEMS;
48
49         /* Only leaf can be empty */
50         if (btrfs_header_nritems(buf) == 0 &&
51             btrfs_header_level(buf) != 0)
52                 return BTRFS_BAD_NRITEMS;
53
54         while (fs_devices) {
55                 /*
56                  * Checking the incompat flag is only valid for the current
57                  * fs. For seed devices it's forbidden to have their uuid
58                  * changed so reading ->fsid in this case is fine
59                  */
60                 if (fs_devices == fs_info->fs_devices &&
61                     btrfs_fs_incompat(fs_info, METADATA_UUID))
62                         fsid_match = !memcmp_extent_buffer(buf,
63                                                    fs_devices->metadata_uuid,
64                                                    btrfs_header_fsid(),
65                                                    BTRFS_FSID_SIZE);
66                 else
67                         fsid_match = !memcmp_extent_buffer(buf,
68                                                     fs_devices->fsid,
69                                                     btrfs_header_fsid(),
70                                                     BTRFS_FSID_SIZE);
71
72
73                 if (fsid_match) {
74                         ret = 0;
75                         break;
76                 }
77                 fs_devices = fs_devices->seed;
78         }
79         return ret;
80 }
81
82 static void print_tree_block_error(struct btrfs_fs_info *fs_info,
83                                 struct extent_buffer *eb,
84                                 int err)
85 {
86         char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
87         char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
88         u8 buf[BTRFS_UUID_SIZE];
89
90         if (!err)
91                 return;
92
93         fprintf(stderr, "bad tree block %llu, ", eb->start);
94         switch (err) {
95         case BTRFS_BAD_FSID:
96                 read_extent_buffer(eb, buf, btrfs_header_fsid(),
97                                    BTRFS_UUID_SIZE);
98                 uuid_unparse(buf, found_uuid);
99                 uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
100                 fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
101                         fs_uuid, found_uuid);
102                 break;
103         case BTRFS_BAD_BYTENR:
104                 fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
105                         eb->start, btrfs_header_bytenr(eb));
106                 break;
107         case BTRFS_BAD_LEVEL:
108                 fprintf(stderr, "bad level, %u > %d\n",
109                         btrfs_header_level(eb), BTRFS_MAX_LEVEL);
110                 break;
111         case BTRFS_BAD_NRITEMS:
112                 fprintf(stderr, "invalid nr_items: %u\n",
113                         btrfs_header_nritems(eb));
114                 break;
115         }
116 }
117
118 int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
119 {
120         memset(out, 0, BTRFS_CSUM_SIZE);
121
122         switch (csum_type) {
123         case BTRFS_CSUM_TYPE_CRC32:
124                 return hash_crc32c(data, len, out);
125         case BTRFS_CSUM_TYPE_XXHASH:
126                 return hash_xxhash(data, len, out);
127         case BTRFS_CSUM_TYPE_SHA256:
128                 return hash_sha256(data, len, out);
129         default:
130                 printf("Unknown csum type %d\n", csum_type);
131                 return -EINVAL;
132         }
133 }
134
135 /*
136  * Check if the super is valid:
137  * - nodesize/sectorsize - minimum, maximum, alignment
138  * - tree block starts   - alignment
139  * - number of devices   - something sane
140  * - sys array size      - maximum
141  */
142 static int btrfs_check_super(struct btrfs_super_block *sb)
143 {
144         u8 result[BTRFS_CSUM_SIZE];
145         u16 csum_type;
146         int csum_size;
147         u8 *metadata_uuid;
148
149         if (btrfs_super_magic(sb) != BTRFS_MAGIC)
150                 return -EIO;
151
152         csum_type = btrfs_super_csum_type(sb);
153         if (csum_type >= btrfs_super_num_csums()) {
154                 error("unsupported checksum algorithm %u", csum_type);
155                 return -EIO;
156         }
157         csum_size = btrfs_super_csum_size(sb);
158
159         btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
160                         result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
161
162         if (memcmp(result, sb->csum, csum_size)) {
163                 error("superblock checksum mismatch");
164                 return -EIO;
165         }
166         if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
167                 error("tree_root level too big: %d >= %d",
168                         btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
169                 goto error_out;
170         }
171         if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
172                 error("chunk_root level too big: %d >= %d",
173                         btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
174                 goto error_out;
175         }
176         if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
177                 error("log_root level too big: %d >= %d",
178                         btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
179                 goto error_out;
180         }
181
182         if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
183                 error("tree_root block unaligned: %llu", btrfs_super_root(sb));
184                 goto error_out;
185         }
186         if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
187                 error("chunk_root block unaligned: %llu",
188                         btrfs_super_chunk_root(sb));
189                 goto error_out;
190         }
191         if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
192                 error("log_root block unaligned: %llu",
193                         btrfs_super_log_root(sb));
194                 goto error_out;
195         }
196         if (btrfs_super_nodesize(sb) < 4096) {
197                 error("nodesize too small: %u < 4096",
198                         btrfs_super_nodesize(sb));
199                 goto error_out;
200         }
201         if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
202                 error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
203                 goto error_out;
204         }
205         if (btrfs_super_sectorsize(sb) < 4096) {
206                 error("sectorsize too small: %u < 4096",
207                         btrfs_super_sectorsize(sb));
208                 goto error_out;
209         }
210         if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
211                 error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
212                 goto error_out;
213         }
214         if (btrfs_super_total_bytes(sb) == 0) {
215                 error("invalid total_bytes 0");
216                 goto error_out;
217         }
218         if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
219                 error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
220                 goto error_out;
221         }
222         if ((btrfs_super_stripesize(sb) != 4096)
223                 && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
224                 error("invalid stripesize %u", btrfs_super_stripesize(sb));
225                 goto error_out;
226         }
227
228         if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
229                 metadata_uuid = sb->metadata_uuid;
230         else
231                 metadata_uuid = sb->fsid;
232
233         if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
234                 char fsid[BTRFS_UUID_UNPARSED_SIZE];
235                 char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
236
237                 uuid_unparse(sb->metadata_uuid, fsid);
238                 uuid_unparse(sb->dev_item.fsid, dev_fsid);
239                 error("dev_item UUID does not match fsid: %s != %s",
240                         dev_fsid, fsid);
241                 goto error_out;
242         }
243
244         /*
245          * Hint to catch really bogus numbers, bitflips or so
246          */
247         if (btrfs_super_num_devices(sb) > (1UL << 31)) {
248                 error("suspicious number of devices: %llu",
249                         btrfs_super_num_devices(sb));
250         }
251
252         if (btrfs_super_num_devices(sb) == 0) {
253                 error("number of devices is 0");
254                 goto error_out;
255         }
256
257         /*
258          * Obvious sys_chunk_array corruptions, it must hold at least one key
259          * and one chunk
260          */
261         if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
262                 error("system chunk array too big %u > %u",
263                       btrfs_super_sys_array_size(sb),
264                       BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
265                 goto error_out;
266         }
267         if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
268                         + sizeof(struct btrfs_chunk)) {
269                 error("system chunk array too small %u < %zu",
270                       btrfs_super_sys_array_size(sb),
271                       sizeof(struct btrfs_disk_key) +
272                       sizeof(struct btrfs_chunk));
273                 goto error_out;
274         }
275
276         return 0;
277
278 error_out:
279         error("superblock checksum matches but it has invalid members");
280         return -EIO;
281 }
282
283 /*
284  * btrfs_read_dev_super - read a valid primary superblock from a block device
285  * @desc,@part: file descriptor of the device
286  * @sb:         buffer where the superblock is going to be read in
287  *
288  * Unlike the btrfs-progs/kernel version, here we ony care about the first
289  * super block, thus it's much simpler.
290  */
291 int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
292                          struct btrfs_super_block *sb)
293 {
294         char tmp[BTRFS_SUPER_INFO_SIZE];
295         struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
296         int ret;
297
298         ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
299                               BTRFS_SUPER_INFO_OFFSET);
300         if (ret < BTRFS_SUPER_INFO_SIZE)
301                 return -EIO;
302
303         if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
304                 return -EIO;
305
306         if (btrfs_check_super(buf))
307                 return -EIO;
308
309         memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
310         return 0;
311 }
312
313 static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
314                                   int verify, int silent, u16 csum_type)
315 {
316         u8 result[BTRFS_CSUM_SIZE];
317         u32 len;
318
319         len = buf->len - BTRFS_CSUM_SIZE;
320         btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
321                         result, len);
322
323         if (verify) {
324                 if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
325                         /* FIXME: format */
326                         if (!silent)
327                                 printk("checksum verify failed on %llu found %08X wanted %08X\n",
328                                        (unsigned long long)buf->start,
329                                        result[0],
330                                        buf->data[0]);
331                         return 1;
332                 }
333         } else {
334                 write_extent_buffer(buf, result, 0, csum_size);
335         }
336         return 0;
337 }
338
339 int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
340                          u16 csum_type)
341 {
342         return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type);
343 }
344
345 static int csum_tree_block(struct btrfs_fs_info *fs_info,
346                            struct extent_buffer *buf, int verify)
347 {
348         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
349         u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
350
351         return csum_tree_block_size(buf, csum_size, verify, csum_type);
352 }
353
354 struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
355                                             u64 bytenr, u32 blocksize)
356 {
357         return find_extent_buffer(&fs_info->extent_cache,
358                                   bytenr, blocksize);
359 }
360
361 struct extent_buffer* btrfs_find_create_tree_block(
362                 struct btrfs_fs_info *fs_info, u64 bytenr)
363 {
364         return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize);
365 }
366
367 static int verify_parent_transid(struct extent_io_tree *io_tree,
368                                  struct extent_buffer *eb, u64 parent_transid,
369                                  int ignore)
370 {
371         int ret;
372
373         if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
374                 return 0;
375
376         if (extent_buffer_uptodate(eb) &&
377             btrfs_header_generation(eb) == parent_transid) {
378                 ret = 0;
379                 goto out;
380         }
381         printk("parent transid verify failed on %llu wanted %llu found %llu\n",
382                (unsigned long long)eb->start,
383                (unsigned long long)parent_transid,
384                (unsigned long long)btrfs_header_generation(eb));
385         if (ignore) {
386                 eb->flags |= EXTENT_BAD_TRANSID;
387                 printk("Ignoring transid failure\n");
388                 return 0;
389         }
390
391         ret = 1;
392 out:
393         clear_extent_buffer_uptodate(eb);
394         return ret;
395
396 }
397
398 int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
399 {
400         unsigned long offset = 0;
401         struct btrfs_multi_bio *multi = NULL;
402         struct btrfs_device *device;
403         int ret = 0;
404         u64 read_len;
405         unsigned long bytes_left = eb->len;
406
407         while (bytes_left) {
408                 read_len = bytes_left;
409                 device = NULL;
410
411                 ret = btrfs_map_block(info, READ, eb->start + offset,
412                                       &read_len, &multi, mirror, NULL);
413                 if (ret) {
414                         printk("Couldn't map the block %Lu\n", eb->start + offset);
415                         kfree(multi);
416                         return -EIO;
417                 }
418                 device = multi->stripes[0].dev;
419
420                 if (!device->desc || !device->part) {
421                         kfree(multi);
422                         return -EIO;
423                 }
424
425                 if (read_len > bytes_left)
426                         read_len = bytes_left;
427
428                 ret = read_extent_from_disk(device->desc, device->part,
429                                             multi->stripes[0].physical, eb,
430                                             offset, read_len);
431                 kfree(multi);
432                 multi = NULL;
433
434                 if (ret)
435                         return -EIO;
436                 offset += read_len;
437                 bytes_left -= read_len;
438         }
439         return 0;
440 }
441
442 struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
443                 u64 parent_transid)
444 {
445         int ret;
446         struct extent_buffer *eb;
447         u64 best_transid = 0;
448         u32 sectorsize = fs_info->sectorsize;
449         int mirror_num = 1;
450         int good_mirror = 0;
451         int candidate_mirror = 0;
452         int num_copies;
453         int ignore = 0;
454
455         /*
456          * Don't even try to create tree block for unaligned tree block
457          * bytenr.
458          * Such unaligned tree block will free overlapping extent buffer,
459          * causing use-after-free bugs for fuzzed images.
460          */
461         if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
462                 error("tree block bytenr %llu is not aligned to sectorsize %u",
463                       bytenr, sectorsize);
464                 return ERR_PTR(-EIO);
465         }
466
467         eb = btrfs_find_create_tree_block(fs_info, bytenr);
468         if (!eb)
469                 return ERR_PTR(-ENOMEM);
470
471         if (btrfs_buffer_uptodate(eb, parent_transid))
472                 return eb;
473
474         num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
475         while (1) {
476                 ret = read_whole_eb(fs_info, eb, mirror_num);
477                 if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
478                     check_tree_block(fs_info, eb) == 0 &&
479                     verify_parent_transid(&fs_info->extent_cache, eb,
480                                           parent_transid, ignore) == 0) {
481                         /*
482                          * check_tree_block() is less strict to allow btrfs
483                          * check to get raw eb with bad key order and fix it.
484                          * But we still need to try to get a good copy if
485                          * possible, or bad key order can go into tools like
486                          * btrfs ins dump-tree.
487                          */
488                         if (btrfs_header_level(eb))
489                                 ret = btrfs_check_node(fs_info, NULL, eb);
490                         else
491                                 ret = btrfs_check_leaf(fs_info, NULL, eb);
492                         if (!ret || candidate_mirror == mirror_num) {
493                                 btrfs_set_buffer_uptodate(eb);
494                                 return eb;
495                         }
496                         if (candidate_mirror <= 0)
497                                 candidate_mirror = mirror_num;
498                 }
499                 if (ignore) {
500                         if (candidate_mirror > 0) {
501                                 mirror_num = candidate_mirror;
502                                 continue;
503                         }
504                         if (check_tree_block(fs_info, eb))
505                                 print_tree_block_error(fs_info, eb,
506                                                 check_tree_block(fs_info, eb));
507                         else
508                                 fprintf(stderr, "Csum didn't match\n");
509                         ret = -EIO;
510                         break;
511                 }
512                 if (num_copies == 1) {
513                         ignore = 1;
514                         continue;
515                 }
516                 if (btrfs_header_generation(eb) > best_transid) {
517                         best_transid = btrfs_header_generation(eb);
518                         good_mirror = mirror_num;
519                 }
520                 mirror_num++;
521                 if (mirror_num > num_copies) {
522                         if (candidate_mirror > 0)
523                                 mirror_num = candidate_mirror;
524                         else
525                                 mirror_num = good_mirror;
526                         ignore = 1;
527                         continue;
528                 }
529         }
530         /*
531          * We failed to read this tree block, it be should deleted right now
532          * to avoid stale cache populate the cache.
533          */
534         free_extent_buffer(eb);
535         return ERR_PTR(ret);
536 }
537
538 int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
539                      u64 *len, int mirror)
540 {
541         u64 offset = 0;
542         struct btrfs_multi_bio *multi = NULL;
543         struct btrfs_device *device;
544         int ret = 0;
545         u64 max_len = *len;
546
547         ret = btrfs_map_block(fs_info, READ, logical, len, &multi, mirror,
548                               NULL);
549         if (ret) {
550                 fprintf(stderr, "Couldn't map the block %llu\n",
551                                 logical + offset);
552                 goto err;
553         }
554         device = multi->stripes[0].dev;
555
556         if (*len > max_len)
557                 *len = max_len;
558         if (!device->desc || !device->part) {
559                 ret = -EIO;
560                 goto err;
561         }
562
563         ret = __btrfs_devread(device->desc, device->part, data, *len,
564                               multi->stripes[0].physical);
565         if (ret != *len)
566                 ret = -EIO;
567         else
568                 ret = 0;
569 err:
570         kfree(multi);
571         return ret;
572 }
573
574 void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
575                       u64 objectid)
576 {
577         root->node = NULL;
578         root->track_dirty = 0;
579
580         root->fs_info = fs_info;
581         root->objectid = objectid;
582         root->last_trans = 0;
583         root->last_inode_alloc = 0;
584
585         memset(&root->root_key, 0, sizeof(root->root_key));
586         memset(&root->root_item, 0, sizeof(root->root_item));
587         root->root_key.objectid = objectid;
588 }
589
590 static int find_and_setup_root(struct btrfs_root *tree_root,
591                                struct btrfs_fs_info *fs_info,
592                                u64 objectid, struct btrfs_root *root)
593 {
594         int ret;
595         u64 generation;
596
597         btrfs_setup_root(root, fs_info, objectid);
598         ret = btrfs_find_last_root(tree_root, objectid,
599                                    &root->root_item, &root->root_key);
600         if (ret)
601                 return ret;
602
603         generation = btrfs_root_generation(&root->root_item);
604         root->node = read_tree_block(fs_info,
605                         btrfs_root_bytenr(&root->root_item), generation);
606         if (!extent_buffer_uptodate(root->node))
607                 return -EIO;
608
609         return 0;
610 }
611
612 int btrfs_free_fs_root(struct btrfs_root *root)
613 {
614         if (root->node)
615                 free_extent_buffer(root->node);
616         kfree(root);
617         return 0;
618 }
619
620 static void __free_fs_root(struct rb_node *node)
621 {
622         struct btrfs_root *root;
623
624         root = container_of(node, struct btrfs_root, rb_node);
625         btrfs_free_fs_root(root);
626 }
627
628 FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
629
630 struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
631                                                struct btrfs_key *location)
632 {
633         struct btrfs_root *root;
634         struct btrfs_root *tree_root = fs_info->tree_root;
635         struct btrfs_path *path;
636         struct extent_buffer *l;
637         u64 generation;
638         int ret = 0;
639
640         root = calloc(1, sizeof(*root));
641         if (!root)
642                 return ERR_PTR(-ENOMEM);
643         if (location->offset == (u64)-1) {
644                 ret = find_and_setup_root(tree_root, fs_info,
645                                           location->objectid, root);
646                 if (ret) {
647                         free(root);
648                         return ERR_PTR(ret);
649                 }
650                 goto insert;
651         }
652
653         btrfs_setup_root(root, fs_info,
654                          location->objectid);
655
656         path = btrfs_alloc_path();
657         if (!path) {
658                 free(root);
659                 return ERR_PTR(-ENOMEM);
660         }
661
662         ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
663         if (ret != 0) {
664                 if (ret > 0)
665                         ret = -ENOENT;
666                 goto out;
667         }
668         l = path->nodes[0];
669         read_extent_buffer(l, &root->root_item,
670                btrfs_item_ptr_offset(l, path->slots[0]),
671                sizeof(root->root_item));
672         memcpy(&root->root_key, location, sizeof(*location));
673
674         /* If this root is already an orphan, no need to read */
675         if (btrfs_root_refs(&root->root_item) == 0) {
676                 ret = -ENOENT;
677                 goto out;
678         }
679         ret = 0;
680 out:
681         btrfs_free_path(path);
682         if (ret) {
683                 free(root);
684                 return ERR_PTR(ret);
685         }
686         generation = btrfs_root_generation(&root->root_item);
687         root->node = read_tree_block(fs_info,
688                         btrfs_root_bytenr(&root->root_item), generation);
689         if (!extent_buffer_uptodate(root->node)) {
690                 free(root);
691                 return ERR_PTR(-EIO);
692         }
693 insert:
694         root->ref_cows = 1;
695         return root;
696 }
697
698 static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
699                                             void *data)
700 {
701         u64 objectid = *((u64 *)data);
702         struct btrfs_root *root;
703
704         root = rb_entry(node, struct btrfs_root, rb_node);
705         if (objectid > root->objectid)
706                 return 1;
707         else if (objectid < root->objectid)
708                 return -1;
709         else
710                 return 0;
711 }
712
713 int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
714 {
715         struct btrfs_root *root;
716
717         root = rb_entry(node2, struct btrfs_root, rb_node);
718         return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
719 }
720
721 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
722                                       struct btrfs_key *location)
723 {
724         struct btrfs_root *root;
725         struct rb_node *node;
726         int ret;
727         u64 objectid = location->objectid;
728
729         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
730                 return fs_info->tree_root;
731         if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
732                 return fs_info->chunk_root;
733         if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
734                 return fs_info->csum_root;
735         BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID ||
736                location->offset != (u64)-1);
737
738         node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
739                          btrfs_fs_roots_compare_objectids, NULL);
740         if (node)
741                 return container_of(node, struct btrfs_root, rb_node);
742
743         root = btrfs_read_fs_root_no_cache(fs_info, location);
744         if (IS_ERR(root))
745                 return root;
746
747         ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
748                         btrfs_fs_roots_compare_roots);
749         BUG_ON(ret);
750         return root;
751 }
752
753 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
754 {
755         free(fs_info->tree_root);
756         free(fs_info->chunk_root);
757         free(fs_info->csum_root);
758         free(fs_info->super_copy);
759         free(fs_info);
760 }
761
762 struct btrfs_fs_info *btrfs_new_fs_info(void)
763 {
764         struct btrfs_fs_info *fs_info;
765
766         fs_info = calloc(1, sizeof(struct btrfs_fs_info));
767         if (!fs_info)
768                 return NULL;
769
770         fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
771         fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
772         fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
773         fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
774
775         if (!fs_info->tree_root || !fs_info->chunk_root ||
776             !fs_info->csum_root || !fs_info->super_copy)
777                 goto free_all;
778
779         extent_io_tree_init(&fs_info->extent_cache);
780
781         fs_info->fs_root_tree = RB_ROOT;
782         cache_tree_init(&fs_info->mapping_tree.cache_tree);
783
784         mutex_init(&fs_info->fs_mutex);
785
786         return fs_info;
787 free_all:
788         btrfs_free_fs_info(fs_info);
789         return NULL;
790 }
791
792 static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
793                                       struct btrfs_root *info_root,
794                                       u64 objectid, char *str)
795 {
796         struct btrfs_root *root = fs_info->tree_root;
797         int ret;
798
799         ret = find_and_setup_root(root, fs_info, objectid, info_root);
800         if (ret) {
801                 error("could not setup %s tree", str);
802                 return -EIO;
803         }
804
805         return 0;
806 }
807
808 int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info)
809 {
810         struct btrfs_super_block *sb = fs_info->super_copy;
811         struct btrfs_root *root;
812         struct btrfs_key key;
813         u64 root_tree_bytenr;
814         u64 generation;
815         int ret;
816
817         root = fs_info->tree_root;
818         btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
819         generation = btrfs_super_generation(sb);
820
821         root_tree_bytenr = btrfs_super_root(sb);
822
823         root->node = read_tree_block(fs_info, root_tree_bytenr, generation);
824         if (!extent_buffer_uptodate(root->node)) {
825                 fprintf(stderr, "Couldn't read tree root\n");
826                 return -EIO;
827         }
828
829         ret = setup_root_or_create_block(fs_info, fs_info->csum_root,
830                                          BTRFS_CSUM_TREE_OBJECTID, "csum");
831         if (ret)
832                 return ret;
833         fs_info->csum_root->track_dirty = 1;
834
835         fs_info->last_trans_committed = generation;
836
837         key.objectid = BTRFS_FS_TREE_OBJECTID;
838         key.type = BTRFS_ROOT_ITEM_KEY;
839         key.offset = (u64)-1;
840         fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
841
842         if (IS_ERR(fs_info->fs_root))
843                 return -EIO;
844         return 0;
845 }
846
847 void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
848 {
849         if (fs_info->csum_root)
850                 free_extent_buffer(fs_info->csum_root->node);
851         if (fs_info->tree_root)
852                 free_extent_buffer(fs_info->tree_root->node);
853         if (fs_info->chunk_root)
854                 free_extent_buffer(fs_info->chunk_root->node);
855 }
856
857 static void free_map_lookup(struct cache_extent *ce)
858 {
859         struct map_lookup *map;
860
861         map = container_of(ce, struct map_lookup, ce);
862         kfree(map);
863 }
864
865 FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
866
867 void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
868 {
869         free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
870         extent_io_tree_cleanup(&fs_info->extent_cache);
871 }
872
873 static int btrfs_scan_fs_devices(struct blk_desc *desc,
874                                  struct disk_partition *part,
875                                  struct btrfs_fs_devices **fs_devices)
876 {
877         u64 total_devs;
878         int ret;
879
880         if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
881                      desc->blksz) > (part->size << desc->log2blksz)) {
882                 error("superblock end %u is larger than device size " LBAFU,
883                                 BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
884                                 part->size << desc->log2blksz);
885                 return -EINVAL;
886         }
887
888         ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs);
889         if (ret) {
890                 fprintf(stderr, "No valid Btrfs found\n");
891                 return ret;
892         }
893         return 0;
894 }
895
896 int btrfs_check_fs_compatibility(struct btrfs_super_block *sb)
897 {
898         u64 features;
899
900         features = btrfs_super_incompat_flags(sb) &
901                    ~BTRFS_FEATURE_INCOMPAT_SUPP;
902         if (features) {
903                 printk("couldn't open because of unsupported "
904                        "option features (%llx).\n",
905                        (unsigned long long)features);
906                 return -ENOTSUPP;
907         }
908
909         features = btrfs_super_incompat_flags(sb);
910         if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
911                 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
912                 btrfs_set_super_incompat_flags(sb, features);
913         }
914
915         return 0;
916 }
917
918 static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
919 {
920         struct btrfs_super_block *sb = fs_info->super_copy;
921         u64 chunk_root_bytenr;
922         u64 generation;
923         int ret;
924
925         btrfs_setup_root(fs_info->chunk_root, fs_info,
926                         BTRFS_CHUNK_TREE_OBJECTID);
927
928         ret = btrfs_read_sys_array(fs_info);
929         if (ret)
930                 return ret;
931
932         generation = btrfs_super_chunk_root_generation(sb);
933         chunk_root_bytenr = btrfs_super_chunk_root(sb);
934
935         fs_info->chunk_root->node = read_tree_block(fs_info,
936                                                     chunk_root_bytenr,
937                                                     generation);
938         if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
939                 error("cannot read chunk root");
940                 return -EIO;
941         }
942
943         ret = btrfs_read_chunk_tree(fs_info);
944         if (ret) {
945                 fprintf(stderr, "Couldn't read chunk tree\n");
946                 return ret;
947         }
948         return 0;
949 }
950
951 struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
952                                          struct disk_partition *part)
953 {
954         struct btrfs_fs_info *fs_info;
955         struct btrfs_super_block *disk_super;
956         struct btrfs_fs_devices *fs_devices = NULL;
957         struct extent_buffer *eb;
958         int ret;
959
960         fs_info = btrfs_new_fs_info();
961         if (!fs_info) {
962                 fprintf(stderr, "Failed to allocate memory for fs_info\n");
963                 return NULL;
964         }
965
966         ret = btrfs_scan_fs_devices(desc, part, &fs_devices);
967         if (ret)
968                 goto out;
969
970         fs_info->fs_devices = fs_devices;
971
972         ret = btrfs_open_devices(fs_devices);
973         if (ret)
974                 goto out;
975
976         disk_super = fs_info->super_copy;
977         ret = btrfs_read_dev_super(desc, part, disk_super);
978         if (ret) {
979                 printk("No valid btrfs found\n");
980                 goto out_devices;
981         }
982
983         if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) {
984                 fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
985                 goto out_devices;
986         }
987
988         ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
989         if (btrfs_fs_incompat(fs_info, METADATA_UUID))
990                 ASSERT(!memcmp(disk_super->metadata_uuid,
991                                fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
992
993         fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
994         fs_info->nodesize = btrfs_super_nodesize(disk_super);
995         fs_info->stripesize = btrfs_super_stripesize(disk_super);
996
997         ret = btrfs_check_fs_compatibility(fs_info->super_copy);
998         if (ret)
999                 goto out_devices;
1000
1001         ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
1002         if (ret)
1003                 goto out_chunk;
1004
1005         /* Chunk tree root is unable to read, return directly */
1006         if (!fs_info->chunk_root)
1007                 return fs_info;
1008
1009         eb = fs_info->chunk_root->node;
1010         read_extent_buffer(eb, fs_info->chunk_tree_uuid,
1011                            btrfs_header_chunk_tree_uuid(eb),
1012                            BTRFS_UUID_SIZE);
1013
1014         ret = btrfs_setup_all_roots(fs_info);
1015         if (ret)
1016                 goto out_chunk;
1017
1018         return fs_info;
1019
1020 out_chunk:
1021         btrfs_release_all_roots(fs_info);
1022         btrfs_cleanup_all_caches(fs_info);
1023 out_devices:
1024         btrfs_close_devices(fs_devices);
1025 out:
1026         btrfs_free_fs_info(fs_info);
1027         return NULL;
1028 }
1029
1030 int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
1031 {
1032         int ret;
1033         int err = 0;
1034
1035         free_fs_roots_tree(&fs_info->fs_root_tree);
1036
1037         btrfs_release_all_roots(fs_info);
1038         ret = btrfs_close_devices(fs_info->fs_devices);
1039         btrfs_cleanup_all_caches(fs_info);
1040         btrfs_free_fs_info(fs_info);
1041         if (!err)
1042                 err = ret;
1043         return err;
1044 }
1045
1046 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
1047 {
1048         int ret;
1049
1050         ret = extent_buffer_uptodate(buf);
1051         if (!ret)
1052                 return ret;
1053
1054         ret = verify_parent_transid(&buf->fs_info->extent_cache, buf,
1055                                     parent_transid, 1);
1056         return !ret;
1057 }
1058
1059 int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
1060 {
1061         return set_extent_buffer_uptodate(eb);
1062 }