mach-imx: bootaux: fix building with disabled bootelf
[platform/kernel/u-boot.git] / fs / btrfs / disk-io.c
1 // SPDX-License-Identifier: GPL-2.0+
2 #include <common.h>
3 #include <fs_internal.h>
4 #include <log.h>
5 #include <uuid.h>
6 #include <memalign.h>
7 #include "kernel-shared/btrfs_tree.h"
8 #include "common/rbtree-utils.h"
9 #include "disk-io.h"
10 #include "ctree.h"
11 #include "btrfs.h"
12 #include "volumes.h"
13 #include "extent-io.h"
14 #include "crypto/hash.h"
15
16 /* specified errno for check_tree_block */
17 #define BTRFS_BAD_BYTENR                (-1)
18 #define BTRFS_BAD_FSID                  (-2)
19 #define BTRFS_BAD_LEVEL                 (-3)
20 #define BTRFS_BAD_NRITEMS               (-4)
21
22 /* Calculate max possible nritems for a leaf/node */
23 static u32 max_nritems(u8 level, u32 nodesize)
24 {
25
26         if (level == 0)
27                 return ((nodesize - sizeof(struct btrfs_header)) /
28                         sizeof(struct btrfs_item));
29         return ((nodesize - sizeof(struct btrfs_header)) /
30                 sizeof(struct btrfs_key_ptr));
31 }
32
33 static int check_tree_block(struct btrfs_fs_info *fs_info,
34                             struct extent_buffer *buf)
35 {
36
37         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
38         u32 nodesize = fs_info->nodesize;
39         bool fsid_match = false;
40         int ret = BTRFS_BAD_FSID;
41
42         if (buf->start != btrfs_header_bytenr(buf))
43                 return BTRFS_BAD_BYTENR;
44         if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
45                 return BTRFS_BAD_LEVEL;
46         if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
47                                                     nodesize))
48                 return BTRFS_BAD_NRITEMS;
49
50         /* Only leaf can be empty */
51         if (btrfs_header_nritems(buf) == 0 &&
52             btrfs_header_level(buf) != 0)
53                 return BTRFS_BAD_NRITEMS;
54
55         while (fs_devices) {
56                 /*
57                  * Checking the incompat flag is only valid for the current
58                  * fs. For seed devices it's forbidden to have their uuid
59                  * changed so reading ->fsid in this case is fine
60                  */
61                 if (fs_devices == fs_info->fs_devices &&
62                     btrfs_fs_incompat(fs_info, METADATA_UUID))
63                         fsid_match = !memcmp_extent_buffer(buf,
64                                                    fs_devices->metadata_uuid,
65                                                    btrfs_header_fsid(),
66                                                    BTRFS_FSID_SIZE);
67                 else
68                         fsid_match = !memcmp_extent_buffer(buf,
69                                                     fs_devices->fsid,
70                                                     btrfs_header_fsid(),
71                                                     BTRFS_FSID_SIZE);
72
73
74                 if (fsid_match) {
75                         ret = 0;
76                         break;
77                 }
78                 fs_devices = fs_devices->seed;
79         }
80         return ret;
81 }
82
83 static void print_tree_block_error(struct btrfs_fs_info *fs_info,
84                                 struct extent_buffer *eb,
85                                 int err)
86 {
87         char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
88         char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
89         u8 buf[BTRFS_UUID_SIZE];
90
91         if (!err)
92                 return;
93
94         fprintf(stderr, "bad tree block %llu, ", eb->start);
95         switch (err) {
96         case BTRFS_BAD_FSID:
97                 read_extent_buffer(eb, buf, btrfs_header_fsid(),
98                                    BTRFS_UUID_SIZE);
99                 uuid_unparse(buf, found_uuid);
100                 uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
101                 fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
102                         fs_uuid, found_uuid);
103                 break;
104         case BTRFS_BAD_BYTENR:
105                 fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
106                         eb->start, btrfs_header_bytenr(eb));
107                 break;
108         case BTRFS_BAD_LEVEL:
109                 fprintf(stderr, "bad level, %u > %d\n",
110                         btrfs_header_level(eb), BTRFS_MAX_LEVEL);
111                 break;
112         case BTRFS_BAD_NRITEMS:
113                 fprintf(stderr, "invalid nr_items: %u\n",
114                         btrfs_header_nritems(eb));
115                 break;
116         }
117 }
118
119 int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
120 {
121         memset(out, 0, BTRFS_CSUM_SIZE);
122
123         switch (csum_type) {
124         case BTRFS_CSUM_TYPE_CRC32:
125                 return hash_crc32c(data, len, out);
126         case BTRFS_CSUM_TYPE_XXHASH:
127                 return hash_xxhash(data, len, out);
128         case BTRFS_CSUM_TYPE_SHA256:
129                 return hash_sha256(data, len, out);
130         case BTRFS_CSUM_TYPE_BLAKE2:
131                 return hash_blake2(data, len, out);
132         default:
133                 printf("Unknown csum type %d\n", csum_type);
134                 return -EINVAL;
135         }
136 }
137
138 /*
139  * Check if the super is valid:
140  * - nodesize/sectorsize - minimum, maximum, alignment
141  * - tree block starts   - alignment
142  * - number of devices   - something sane
143  * - sys array size      - maximum
144  */
145 static int btrfs_check_super(struct btrfs_super_block *sb)
146 {
147         u8 result[BTRFS_CSUM_SIZE];
148         u16 csum_type;
149         int csum_size;
150         u8 *metadata_uuid;
151
152         if (btrfs_super_magic(sb) != BTRFS_MAGIC)
153                 return -EIO;
154
155         csum_type = btrfs_super_csum_type(sb);
156         if (csum_type >= btrfs_super_num_csums()) {
157                 error("unsupported checksum algorithm %u", csum_type);
158                 return -EIO;
159         }
160         csum_size = btrfs_super_csum_size(sb);
161
162         btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
163                         result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
164
165         if (memcmp(result, sb->csum, csum_size)) {
166                 error("superblock checksum mismatch");
167                 return -EIO;
168         }
169         if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
170                 error("tree_root level too big: %d >= %d",
171                         btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
172                 goto error_out;
173         }
174         if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
175                 error("chunk_root level too big: %d >= %d",
176                         btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
177                 goto error_out;
178         }
179         if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
180                 error("log_root level too big: %d >= %d",
181                         btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
182                 goto error_out;
183         }
184
185         if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
186                 error("tree_root block unaligned: %llu", btrfs_super_root(sb));
187                 goto error_out;
188         }
189         if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
190                 error("chunk_root block unaligned: %llu",
191                         btrfs_super_chunk_root(sb));
192                 goto error_out;
193         }
194         if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
195                 error("log_root block unaligned: %llu",
196                         btrfs_super_log_root(sb));
197                 goto error_out;
198         }
199         if (btrfs_super_nodesize(sb) < 4096) {
200                 error("nodesize too small: %u < 4096",
201                         btrfs_super_nodesize(sb));
202                 goto error_out;
203         }
204         if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
205                 error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
206                 goto error_out;
207         }
208         if (btrfs_super_sectorsize(sb) < 4096) {
209                 error("sectorsize too small: %u < 4096",
210                         btrfs_super_sectorsize(sb));
211                 goto error_out;
212         }
213         if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
214                 error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
215                 goto error_out;
216         }
217         if (btrfs_super_total_bytes(sb) == 0) {
218                 error("invalid total_bytes 0");
219                 goto error_out;
220         }
221         if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
222                 error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
223                 goto error_out;
224         }
225         if ((btrfs_super_stripesize(sb) != 4096)
226                 && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
227                 error("invalid stripesize %u", btrfs_super_stripesize(sb));
228                 goto error_out;
229         }
230
231         if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
232                 metadata_uuid = sb->metadata_uuid;
233         else
234                 metadata_uuid = sb->fsid;
235
236         if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
237                 char fsid[BTRFS_UUID_UNPARSED_SIZE];
238                 char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
239
240                 uuid_unparse(sb->metadata_uuid, fsid);
241                 uuid_unparse(sb->dev_item.fsid, dev_fsid);
242                 error("dev_item UUID does not match fsid: %s != %s",
243                         dev_fsid, fsid);
244                 goto error_out;
245         }
246
247         /*
248          * Hint to catch really bogus numbers, bitflips or so
249          */
250         if (btrfs_super_num_devices(sb) > (1UL << 31)) {
251                 error("suspicious number of devices: %llu",
252                         btrfs_super_num_devices(sb));
253         }
254
255         if (btrfs_super_num_devices(sb) == 0) {
256                 error("number of devices is 0");
257                 goto error_out;
258         }
259
260         /*
261          * Obvious sys_chunk_array corruptions, it must hold at least one key
262          * and one chunk
263          */
264         if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
265                 error("system chunk array too big %u > %u",
266                       btrfs_super_sys_array_size(sb),
267                       BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
268                 goto error_out;
269         }
270         if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
271                         + sizeof(struct btrfs_chunk)) {
272                 error("system chunk array too small %u < %zu",
273                       btrfs_super_sys_array_size(sb),
274                       sizeof(struct btrfs_disk_key) +
275                       sizeof(struct btrfs_chunk));
276                 goto error_out;
277         }
278
279         return 0;
280
281 error_out:
282         error("superblock checksum matches but it has invalid members");
283         return -EIO;
284 }
285
286 /*
287  * btrfs_read_dev_super - read a valid primary superblock from a block device
288  * @desc,@part: file descriptor of the device
289  * @sb:         buffer where the superblock is going to be read in
290  *
291  * Unlike the btrfs-progs/kernel version, here we ony care about the first
292  * super block, thus it's much simpler.
293  */
294 int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
295                          struct btrfs_super_block *sb)
296 {
297         ALLOC_CACHE_ALIGN_BUFFER(char, tmp, BTRFS_SUPER_INFO_SIZE);
298         struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
299         int ret;
300
301         ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
302                               BTRFS_SUPER_INFO_OFFSET);
303         if (ret < BTRFS_SUPER_INFO_SIZE)
304                 return -EIO;
305
306         if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
307                 return -EIO;
308
309         if (btrfs_check_super(buf))
310                 return -EIO;
311
312         memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
313         return 0;
314 }
315
316 static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
317                                   int verify, int silent, u16 csum_type)
318 {
319         u8 result[BTRFS_CSUM_SIZE];
320         u32 len;
321
322         len = buf->len - BTRFS_CSUM_SIZE;
323         btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
324                         result, len);
325
326         if (verify) {
327                 if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
328                         /* FIXME: format */
329                         if (!silent)
330                                 printk("checksum verify failed on %llu found %08X wanted %08X\n",
331                                        (unsigned long long)buf->start,
332                                        result[0],
333                                        buf->data[0]);
334                         return 1;
335                 }
336         } else {
337                 write_extent_buffer(buf, result, 0, csum_size);
338         }
339         return 0;
340 }
341
342 int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
343                          u16 csum_type)
344 {
345         return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type);
346 }
347
348 static int csum_tree_block(struct btrfs_fs_info *fs_info,
349                            struct extent_buffer *buf, int verify)
350 {
351         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
352         u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
353
354         return csum_tree_block_size(buf, csum_size, verify, csum_type);
355 }
356
357 struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
358                                             u64 bytenr, u32 blocksize)
359 {
360         return find_extent_buffer(&fs_info->extent_cache,
361                                   bytenr, blocksize);
362 }
363
364 struct extent_buffer* btrfs_find_create_tree_block(
365                 struct btrfs_fs_info *fs_info, u64 bytenr)
366 {
367         return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize);
368 }
369
370 static int verify_parent_transid(struct extent_io_tree *io_tree,
371                                  struct extent_buffer *eb, u64 parent_transid,
372                                  int ignore)
373 {
374         int ret;
375
376         if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
377                 return 0;
378
379         if (extent_buffer_uptodate(eb) &&
380             btrfs_header_generation(eb) == parent_transid) {
381                 ret = 0;
382                 goto out;
383         }
384         printk("parent transid verify failed on %llu wanted %llu found %llu\n",
385                (unsigned long long)eb->start,
386                (unsigned long long)parent_transid,
387                (unsigned long long)btrfs_header_generation(eb));
388         if (ignore) {
389                 eb->flags |= EXTENT_BAD_TRANSID;
390                 printk("Ignoring transid failure\n");
391                 return 0;
392         }
393
394         ret = 1;
395 out:
396         clear_extent_buffer_uptodate(eb);
397         return ret;
398
399 }
400
401 int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
402 {
403         unsigned long offset = 0;
404         struct btrfs_multi_bio *multi = NULL;
405         struct btrfs_device *device;
406         int ret = 0;
407         u64 read_len;
408         unsigned long bytes_left = eb->len;
409
410         while (bytes_left) {
411                 read_len = bytes_left;
412                 device = NULL;
413
414                 ret = btrfs_map_block(info, READ, eb->start + offset,
415                                       &read_len, &multi, mirror, NULL);
416                 if (ret) {
417                         printk("Couldn't map the block %Lu\n", eb->start + offset);
418                         kfree(multi);
419                         return -EIO;
420                 }
421                 device = multi->stripes[0].dev;
422
423                 if (!device->desc || !device->part) {
424                         kfree(multi);
425                         return -EIO;
426                 }
427
428                 if (read_len > bytes_left)
429                         read_len = bytes_left;
430
431                 ret = read_extent_from_disk(device->desc, device->part,
432                                             multi->stripes[0].physical, eb,
433                                             offset, read_len);
434                 kfree(multi);
435                 multi = NULL;
436
437                 if (ret)
438                         return -EIO;
439                 offset += read_len;
440                 bytes_left -= read_len;
441         }
442         return 0;
443 }
444
445 struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
446                 u64 parent_transid)
447 {
448         int ret;
449         struct extent_buffer *eb;
450         u64 best_transid = 0;
451         u32 sectorsize = fs_info->sectorsize;
452         int mirror_num = 1;
453         int good_mirror = 0;
454         int candidate_mirror = 0;
455         int num_copies;
456         int ignore = 0;
457
458         /*
459          * Don't even try to create tree block for unaligned tree block
460          * bytenr.
461          * Such unaligned tree block will free overlapping extent buffer,
462          * causing use-after-free bugs for fuzzed images.
463          */
464         if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
465                 error("tree block bytenr %llu is not aligned to sectorsize %u",
466                       bytenr, sectorsize);
467                 return ERR_PTR(-EIO);
468         }
469
470         eb = btrfs_find_create_tree_block(fs_info, bytenr);
471         if (!eb)
472                 return ERR_PTR(-ENOMEM);
473
474         if (btrfs_buffer_uptodate(eb, parent_transid))
475                 return eb;
476
477         num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
478         while (1) {
479                 ret = read_whole_eb(fs_info, eb, mirror_num);
480                 if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
481                     check_tree_block(fs_info, eb) == 0 &&
482                     verify_parent_transid(&fs_info->extent_cache, eb,
483                                           parent_transid, ignore) == 0) {
484                         /*
485                          * check_tree_block() is less strict to allow btrfs
486                          * check to get raw eb with bad key order and fix it.
487                          * But we still need to try to get a good copy if
488                          * possible, or bad key order can go into tools like
489                          * btrfs ins dump-tree.
490                          */
491                         if (btrfs_header_level(eb))
492                                 ret = btrfs_check_node(fs_info, NULL, eb);
493                         else
494                                 ret = btrfs_check_leaf(fs_info, NULL, eb);
495                         if (!ret || candidate_mirror == mirror_num) {
496                                 btrfs_set_buffer_uptodate(eb);
497                                 return eb;
498                         }
499                         if (candidate_mirror <= 0)
500                                 candidate_mirror = mirror_num;
501                 }
502                 if (ignore) {
503                         if (candidate_mirror > 0) {
504                                 mirror_num = candidate_mirror;
505                                 continue;
506                         }
507                         if (check_tree_block(fs_info, eb))
508                                 print_tree_block_error(fs_info, eb,
509                                                 check_tree_block(fs_info, eb));
510                         else
511                                 fprintf(stderr, "Csum didn't match\n");
512                         ret = -EIO;
513                         break;
514                 }
515                 if (num_copies == 1) {
516                         ignore = 1;
517                         continue;
518                 }
519                 if (btrfs_header_generation(eb) > best_transid) {
520                         best_transid = btrfs_header_generation(eb);
521                         good_mirror = mirror_num;
522                 }
523                 mirror_num++;
524                 if (mirror_num > num_copies) {
525                         if (candidate_mirror > 0)
526                                 mirror_num = candidate_mirror;
527                         else
528                                 mirror_num = good_mirror;
529                         ignore = 1;
530                         continue;
531                 }
532         }
533         /*
534          * We failed to read this tree block, it be should deleted right now
535          * to avoid stale cache populate the cache.
536          */
537         free_extent_buffer(eb);
538         return ERR_PTR(ret);
539 }
540
541 int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
542                      u64 *len, int mirror)
543 {
544         u64 orig_len = *len;
545         u64 cur = logical;
546         struct btrfs_multi_bio *multi = NULL;
547         struct btrfs_device *device;
548         int ret = 0;
549
550         while (cur < logical + orig_len) {
551                 u64 cur_len = logical + orig_len - cur;
552
553                 ret = btrfs_map_block(fs_info, READ, cur, &cur_len, &multi,
554                                       mirror, NULL);
555                 if (ret) {
556                         error("Couldn't map the block %llu", cur);
557                         goto err;
558                 }
559                 device = multi->stripes[0].dev;
560                 if (!device->desc || !device->part) {
561                         error("devid %llu is missing", device->devid);
562                         ret = -EIO;
563                         goto err;
564                 }
565                 ret = __btrfs_devread(device->desc, device->part,
566                                 data + (cur - logical), cur_len,
567                                 multi->stripes[0].physical);
568                 if (ret != cur_len) {
569                         error("read failed on devid %llu physical %llu",
570                               device->devid, multi->stripes[0].physical);
571                         ret = -EIO;
572                         goto err;
573                 }
574                 cur += cur_len;
575                 ret = 0;
576         }
577 err:
578         kfree(multi);
579         return ret;
580 }
581
582 void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
583                       u64 objectid)
584 {
585         root->node = NULL;
586         root->track_dirty = 0;
587
588         root->fs_info = fs_info;
589         root->objectid = objectid;
590         root->last_trans = 0;
591         root->last_inode_alloc = 0;
592
593         memset(&root->root_key, 0, sizeof(root->root_key));
594         memset(&root->root_item, 0, sizeof(root->root_item));
595         root->root_key.objectid = objectid;
596 }
597
598 static int find_and_setup_root(struct btrfs_root *tree_root,
599                                struct btrfs_fs_info *fs_info,
600                                u64 objectid, struct btrfs_root *root)
601 {
602         int ret;
603         u64 generation;
604
605         btrfs_setup_root(root, fs_info, objectid);
606         ret = btrfs_find_last_root(tree_root, objectid,
607                                    &root->root_item, &root->root_key);
608         if (ret)
609                 return ret;
610
611         generation = btrfs_root_generation(&root->root_item);
612         root->node = read_tree_block(fs_info,
613                         btrfs_root_bytenr(&root->root_item), generation);
614         if (!extent_buffer_uptodate(root->node))
615                 return -EIO;
616
617         return 0;
618 }
619
620 int btrfs_free_fs_root(struct btrfs_root *root)
621 {
622         if (root->node)
623                 free_extent_buffer(root->node);
624         kfree(root);
625         return 0;
626 }
627
628 static void __free_fs_root(struct rb_node *node)
629 {
630         struct btrfs_root *root;
631
632         root = container_of(node, struct btrfs_root, rb_node);
633         btrfs_free_fs_root(root);
634 }
635
636 FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
637
638 struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
639                                                struct btrfs_key *location)
640 {
641         struct btrfs_root *root;
642         struct btrfs_root *tree_root = fs_info->tree_root;
643         struct btrfs_path *path;
644         struct extent_buffer *l;
645         u64 generation;
646         int ret = 0;
647
648         root = calloc(1, sizeof(*root));
649         if (!root)
650                 return ERR_PTR(-ENOMEM);
651         if (location->offset == (u64)-1) {
652                 ret = find_and_setup_root(tree_root, fs_info,
653                                           location->objectid, root);
654                 if (ret) {
655                         free(root);
656                         return ERR_PTR(ret);
657                 }
658                 goto insert;
659         }
660
661         btrfs_setup_root(root, fs_info,
662                          location->objectid);
663
664         path = btrfs_alloc_path();
665         if (!path) {
666                 free(root);
667                 return ERR_PTR(-ENOMEM);
668         }
669
670         ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
671         if (ret != 0) {
672                 if (ret > 0)
673                         ret = -ENOENT;
674                 goto out;
675         }
676         l = path->nodes[0];
677         read_extent_buffer(l, &root->root_item,
678                btrfs_item_ptr_offset(l, path->slots[0]),
679                sizeof(root->root_item));
680         memcpy(&root->root_key, location, sizeof(*location));
681
682         /* If this root is already an orphan, no need to read */
683         if (btrfs_root_refs(&root->root_item) == 0) {
684                 ret = -ENOENT;
685                 goto out;
686         }
687         ret = 0;
688 out:
689         btrfs_free_path(path);
690         if (ret) {
691                 free(root);
692                 return ERR_PTR(ret);
693         }
694         generation = btrfs_root_generation(&root->root_item);
695         root->node = read_tree_block(fs_info,
696                         btrfs_root_bytenr(&root->root_item), generation);
697         if (!extent_buffer_uptodate(root->node)) {
698                 free(root);
699                 return ERR_PTR(-EIO);
700         }
701 insert:
702         root->ref_cows = 1;
703         return root;
704 }
705
706 static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
707                                             void *data)
708 {
709         u64 objectid = *((u64 *)data);
710         struct btrfs_root *root;
711
712         root = rb_entry(node, struct btrfs_root, rb_node);
713         if (objectid > root->objectid)
714                 return 1;
715         else if (objectid < root->objectid)
716                 return -1;
717         else
718                 return 0;
719 }
720
721 int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
722 {
723         struct btrfs_root *root;
724
725         root = rb_entry(node2, struct btrfs_root, rb_node);
726         return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
727 }
728
729 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
730                                       struct btrfs_key *location)
731 {
732         struct btrfs_root *root;
733         struct rb_node *node;
734         int ret;
735         u64 objectid = location->objectid;
736
737         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
738                 return fs_info->tree_root;
739         if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
740                 return fs_info->chunk_root;
741         if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
742                 return fs_info->csum_root;
743         BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID);
744
745         node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
746                          btrfs_fs_roots_compare_objectids, NULL);
747         if (node)
748                 return container_of(node, struct btrfs_root, rb_node);
749
750         root = btrfs_read_fs_root_no_cache(fs_info, location);
751         if (IS_ERR(root))
752                 return root;
753
754         ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
755                         btrfs_fs_roots_compare_roots);
756         BUG_ON(ret);
757         return root;
758 }
759
760 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
761 {
762         free(fs_info->tree_root);
763         free(fs_info->chunk_root);
764         free(fs_info->csum_root);
765         free(fs_info->super_copy);
766         free(fs_info);
767 }
768
769 struct btrfs_fs_info *btrfs_new_fs_info(void)
770 {
771         struct btrfs_fs_info *fs_info;
772
773         fs_info = calloc(1, sizeof(struct btrfs_fs_info));
774         if (!fs_info)
775                 return NULL;
776
777         fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
778         fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
779         fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
780         fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
781
782         if (!fs_info->tree_root || !fs_info->chunk_root ||
783             !fs_info->csum_root || !fs_info->super_copy)
784                 goto free_all;
785
786         extent_io_tree_init(&fs_info->extent_cache);
787
788         fs_info->fs_root_tree = RB_ROOT;
789         cache_tree_init(&fs_info->mapping_tree.cache_tree);
790
791         return fs_info;
792 free_all:
793         btrfs_free_fs_info(fs_info);
794         return NULL;
795 }
796
797 static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
798                                       struct btrfs_root *info_root,
799                                       u64 objectid, char *str)
800 {
801         struct btrfs_root *root = fs_info->tree_root;
802         int ret;
803
804         ret = find_and_setup_root(root, fs_info, objectid, info_root);
805         if (ret) {
806                 error("could not setup %s tree", str);
807                 return -EIO;
808         }
809
810         return 0;
811 }
812
813 static int get_default_subvolume(struct btrfs_fs_info *fs_info,
814                                  struct btrfs_key *key_ret)
815 {
816         struct btrfs_root *root = fs_info->tree_root;
817         struct btrfs_dir_item *dir_item;
818         struct btrfs_path path;
819         int ret = 0;
820
821         btrfs_init_path(&path);
822
823         dir_item = btrfs_lookup_dir_item(NULL, root, &path,
824                                          BTRFS_ROOT_TREE_DIR_OBJECTID,
825                                          "default", 7, 0);
826         if (IS_ERR(dir_item)) {
827                 ret = PTR_ERR(dir_item);
828                 goto out;
829         }
830
831         btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
832 out:
833         btrfs_release_path(&path);
834         return ret;
835 }
836
837 int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info)
838 {
839         struct btrfs_super_block *sb = fs_info->super_copy;
840         struct btrfs_root *root;
841         struct btrfs_key key;
842         u64 root_tree_bytenr;
843         u64 generation;
844         int ret;
845
846         root = fs_info->tree_root;
847         btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
848         generation = btrfs_super_generation(sb);
849
850         root_tree_bytenr = btrfs_super_root(sb);
851
852         root->node = read_tree_block(fs_info, root_tree_bytenr, generation);
853         if (!extent_buffer_uptodate(root->node)) {
854                 fprintf(stderr, "Couldn't read tree root\n");
855                 return -EIO;
856         }
857
858         ret = setup_root_or_create_block(fs_info, fs_info->csum_root,
859                                          BTRFS_CSUM_TREE_OBJECTID, "csum");
860         if (ret)
861                 return ret;
862         fs_info->csum_root->track_dirty = 1;
863
864         fs_info->last_trans_committed = generation;
865
866         ret = get_default_subvolume(fs_info, &key);
867         if (ret) {
868                 /*
869                  * The default dir item isn't there. Linux kernel behaviour is
870                  * to silently use the top-level subvolume in this case.
871                  */
872                 key.objectid = BTRFS_FS_TREE_OBJECTID;
873                 key.type = BTRFS_ROOT_ITEM_KEY;
874                 key.offset = (u64)-1;
875         }
876
877         fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
878
879         if (IS_ERR(fs_info->fs_root))
880                 return -EIO;
881         return 0;
882 }
883
884 void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
885 {
886         if (fs_info->csum_root)
887                 free_extent_buffer(fs_info->csum_root->node);
888         if (fs_info->tree_root)
889                 free_extent_buffer(fs_info->tree_root->node);
890         if (fs_info->chunk_root)
891                 free_extent_buffer(fs_info->chunk_root->node);
892 }
893
894 static void free_map_lookup(struct cache_extent *ce)
895 {
896         struct map_lookup *map;
897
898         map = container_of(ce, struct map_lookup, ce);
899         kfree(map);
900 }
901
902 FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
903
904 void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
905 {
906         free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
907         extent_io_tree_cleanup(&fs_info->extent_cache);
908 }
909
910 static int btrfs_scan_fs_devices(struct blk_desc *desc,
911                                  struct disk_partition *part,
912                                  struct btrfs_fs_devices **fs_devices)
913 {
914         u64 total_devs;
915         int ret;
916
917         if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
918                      desc->blksz) > (part->size << desc->log2blksz)) {
919                 log_debug("superblock end %u is larger than device size " LBAFU,
920                           BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
921                           part->size << desc->log2blksz);
922                 return -EINVAL;
923         }
924
925         ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs);
926         if (ret) {
927                 /*
928                  * Avoid showing this when probing for a possible Btrfs
929                  *
930                  * fprintf(stderr, "No valid Btrfs found\n");
931                  */
932                 return ret;
933         }
934         return 0;
935 }
936
937 int btrfs_check_fs_compatibility(struct btrfs_super_block *sb)
938 {
939         u64 features;
940
941         features = btrfs_super_incompat_flags(sb) &
942                    ~BTRFS_FEATURE_INCOMPAT_SUPP;
943         if (features) {
944                 printk("couldn't open because of unsupported "
945                        "option features (%llx).\n",
946                        (unsigned long long)features);
947                 return -ENOTSUPP;
948         }
949
950         features = btrfs_super_incompat_flags(sb);
951         if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
952                 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
953                 btrfs_set_super_incompat_flags(sb, features);
954         }
955
956         return 0;
957 }
958
959 static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
960 {
961         struct btrfs_super_block *sb = fs_info->super_copy;
962         u64 chunk_root_bytenr;
963         u64 generation;
964         int ret;
965
966         btrfs_setup_root(fs_info->chunk_root, fs_info,
967                         BTRFS_CHUNK_TREE_OBJECTID);
968
969         ret = btrfs_read_sys_array(fs_info);
970         if (ret)
971                 return ret;
972
973         generation = btrfs_super_chunk_root_generation(sb);
974         chunk_root_bytenr = btrfs_super_chunk_root(sb);
975
976         fs_info->chunk_root->node = read_tree_block(fs_info,
977                                                     chunk_root_bytenr,
978                                                     generation);
979         if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
980                 error("cannot read chunk root");
981                 return -EIO;
982         }
983
984         ret = btrfs_read_chunk_tree(fs_info);
985         if (ret) {
986                 fprintf(stderr, "Couldn't read chunk tree\n");
987                 return ret;
988         }
989         return 0;
990 }
991
992 struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
993                                          struct disk_partition *part)
994 {
995         struct btrfs_fs_info *fs_info;
996         struct btrfs_super_block *disk_super;
997         struct btrfs_fs_devices *fs_devices = NULL;
998         struct extent_buffer *eb;
999         int ret;
1000
1001         fs_info = btrfs_new_fs_info();
1002         if (!fs_info) {
1003                 fprintf(stderr, "Failed to allocate memory for fs_info\n");
1004                 return NULL;
1005         }
1006
1007         ret = btrfs_scan_fs_devices(desc, part, &fs_devices);
1008         if (ret)
1009                 goto out;
1010
1011         fs_info->fs_devices = fs_devices;
1012
1013         ret = btrfs_open_devices(fs_devices);
1014         if (ret)
1015                 goto out;
1016
1017         disk_super = fs_info->super_copy;
1018         ret = btrfs_read_dev_super(desc, part, disk_super);
1019         if (ret) {
1020                 debug("No valid btrfs found\n");
1021                 goto out_devices;
1022         }
1023
1024         if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) {
1025                 fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
1026                 goto out_devices;
1027         }
1028
1029         ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
1030         if (btrfs_fs_incompat(fs_info, METADATA_UUID))
1031                 ASSERT(!memcmp(disk_super->metadata_uuid,
1032                                fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
1033
1034         fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
1035         fs_info->nodesize = btrfs_super_nodesize(disk_super);
1036         fs_info->stripesize = btrfs_super_stripesize(disk_super);
1037
1038         ret = btrfs_check_fs_compatibility(fs_info->super_copy);
1039         if (ret)
1040                 goto out_devices;
1041
1042         ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
1043         if (ret)
1044                 goto out_chunk;
1045
1046         /* Chunk tree root is unable to read, return directly */
1047         if (!fs_info->chunk_root)
1048                 return fs_info;
1049
1050         eb = fs_info->chunk_root->node;
1051         read_extent_buffer(eb, fs_info->chunk_tree_uuid,
1052                            btrfs_header_chunk_tree_uuid(eb),
1053                            BTRFS_UUID_SIZE);
1054
1055         ret = btrfs_setup_all_roots(fs_info);
1056         if (ret)
1057                 goto out_chunk;
1058
1059         return fs_info;
1060
1061 out_chunk:
1062         btrfs_release_all_roots(fs_info);
1063         btrfs_cleanup_all_caches(fs_info);
1064 out_devices:
1065         btrfs_close_devices(fs_devices);
1066 out:
1067         btrfs_free_fs_info(fs_info);
1068         return NULL;
1069 }
1070
1071 int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
1072 {
1073         int ret;
1074
1075         free_fs_roots_tree(&fs_info->fs_root_tree);
1076
1077         btrfs_release_all_roots(fs_info);
1078         ret = btrfs_close_devices(fs_info->fs_devices);
1079         btrfs_cleanup_all_caches(fs_info);
1080         btrfs_free_fs_info(fs_info);
1081         return ret;
1082 }
1083
1084 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
1085 {
1086         int ret;
1087
1088         ret = extent_buffer_uptodate(buf);
1089         if (!ret)
1090                 return ret;
1091
1092         ret = verify_parent_transid(&buf->fs_info->extent_cache, buf,
1093                                     parent_transid, 1);
1094         return !ret;
1095 }
1096
1097 int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
1098 {
1099         return set_extent_buffer_uptodate(eb);
1100 }