7b740a3450c5b7ea1897468a94bea3c7335360be
[platform/upstream/btrfs-progs.git] / cmds-chunk.c
1 /*
2  * Copyright (C) 2013 Fujitsu.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18 #define _XOPEN_SOURCE 500
19 #define _GNU_SOURCE
20
21 #include <stdio.h>
22 #include <stdio_ext.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <uuid/uuid.h>
29
30 #include "kerncompat.h"
31 #include "list.h"
32 #include "radix-tree.h"
33 #include "ctree.h"
34 #include "extent-cache.h"
35 #include "disk-io.h"
36 #include "volumes.h"
37 #include "transaction.h"
38 #include "crc32c.h"
39 #include "utils.h"
40 #include "version.h"
41 #include "btrfsck.h"
42 #include "commands.h"
43
/*
 * NOTE(review): presumably the status code returned when the chunk tree
 * rebuild is aborted; its users are not visible in this part of the file.
 */
#define BTRFS_CHUNK_TREE_REBUILD_ABORTED        -7500
/* Stripe length in bytes (64 KiB). */
#define BTRFS_STRIPE_LEN                        (64 * 1024)
46
/*
 * State shared by the chunk recovery steps: filesystem parameters used
 * while scanning the devices, the records collected by the scan and the
 * result of classifying the chunks.
 */
struct recover_control {
        /* Non-zero enables the scan/check result dumps and extra messages. */
        int verbose;
        /* Stored by init_recover_control(); not read in this part of the file. */
        int yes;

        /* Checksum size passed to the tree block checksum verification. */
        u16 csum_size;
        /* Step used while probing a device for tree blocks. */
        u32 sectorsize;
        /* Size of the tree block buffer read at each probed offset. */
        u32 leafsize;
        /* Extent/dev tree leaves with a newer generation are ignored. */
        u64 generation;
        /* Chunk tree leaves with a newer generation are ignored. */
        u64 chunk_root_generation;

        /* Devices to scan; also provides the fsid matched against headers. */
        struct btrfs_fs_devices *fs_devices;

        /* Chunk records found by the scan. */
        struct cache_tree chunk;
        /* Block group records found by the scan. */
        struct block_group_tree bg;
        /* Device extent records found by the scan. */
        struct device_extent_tree devext;

        /* Chunks classified as healthy / bad. */
        struct list_head good_chunks;
        struct list_head bad_chunks;
};
66
67 static struct btrfs_chunk *create_chunk_item(struct chunk_record *record)
68 {
69         struct btrfs_chunk *ret;
70         struct btrfs_stripe *chunk_stripe;
71         int i;
72
73         if (!record || record->num_stripes == 0)
74                 return NULL;
75         ret = malloc(btrfs_chunk_item_size(record->num_stripes));
76         if (!ret)
77                 return NULL;
78         btrfs_set_stack_chunk_length(ret, record->length);
79         btrfs_set_stack_chunk_owner(ret, record->owner);
80         btrfs_set_stack_chunk_stripe_len(ret, record->stripe_len);
81         btrfs_set_stack_chunk_type(ret, record->type_flags);
82         btrfs_set_stack_chunk_io_align(ret, record->io_align);
83         btrfs_set_stack_chunk_io_width(ret, record->io_width);
84         btrfs_set_stack_chunk_sector_size(ret, record->sector_size);
85         btrfs_set_stack_chunk_num_stripes(ret, record->num_stripes);
86         btrfs_set_stack_chunk_sub_stripes(ret, record->sub_stripes);
87         for (i = 0, chunk_stripe = &ret->stripe; i < record->num_stripes;
88              i++, chunk_stripe++) {
89                 btrfs_set_stack_stripe_devid(chunk_stripe,
90                                 record->stripes[i].devid);
91                 btrfs_set_stack_stripe_offset(chunk_stripe,
92                                 record->stripes[i].offset);
93                 memcpy(chunk_stripe->dev_uuid, record->stripes[i].dev_uuid,
94                        BTRFS_UUID_SIZE);
95         }
96         return ret;
97 }
98
99 void init_recover_control(struct recover_control *rc, int verbose, int yes)
100 {
101         memset(rc, 0, sizeof(struct recover_control));
102         cache_tree_init(&rc->chunk);
103         block_group_tree_init(&rc->bg);
104         device_extent_tree_init(&rc->devext);
105
106         INIT_LIST_HEAD(&rc->good_chunks);
107         INIT_LIST_HEAD(&rc->bad_chunks);
108
109         rc->verbose = verbose;
110         rc->yes = yes;
111 }
112
/*
 * Release the block group, chunk and device extent caches held by @rc.
 * @rc itself is not freed.
 */
void free_recover_control(struct recover_control *rc)
{
        free_block_group_tree(&rc->bg);
        free_chunk_cache_tree(&rc->chunk);
        free_device_extent_tree(&rc->devext);
}
119
120 static int process_block_group_item(struct block_group_tree *bg_cache,
121                                     struct extent_buffer *leaf,
122                                     struct btrfs_key *key, int slot)
123 {
124         struct block_group_record *rec;
125         struct block_group_record *exist;
126         struct cache_extent *cache;
127         int ret = 0;
128
129         rec = btrfs_new_block_group_record(leaf, key, slot);
130         if (!rec->cache.size)
131                 goto free_out;
132 again:
133         cache = lookup_cache_extent(&bg_cache->tree,
134                                     rec->cache.start,
135                                     rec->cache.size);
136         if (cache) {
137                 exist = container_of(cache, struct block_group_record, cache);
138
139                 /*check the generation and replace if needed*/
140                 if (exist->generation > rec->generation)
141                         goto free_out;
142                 if (exist->generation == rec->generation) {
143                         int offset = offsetof(struct block_group_record,
144                                               generation);
145                         /*
146                          * According to the current kernel code, the following
147                          * case is impossble, or there is something wrong in
148                          * the kernel code.
149                          */
150                         if (memcmp(((void *)exist) + offset,
151                                    ((void *)rec) + offset,
152                                    sizeof(*rec) - offset))
153                                 ret = -EEXIST;
154                         goto free_out;
155                 }
156                 remove_cache_extent(&bg_cache->tree, cache);
157                 list_del_init(&exist->list);
158                 free(exist);
159                 /*
160                  * We must do seach again to avoid the following cache.
161                  * /--old bg 1--//--old bg 2--/
162                  *        /--new bg--/
163                  */
164                 goto again;
165         }
166
167         ret = insert_block_group_record(bg_cache, rec);
168         BUG_ON(ret);
169 out:
170         return ret;
171 free_out:
172         free(rec);
173         goto out;
174 }
175
176 static int process_chunk_item(struct cache_tree *chunk_cache,
177                               struct extent_buffer *leaf, struct btrfs_key *key,
178                               int slot)
179 {
180         struct chunk_record *rec;
181         struct chunk_record *exist;
182         struct cache_extent *cache;
183         int ret = 0;
184
185         rec = btrfs_new_chunk_record(leaf, key, slot);
186         if (!rec->cache.size)
187                 goto free_out;
188 again:
189         cache = lookup_cache_extent(chunk_cache, rec->offset, rec->length);
190         if (cache) {
191                 exist = container_of(cache, struct chunk_record, cache);
192
193                 if (exist->generation > rec->generation)
194                         goto free_out;
195                 if (exist->generation == rec->generation) {
196                         int num_stripes = rec->num_stripes;
197                         int rec_size = btrfs_chunk_record_size(num_stripes);
198                         int offset = offsetof(struct chunk_record, generation);
199
200                         if (exist->num_stripes != rec->num_stripes ||
201                             memcmp(((void *)exist) + offset,
202                                    ((void *)rec) + offset,
203                                    rec_size - offset))
204                                 ret = -EEXIST;
205                         goto free_out;
206                 }
207                 remove_cache_extent(chunk_cache, cache);
208                 free(exist);
209                 goto again;
210         }
211         ret = insert_cache_extent(chunk_cache, &rec->cache);
212         BUG_ON(ret);
213 out:
214         return ret;
215 free_out:
216         free(rec);
217         goto out;
218 }
219
220 static int process_device_extent_item(struct device_extent_tree *devext_cache,
221                                       struct extent_buffer *leaf,
222                                       struct btrfs_key *key, int slot)
223 {
224         struct device_extent_record *rec;
225         struct device_extent_record *exist;
226         struct cache_extent *cache;
227         int ret = 0;
228
229         rec = btrfs_new_device_extent_record(leaf, key, slot);
230         if (!rec->cache.size)
231                 goto free_out;
232 again:
233         cache = lookup_cache_extent2(&devext_cache->tree,
234                                      rec->cache.objectid,
235                                      rec->cache.start,
236                                      rec->cache.size);
237         if (cache) {
238                 exist = container_of(cache, struct device_extent_record, cache);
239                 if (exist->generation > rec->generation)
240                         goto free_out;
241                 if (exist->generation == rec->generation) {
242                         int offset = offsetof(struct device_extent_record,
243                                               generation);
244                         if (memcmp(((void *)exist) + offset,
245                                    ((void *)rec) + offset,
246                                    sizeof(*rec) - offset))
247                                 ret = -EEXIST;
248                         goto free_out;
249                 }
250                 remove_cache_extent(&devext_cache->tree, cache);
251                 list_del_init(&exist->chunk_list);
252                 list_del_init(&exist->device_list);
253                 free(exist);
254                 goto again;
255         }
256
257         ret = insert_device_extent_record(devext_cache, rec);
258         BUG_ON(ret);
259 out:
260         return ret;
261 free_out:
262         free(rec);
263         goto out;
264 }
265
266 static void print_block_group_info(struct block_group_record *rec, char *prefix)
267 {
268         if (prefix)
269                 printf("%s", prefix);
270         printf("Block Group: start = %llu, len = %llu, flag = %llx\n",
271                rec->objectid, rec->offset, rec->flags);
272 }
273
274 static void print_block_group_tree(struct block_group_tree *tree)
275 {
276         struct cache_extent *cache;
277         struct block_group_record *rec;
278
279         printf("All Block Groups:\n");
280         for (cache = first_cache_extent(&tree->tree); cache;
281              cache = next_cache_extent(cache)) {
282                 rec = container_of(cache, struct block_group_record, cache);
283                 print_block_group_info(rec, "\t");
284         }
285         printf("\n");
286 }
287
288 static void print_stripe_info(struct stripe *data, char *prefix1, char *prefix2,
289                               int index)
290 {
291         if (prefix1)
292                 printf("%s", prefix1);
293         if (prefix2)
294                 printf("%s", prefix2);
295         printf("[%2d] Stripe: devid = %llu, offset = %llu\n",
296                index, data->devid, data->offset);
297 }
298
299 static void print_chunk_self_info(struct chunk_record *rec, char *prefix)
300 {
301         int i;
302
303         if (prefix)
304                 printf("%s", prefix);
305         printf("Chunk: start = %llu, len = %llu, type = %llx, num_stripes = %u\n",
306                rec->offset, rec->length, rec->type_flags, rec->num_stripes);
307         if (prefix)
308                 printf("%s", prefix);
309         printf("    Stripes list:\n");
310         for (i = 0; i < rec->num_stripes; i++)
311                 print_stripe_info(&rec->stripes[i], prefix, "    ", i);
312 }
313
314 static void print_chunk_tree(struct cache_tree *tree)
315 {
316         struct cache_extent *n;
317         struct chunk_record *entry;
318
319         printf("All Chunks:\n");
320         for (n = first_cache_extent(tree); n;
321              n = next_cache_extent(n)) {
322                 entry = container_of(n, struct chunk_record, cache);
323                 print_chunk_self_info(entry, "\t");
324         }
325         printf("\n");
326 }
327
328 static void print_device_extent_info(struct device_extent_record *rec,
329                                      char *prefix)
330 {
331         if (prefix)
332                 printf("%s", prefix);
333         printf("Device extent: devid = %llu, start = %llu, len = %llu, chunk offset = %llu\n",
334                rec->objectid, rec->offset, rec->length, rec->chunk_offset);
335 }
336
337 static void print_device_extent_tree(struct device_extent_tree *tree)
338 {
339         struct cache_extent *n;
340         struct device_extent_record *entry;
341
342         printf("All Device Extents:\n");
343         for (n = first_cache_extent(&tree->tree); n;
344              n = next_cache_extent(n)) {
345                 entry = container_of(n, struct device_extent_record, cache);
346                 print_device_extent_info(entry, "\t");
347         }
348         printf("\n");
349 }
350
351 static void print_device_info(struct btrfs_device *device, char *prefix)
352 {
353         if (prefix)
354                 printf("%s", prefix);
355         printf("Device: id = %llu, name = %s\n",
356                device->devid, device->name);
357 }
358
359 static void print_all_devices(struct list_head *devices)
360 {
361         struct btrfs_device *dev;
362
363         printf("All Devices:\n");
364         list_for_each_entry(dev, devices, dev_list)
365                 print_device_info(dev, "\t");
366         printf("\n");
367 }
368
369 static void print_scan_result(struct recover_control *rc)
370 {
371         if (!rc->verbose)
372                 return;
373
374         printf("DEVICE SCAN RESULT:\n");
375         printf("Filesystem Information:\n");
376         printf("\tsectorsize: %d\n", rc->sectorsize);
377         printf("\tleafsize: %d\n", rc->leafsize);
378         printf("\ttree root generation: %llu\n", rc->generation);
379         printf("\tchunk root generation: %llu\n", rc->chunk_root_generation);
380         printf("\n");
381
382         print_all_devices(&rc->fs_devices->devices);
383         print_block_group_tree(&rc->bg);
384         print_chunk_tree(&rc->chunk);
385         print_device_extent_tree(&rc->devext);
386 }
387
388 static void print_chunk_info(struct chunk_record *chunk, char *prefix)
389 {
390         struct device_extent_record *devext;
391         int i;
392
393         print_chunk_self_info(chunk, prefix);
394         if (prefix)
395                 printf("%s", prefix);
396         if (chunk->bg_rec)
397                 print_block_group_info(chunk->bg_rec, "    ");
398         else
399                 printf("    No block group.\n");
400         if (prefix)
401                 printf("%s", prefix);
402         if (list_empty(&chunk->dextents)) {
403                 printf("    No device extent.\n");
404         } else {
405                 printf("    Device extent list:\n");
406                 i = 0;
407                 list_for_each_entry(devext, &chunk->dextents, chunk_list) {
408                         if (prefix)
409                                 printf("%s", prefix);
410                         printf("%s[%2d]", "        ", i);
411                         print_device_extent_info(devext, NULL);
412                         i++;
413                 }
414         }
415 }
416
417 static void print_check_result(struct recover_control *rc)
418 {
419         struct chunk_record *chunk;
420         struct block_group_record *bg;
421         struct device_extent_record *devext;
422         int total = 0;
423         int good = 0;
424         int bad = 0;
425
426         if (!rc->verbose)
427                 return;
428
429         printf("CHECK RESULT:\n");
430         printf("Healthy Chunks:\n");
431         list_for_each_entry(chunk, &rc->good_chunks, list) {
432                 print_chunk_info(chunk, "  ");
433                 good++;
434                 total++;
435         }
436         printf("Bad Chunks:\n");
437         list_for_each_entry(chunk, &rc->bad_chunks, list) {
438                 print_chunk_info(chunk, "  ");
439                 bad++;
440                 total++;
441         }
442         printf("\n");
443         printf("Total Chunks:\t%d\n", total);
444         printf("  Heathy:\t%d\n", good);
445         printf("  Bad:\t%d\n", bad);
446
447         printf("\n");
448         printf("Orphan Block Groups:\n");
449         list_for_each_entry(bg, &rc->bg.block_groups, list)
450                 print_block_group_info(bg, "  ");
451
452         printf("\n");
453         printf("Orphan Device Extents:\n");
454         list_for_each_entry(devext, &rc->devext.no_chunk_orphans, chunk_list)
455                 print_device_extent_info(devext, "  ");
456 }
457
458 static int check_chunk_by_metadata(struct recover_control *rc,
459                                    struct btrfs_root *root,
460                                    struct chunk_record *chunk, int bg_only)
461 {
462         int ret;
463         int i;
464         int slot;
465         struct btrfs_path path;
466         struct btrfs_key key;
467         struct btrfs_root *dev_root;
468         struct stripe *stripe;
469         struct btrfs_dev_extent *dev_extent;
470         struct btrfs_block_group_item *bg_ptr;
471         struct extent_buffer *l;
472
473         btrfs_init_path(&path);
474
475         if (bg_only)
476                 goto bg_check;
477
478         dev_root = root->fs_info->dev_root;
479         for (i = 0; i < chunk->num_stripes; i++) {
480                 stripe = &chunk->stripes[i];
481
482                 key.objectid = stripe->devid;
483                 key.offset = stripe->offset;
484                 key.type = BTRFS_DEV_EXTENT_KEY;
485
486                 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
487                 if (ret < 0) {
488                         fprintf(stderr, "Search device extent failed(%d)\n",
489                                 ret);
490                         btrfs_release_path(root, &path);
491                         return ret;
492                 } else if (ret > 0) {
493                         if (rc->verbose)
494                                 fprintf(stderr,
495                                         "No device extent[%llu, %llu]\n",
496                                         stripe->devid, stripe->offset);
497                         btrfs_release_path(root, &path);
498                         return -ENOENT;
499                 }
500                 l = path.nodes[0];
501                 slot = path.slots[0];
502                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
503                 if (chunk->offset !=
504                     btrfs_dev_extent_chunk_offset(l, dev_extent)) {
505                         if (rc->verbose)
506                                 fprintf(stderr,
507                                         "Device tree unmatch with chunks dev_extent[%llu, %llu], chunk[%llu, %llu]\n",
508                                         btrfs_dev_extent_chunk_offset(l,
509                                                                 dev_extent),
510                                         btrfs_dev_extent_length(l, dev_extent),
511                                         chunk->offset, chunk->length);
512                         btrfs_release_path(root, &path);
513                         return -ENOENT;
514                 }
515                 btrfs_release_path(root, &path);
516         }
517
518 bg_check:
519         key.objectid = chunk->offset;
520         key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
521         key.offset = chunk->length;
522
523         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
524                                 0, 0);
525         if (ret < 0) {
526                 fprintf(stderr, "Search block group failed(%d)\n", ret);
527                 btrfs_release_path(root, &path);
528                 return ret;
529         } else if (ret > 0) {
530                 if (rc->verbose)
531                         fprintf(stderr, "No block group[%llu, %llu]\n",
532                                 key.objectid, key.offset);
533                 btrfs_release_path(root, &path);
534                 return -ENOENT;
535         }
536
537         l = path.nodes[0];
538         slot = path.slots[0];
539         bg_ptr = btrfs_item_ptr(l, slot, struct btrfs_block_group_item);
540         if (chunk->type_flags != btrfs_disk_block_group_flags(l, bg_ptr)) {
541                 if (rc->verbose)
542                         fprintf(stderr,
543                                 "Chunk[%llu, %llu]'s type(%llu) is differemt with Block Group's type(%llu)\n",
544                                 chunk->offset, chunk->length, chunk->type_flags,
545                                 btrfs_disk_block_group_flags(l, bg_ptr));
546                 btrfs_release_path(root, &path);
547                 return -ENOENT;
548         }
549         btrfs_release_path(root, &path);
550         return 0;
551 }
552
553 static int check_all_chunks_by_metadata(struct recover_control *rc,
554                                         struct btrfs_root *root)
555 {
556         struct chunk_record *chunk;
557         LIST_HEAD(orphan_chunks);
558         int ret = 0;
559         int err;
560
561         list_for_each_entry(chunk, &rc->good_chunks, list) {
562                 err = check_chunk_by_metadata(rc, root, chunk, 0);
563                 if (err) {
564                         if (err == -ENOENT)
565                                 list_move_tail(&chunk->list, &orphan_chunks);
566                         else if (err && !ret)
567                                 ret = err;
568                 }
569         }
570
571         list_for_each_entry(chunk, &rc->bad_chunks, list) {
572                 err = check_chunk_by_metadata(rc, root, chunk, 1);
573                 if (err != -ENOENT && !ret)
574                         ret = err ? err : -EINVAL;
575         }
576         list_splice(&orphan_chunks, &rc->bad_chunks);
577         return ret;
578 }
579
580 static int extract_metadata_record(struct recover_control *rc,
581                                    struct extent_buffer *leaf)
582 {
583         struct btrfs_key key;
584         int ret = 0;
585         int i;
586         u32 nritems;
587
588         nritems = btrfs_header_nritems(leaf);
589         for (i = 0; i < nritems; i++) {
590                 btrfs_item_key_to_cpu(leaf, &key, i);
591                 switch (key.type) {
592                 case BTRFS_BLOCK_GROUP_ITEM_KEY:
593                         ret = process_block_group_item(&rc->bg, leaf, &key, i);
594                         break;
595                 case BTRFS_CHUNK_ITEM_KEY:
596                         ret = process_chunk_item(&rc->chunk, leaf, &key, i);
597                         break;
598                 case BTRFS_DEV_EXTENT_KEY:
599                         ret = process_device_extent_item(&rc->devext, leaf,
600                                                          &key, i);
601                         break;
602                 }
603                 if (ret)
604                         break;
605         }
606         return ret;
607 }
608
609 static inline int is_super_block_address(u64 offset)
610 {
611         int i;
612
613         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
614                 if (offset == btrfs_sb_offset(i))
615                         return 1;
616         }
617         return 0;
618 }
619
620 static int scan_one_device(struct recover_control *rc, int fd)
621 {
622         struct extent_buffer *buf;
623         u64 bytenr;
624         int ret = 0;
625
626         buf = malloc(sizeof(*buf) + rc->leafsize);
627         if (!buf)
628                 return -ENOMEM;
629         buf->len = rc->leafsize;
630
631         bytenr = 0;
632         while (1) {
633                 if (is_super_block_address(bytenr))
634                         bytenr += rc->sectorsize;
635
636                 if (pread64(fd, buf->data, rc->leafsize, bytenr) <
637                     rc->leafsize)
638                         break;
639
640                 if (memcmp_extent_buffer(buf, rc->fs_devices->fsid,
641                                          (unsigned long)btrfs_header_fsid(buf),
642                                          BTRFS_FSID_SIZE)) {
643                         bytenr += rc->sectorsize;
644                         continue;
645                 }
646
647                 if (verify_tree_block_csum_silent(buf, rc->csum_size)) {
648                         bytenr += rc->sectorsize;
649                         continue;
650                 }
651
652                 if (btrfs_header_level(buf) != 0)
653                         goto next_node;
654
655                 switch (btrfs_header_owner(buf)) {
656                 case BTRFS_EXTENT_TREE_OBJECTID:
657                 case BTRFS_DEV_TREE_OBJECTID:
658                         /* different tree use different generation */
659                         if (btrfs_header_generation(buf) > rc->generation)
660                                 break;
661                         ret = extract_metadata_record(rc, buf);
662                         if (ret)
663                                 goto out;
664                         break;
665                 case BTRFS_CHUNK_TREE_OBJECTID:
666                         if (btrfs_header_generation(buf) >
667                             rc->chunk_root_generation)
668                                 break;
669                         ret = extract_metadata_record(rc, buf);
670                         if (ret)
671                                 goto out;
672                         break;
673                 }
674 next_node:
675                 bytenr += rc->leafsize;
676         }
677 out:
678         free(buf);
679         return ret;
680 }
681
682 static int scan_devices(struct recover_control *rc)
683 {
684         int ret = 0;
685         int fd;
686         struct btrfs_device *dev;
687
688         list_for_each_entry(dev, &rc->fs_devices->devices, dev_list) {
689                 fd = open(dev->name, O_RDONLY);
690                 if (fd < 0) {
691                         fprintf(stderr, "Failed to open device %s\n",
692                                 dev->name);
693                         return -1;
694                 }
695                 ret = scan_one_device(rc, fd);
696                 close(fd);
697                 if (ret)
698                         return ret;
699         }
700         return ret;
701 }
702
703 static int build_device_map_by_chunk_record(struct btrfs_root *root,
704                                             struct chunk_record *chunk)
705 {
706         int ret = 0;
707         int i;
708         u64 devid;
709         u8 uuid[BTRFS_UUID_SIZE];
710         u16 num_stripes;
711         struct btrfs_mapping_tree *map_tree;
712         struct map_lookup *map;
713         struct stripe *stripe;
714
715         map_tree = &root->fs_info->mapping_tree;
716         num_stripes = chunk->num_stripes;
717         map = malloc(btrfs_map_lookup_size(num_stripes));
718         if (!map)
719                 return -ENOMEM;
720         map->ce.start = chunk->offset;
721         map->ce.size = chunk->length;
722         map->num_stripes = num_stripes;
723         map->io_width = chunk->io_width;
724         map->io_align = chunk->io_align;
725         map->sector_size = chunk->sector_size;
726         map->stripe_len = chunk->stripe_len;
727         map->type = chunk->type_flags;
728         map->sub_stripes = chunk->sub_stripes;
729
730         for (i = 0, stripe = chunk->stripes; i < num_stripes; i++, stripe++) {
731                 devid = stripe->devid;
732                 memcpy(uuid, stripe->dev_uuid, BTRFS_UUID_SIZE);
733                 map->stripes[i].physical = stripe->offset;
734                 map->stripes[i].dev = btrfs_find_device(root, devid,
735                                                         uuid, NULL);
736                 if (!map->stripes[i].dev) {
737                         kfree(map);
738                         return -EIO;
739                 }
740         }
741
742         ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
743         return ret;
744 }
745
746 static int build_device_maps_by_chunk_records(struct recover_control *rc,
747                                               struct btrfs_root *root)
748 {
749         int ret = 0;
750         struct chunk_record *chunk;
751
752         list_for_each_entry(chunk, &rc->good_chunks, list) {
753                 ret = build_device_map_by_chunk_record(root, chunk);
754                 if (ret)
755                         return ret;
756         }
757         return ret;
758 }
759
/*
 * Delete from the extent tree every item whose key objectid lies in
 * [bg->objectid, bg->objectid + bg->offset), except block group items.
 *
 * The range is processed leaf by leaf: each pass searches for the current
 * key, collects a run of consecutive deletable items in that leaf, deletes
 * the run in one call and then searches again.
 *
 * Returns 0 on success or a negative error from the tree search, leaf
 * iteration or deletion. Note that the "err" label is also the normal
 * exit path.
 */
static int block_group_remove_all_extent_items(struct btrfs_trans_handle *trans,
                                               struct btrfs_root *root,
                                               struct block_group_record *bg)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_key key;
        struct btrfs_path path;
        struct extent_buffer *leaf;
        u64 start = bg->objectid;
        u64 end = bg->objectid + bg->offset;
        u64 old_val;
        int nitems;
        int ret;
        int i;
        int del_s, del_nr;

        btrfs_init_path(&path);
        /* All work below is done on the extent tree, not the root passed in. */
        root = root->fs_info->extent_root;

        key.objectid = start;
        key.offset = 0;
        key.type = BTRFS_EXTENT_ITEM_KEY;
again:
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret < 0)
                goto err;
        else if (ret > 0)
                /* No exact match is fine: continue from the returned slot. */
                ret = 0;

        leaf = path.nodes[0];
        nitems = btrfs_header_nritems(leaf);
        if (!nitems) {
                /* The tree is empty. */
                ret = 0;
                goto err;
        }

        if (path.slots[0] >= nitems) {
                /*
                 * The slot is past the last item of this leaf: move to the
                 * next leaf and restart the search from its first key, unless
                 * there is no next leaf or that key is already out of range.
                 */
                ret = btrfs_next_leaf(root, &path);
                if (ret < 0)
                        goto err;
                if (ret > 0) {
                        ret = 0;
                        goto err;
                }
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, 0);
                if (key.objectid >= end)
                        goto err;
                btrfs_release_path(root, &path);
                goto again;
        }

        /*
         * Collect one run of consecutive items to delete: del_s is the first
         * slot of the run (-1 while none was found), del_nr its length.
         */
        del_nr = 0;
        del_s = -1;
        for (i = path.slots[0]; i < nitems; i++) {
                btrfs_item_key_to_cpu(leaf, &key, i);
                if (key.objectid >= end)
                        break;

                if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
                        /*
                         * Block group items are kept. Before the run starts
                         * they are skipped; once it has started they end it,
                         * because the deleted slots must be contiguous.
                         */
                        if (del_nr == 0)
                                continue;
                        else
                                break;
                }

                if (del_s == -1)
                        del_s = i;
                del_nr++;
                if (key.type == BTRFS_EXTENT_ITEM_KEY ||
                    key.type == BTRFS_METADATA_ITEM_KEY) {
                        /*
                         * NOTE(review): this adds the extent's size to the
                         * super block's bytes_used although the item is being
                         * removed; confirm that an increase is intended.
                         */
                        old_val = btrfs_super_bytes_used(fs_info->super_copy);
                        if (key.type == BTRFS_METADATA_ITEM_KEY)
                                old_val += root->leafsize;
                        else
                                old_val += key.offset;
                        btrfs_set_super_bytes_used(fs_info->super_copy,
                                                   old_val);
                }
        }

        if (del_nr) {
                ret = btrfs_del_items(trans, root, &path, del_s, del_nr);
                if (ret)
                        goto err;
        }

        /* key is the last key examined; still in range means more to do. */
        if (key.objectid < end) {
                if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
                        /*
                         * Resume behind the block group item, otherwise the
                         * next search would land on it again.
                         */
                        key.objectid += root->sectorsize;
                        key.type = BTRFS_EXTENT_ITEM_KEY;
                        key.offset = 0;
                }
                btrfs_release_path(root, &path);
                goto again;
        }
err:
        btrfs_release_path(root, &path);
        return ret;
}
861
862 static int block_group_free_all_extent(struct btrfs_trans_handle *trans,
863                                        struct btrfs_root *root,
864                                        struct block_group_record *bg)
865 {
866         struct btrfs_block_group_cache *cache;
867         struct btrfs_fs_info *info;
868         u64 start;
869         u64 end;
870
871         info = root->fs_info;
872         cache = btrfs_lookup_block_group(info, bg->objectid);
873         if (!cache)
874                 return -ENOENT;
875
876         start = cache->key.objectid;
877         end = start + cache->key.offset - 1;
878
879         set_extent_bits(&info->block_group_cache, start, end,
880                         BLOCK_GROUP_DIRTY, GFP_NOFS);
881         set_extent_dirty(&info->free_space_cache, start, end, GFP_NOFS);
882
883         btrfs_set_block_group_used(&cache->item, 0);
884
885         return 0;
886 }
887
888 static int remove_chunk_extent_item(struct btrfs_trans_handle *trans,
889                                     struct recover_control *rc,
890                                     struct btrfs_root *root)
891 {
892         struct chunk_record *chunk;
893         int ret = 0;
894
895         list_for_each_entry(chunk, &rc->good_chunks, list) {
896                 if (!(chunk->type_flags & BTRFS_BLOCK_GROUP_SYSTEM))
897                         continue;
898                 ret = block_group_remove_all_extent_items(trans, root,
899                                                           chunk->bg_rec);
900                 if (ret)
901                         return ret;
902
903                 ret = block_group_free_all_extent(trans, root, chunk->bg_rec);
904                 if (ret)
905                         return ret;
906         }
907         return ret;
908 }
909
910 static int __rebuild_chunk_root(struct btrfs_trans_handle *trans,
911                                 struct recover_control *rc,
912                                 struct btrfs_root *root)
913 {
914         u64 min_devid = -1;
915         struct btrfs_device *dev;
916         struct extent_buffer *cow;
917         struct btrfs_disk_key disk_key;
918         int ret = 0;
919
920         list_for_each_entry(dev, &rc->fs_devices->devices, dev_list) {
921                 if (min_devid > dev->devid)
922                         min_devid = dev->devid;
923         }
924         disk_key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
925         disk_key.type = BTRFS_DEV_ITEM_KEY;
926         disk_key.offset = min_devid;
927
928         cow = btrfs_alloc_free_block(trans, root, root->sectorsize,
929                                      BTRFS_CHUNK_TREE_OBJECTID,
930                                      &disk_key, 0, 0, 0);
931         btrfs_set_header_bytenr(cow, cow->start);
932         btrfs_set_header_generation(cow, trans->transid);
933         btrfs_set_header_nritems(cow, 0);
934         btrfs_set_header_level(cow, 0);
935         btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
936         btrfs_set_header_owner(cow, BTRFS_CHUNK_TREE_OBJECTID);
937         write_extent_buffer(cow, root->fs_info->fsid,
938                         (unsigned long)btrfs_header_fsid(cow),
939                         BTRFS_FSID_SIZE);
940
941         write_extent_buffer(cow, root->fs_info->chunk_tree_uuid,
942                         (unsigned long)btrfs_header_chunk_tree_uuid(cow),
943                         BTRFS_UUID_SIZE);
944
945         root->node = cow;
946         btrfs_mark_buffer_dirty(cow);
947
948         return ret;
949 }
950
951 static int __rebuild_device_items(struct btrfs_trans_handle *trans,
952                                   struct recover_control *rc,
953                                   struct btrfs_root *root)
954 {
955         struct btrfs_device *dev;
956         struct btrfs_key key;
957         struct btrfs_dev_item *dev_item;
958         int ret = 0;
959
960         dev_item = malloc(sizeof(struct btrfs_dev_item));
961         if (!dev_item)
962                 return -ENOMEM;
963
964         list_for_each_entry(dev, &rc->fs_devices->devices, dev_list) {
965                 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
966                 key.type = BTRFS_DEV_ITEM_KEY;
967                 key.offset = dev->devid;
968
969                 btrfs_set_stack_device_generation(dev_item, 0);
970                 btrfs_set_stack_device_type(dev_item, dev->type);
971                 btrfs_set_stack_device_id(dev_item, dev->devid);
972                 btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
973                 btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
974                 btrfs_set_stack_device_io_align(dev_item, dev->io_align);
975                 btrfs_set_stack_device_io_width(dev_item, dev->io_width);
976                 btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
977                 memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
978                 memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
979
980                 ret = btrfs_insert_item(trans, root, &key,
981                                         dev_item, sizeof(*dev_item));
982         }
983
984         free(dev_item);
985         return ret;
986 }
987
988 static int __rebuild_chunk_items(struct btrfs_trans_handle *trans,
989                                  struct recover_control *rc,
990                                  struct btrfs_root *root)
991 {
992         struct btrfs_key key;
993         struct btrfs_chunk *chunk = NULL;
994         struct btrfs_root *chunk_root;
995         struct chunk_record *chunk_rec;
996         int ret;
997
998         chunk_root = root->fs_info->chunk_root;
999
1000         list_for_each_entry(chunk_rec, &rc->good_chunks, list) {
1001                 chunk = create_chunk_item(chunk_rec);
1002                 if (!chunk)
1003                         return -ENOMEM;
1004
1005                 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1006                 key.type = BTRFS_CHUNK_ITEM_KEY;
1007                 key.offset = chunk_rec->offset;
1008
1009                 ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
1010                                 btrfs_chunk_item_size(chunk->num_stripes));
1011                 free(chunk);
1012                 if (ret)
1013                         return ret;
1014         }
1015         return 0;
1016 }
1017
1018 static int rebuild_chunk_tree(struct btrfs_trans_handle *trans,
1019                               struct recover_control *rc,
1020                               struct btrfs_root *root)
1021 {
1022         int ret = 0;
1023
1024         root = root->fs_info->chunk_root;
1025
1026         ret = __rebuild_chunk_root(trans, rc, root);
1027         if (ret)
1028                 return ret;
1029
1030         ret = __rebuild_device_items(trans, rc, root);
1031         if (ret)
1032                 return ret;
1033
1034         ret = __rebuild_chunk_items(trans, rc, root);
1035
1036         return ret;
1037 }
1038
1039 static int rebuild_sys_array(struct recover_control *rc,
1040                              struct btrfs_root *root)
1041 {
1042         struct btrfs_chunk *chunk;
1043         struct btrfs_key key;
1044         struct chunk_record *chunk_rec;
1045         int ret = 0;
1046         u16 num_stripes;
1047
1048         btrfs_set_super_sys_array_size(root->fs_info->super_copy, 0);
1049
1050         list_for_each_entry(chunk_rec, &rc->good_chunks, list) {
1051                 if (!(chunk_rec->type_flags & BTRFS_BLOCK_GROUP_SYSTEM))
1052                         continue;
1053
1054                 num_stripes = chunk_rec->num_stripes;
1055                 chunk = create_chunk_item(chunk_rec);
1056                 if (!chunk) {
1057                         ret = -ENOMEM;
1058                         break;
1059                 }
1060
1061                 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1062                 key.type = BTRFS_CHUNK_ITEM_KEY;
1063                 key.offset = chunk_rec->offset;
1064
1065                 ret = btrfs_add_system_chunk(NULL, root, &key, chunk,
1066                                 btrfs_chunk_item_size(num_stripes));
1067                 free(chunk);
1068                 if (ret)
1069                         break;
1070         }
1071         return ret;
1072
1073 }
1074
1075 static struct btrfs_root *
1076 open_ctree_with_broken_chunk(struct recover_control *rc)
1077 {
1078         struct btrfs_fs_info *fs_info;
1079         struct btrfs_super_block *disk_super;
1080         struct extent_buffer *eb;
1081         u32 sectorsize;
1082         u32 nodesize;
1083         u32 leafsize;
1084         u32 stripesize;
1085         int ret;
1086
1087         fs_info = btrfs_new_fs_info(1, BTRFS_SUPER_INFO_OFFSET);
1088         if (!fs_info) {
1089                 fprintf(stderr, "Failed to allocate memory for fs_info\n");
1090                 return ERR_PTR(-ENOMEM);
1091         }
1092
1093         fs_info->fs_devices = rc->fs_devices;
1094         ret = btrfs_open_devices(fs_info->fs_devices, O_RDWR);
1095         if (ret)
1096                 goto out;
1097
1098         disk_super = fs_info->super_copy;
1099         ret = btrfs_read_dev_super(fs_info->fs_devices->latest_bdev,
1100                                    disk_super, fs_info->super_bytenr);
1101         if (ret) {
1102                 fprintf(stderr, "No valid btrfs found\n");
1103                 goto out_devices;
1104         }
1105
1106         memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE);
1107
1108         ret = btrfs_check_fs_compatibility(disk_super, 1);
1109         if (ret)
1110                 goto out_devices;
1111
1112         nodesize = btrfs_super_nodesize(disk_super);
1113         leafsize = btrfs_super_leafsize(disk_super);
1114         sectorsize = btrfs_super_sectorsize(disk_super);
1115         stripesize = btrfs_super_stripesize(disk_super);
1116
1117         __setup_root(nodesize, leafsize, sectorsize, stripesize,
1118                      fs_info->chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
1119
1120         ret = build_device_maps_by_chunk_records(rc, fs_info->chunk_root);
1121         if (ret)
1122                 goto out_cleanup;
1123
1124         ret = btrfs_setup_all_roots(fs_info, 0, 0);
1125         if (ret)
1126                 goto out_failed;
1127
1128         eb = fs_info->tree_root->node;
1129         read_extent_buffer(eb, fs_info->chunk_tree_uuid,
1130                            (unsigned long)btrfs_header_chunk_tree_uuid(eb),
1131                            BTRFS_UUID_SIZE);
1132
1133         return fs_info->fs_root;
1134 out_failed:
1135         btrfs_release_all_roots(fs_info);
1136 out_cleanup:
1137         btrfs_cleanup_all_caches(fs_info);
1138 out_devices:
1139         btrfs_close_devices(fs_info->fs_devices);
1140 out:
1141         btrfs_free_fs_info(fs_info);
1142         return ERR_PTR(ret);
1143 }
1144
1145 static int recover_prepare(struct recover_control *rc, char *path)
1146 {
1147         int ret;
1148         int fd;
1149         struct btrfs_super_block *sb;
1150         struct btrfs_fs_devices *fs_devices;
1151
1152         ret = 0;
1153         fd = open(path, O_RDONLY);
1154         if (fd < 0) {
1155                 fprintf(stderr, "open %s\n error.\n", path);
1156                 return -1;
1157         }
1158
1159         sb = malloc(sizeof(struct btrfs_super_block));
1160         if (!sb) {
1161                 fprintf(stderr, "allocating memory for sb failed.\n");
1162                 ret = -ENOMEM;
1163                 goto fail_close_fd;
1164         }
1165
1166         ret = btrfs_read_dev_super(fd, sb, BTRFS_SUPER_INFO_OFFSET);
1167         if (ret) {
1168                 fprintf(stderr, "read super block error\n");
1169                 goto fail_free_sb;
1170         }
1171
1172         rc->sectorsize = btrfs_super_sectorsize(sb);
1173         rc->leafsize = btrfs_super_leafsize(sb);
1174         rc->generation = btrfs_super_generation(sb);
1175         rc->chunk_root_generation = btrfs_super_chunk_root_generation(sb);
1176         rc->csum_size = btrfs_super_csum_size(sb);
1177
1178         /* if seed, the result of scanning below will be partial */
1179         if (btrfs_super_flags(sb) & BTRFS_SUPER_FLAG_SEEDING) {
1180                 fprintf(stderr, "this device is seed device\n");
1181                 ret = -1;
1182                 goto fail_free_sb;
1183         }
1184
1185         ret = btrfs_scan_fs_devices(fd, path, &fs_devices);
1186         if (ret)
1187                 goto fail_free_sb;
1188
1189         rc->fs_devices = fs_devices;
1190
1191         if (rc->verbose)
1192                 print_all_devices(&rc->fs_devices->devices);
1193
1194 fail_free_sb:
1195         free(sb);
1196 fail_close_fd:
1197         close(fd);
1198         return ret;
1199 }
1200
/*
 * Print a yes/no question on stdout and read the reply from stdin.
 *
 * @question: text printed in front of the "[Y/n]"-style hint
 * @defval:    1 - an empty reply means yes ("[Y/n]")
 *             0 - an empty reply means no  ("[y/N]")
 *            -1 - no default, an empty reply repeats the question ("[y/n]")
 *
 * At most four characters of the reply are examined. Leading spaces are
 * skipped, upper-case ASCII letters are folded to lower case and the reply
 * ends at the first newline or space. The rest of the buffered input is
 * discarded. "y"/"yes" return 1, "n"/"no" return 0, anything else repeats
 * the question.
 *
 * When stdin hits end-of-file or a read error the question cannot usefully
 * be repeated: an empty reply then takes the default (0 if there is none)
 * and an unrecognised reply counts as "no".
 */
static int ask_user(char *question, int defval)
{
	char answer[5];
	char *defstr;
	int input_ended;
	int c;
	int i;

	if (defval == 1) {
		defstr = "[Y/n]";
	} else if (defval == 0) {
		defstr = "[y/N]";
	} else if (defval == -1) {
		defstr = "[y/n]";
	} else {
		BUG_ON(1);
		/* Only reached if BUG_ON() does not abort. */
		defstr = "[y/n]";
		defval = -1;
	}
again:
	printf("%s%s? ", question, defstr);

	i = 0;
	input_ended = 0;
	while (i < 4) {
		c = getchar();
		if (c == EOF) {
			input_ended = 1;
			break;
		}
		if (c == '\n')
			break;
		if (c == ' ') {
			if (i == 0)
				continue;
			break;
		}
		if (c >= 'A' && c <= 'Z')
			c += 'a' - 'A';
		answer[i++] = (char)c;
	}
	/* i is at most 4 here, so the terminator always fits. */
	answer[i] = '\0';
	__fpurge(stdin);

	if (answer[0] == '\0') {
		if (defval != -1)
			return defval;
		if (input_ended)
			return 0;
		goto again;
	}

	if (!strcmp(answer, "yes") ||
	    !strcmp(answer, "y"))
		return 1;

	if (!strcmp(answer, "no") ||
	    !strcmp(answer, "n"))
		return 0;

	if (input_ended)
		return 0;

	goto again;
}
1254
1255 static int btrfs_get_device_extents(u64 chunk_object,
1256                                     struct list_head *orphan_devexts,
1257                                     struct list_head *ret_list)
1258 {
1259         struct device_extent_record *devext;
1260         struct device_extent_record *next;
1261         int count = 0;
1262
1263         list_for_each_entry_safe(devext, next, orphan_devexts, chunk_list) {
1264                 if (devext->chunk_offset == chunk_object) {
1265                         list_move_tail(&devext->chunk_list, ret_list);
1266                         count++;
1267                 }
1268         }
1269         return count;
1270 }
1271
1272 static int calc_num_stripes(u64 type)
1273 {
1274         if (type & (BTRFS_BLOCK_GROUP_RAID0 |
1275                     BTRFS_BLOCK_GROUP_RAID10 |
1276                     BTRFS_BLOCK_GROUP_RAID5 |
1277                     BTRFS_BLOCK_GROUP_RAID6))
1278                 return 0;
1279         else if (type & (BTRFS_BLOCK_GROUP_RAID1 |
1280                          BTRFS_BLOCK_GROUP_DUP))
1281                 return 2;
1282         else
1283                 return 1;
1284 }
1285
1286 static inline int calc_sub_nstripes(u64 type)
1287 {
1288         if (type & BTRFS_BLOCK_GROUP_RAID10)
1289                 return 2;
1290         else
1291                 return 1;
1292 }
1293
1294 static int btrfs_verify_device_extents(struct block_group_record *bg,
1295                                        struct list_head *devexts, int ndevexts)
1296 {
1297         struct device_extent_record *devext;
1298         u64 strpie_length;
1299         int expected_num_stripes;
1300
1301         expected_num_stripes = calc_num_stripes(bg->flags);
1302         if (!expected_num_stripes && expected_num_stripes != ndevexts)
1303                 return 1;
1304
1305         strpie_length = calc_stripe_length(bg->flags, bg->offset, ndevexts);
1306         list_for_each_entry(devext, devexts, chunk_list) {
1307                 if (devext->length != strpie_length)
1308                         return 1;
1309         }
1310         return 0;
1311 }
1312
1313 static int btrfs_rebuild_unordered_chunk_stripes(struct recover_control *rc,
1314                                                  struct chunk_record *chunk)
1315 {
1316         struct device_extent_record *devext;
1317         struct btrfs_device *device;
1318         int i;
1319
1320         devext = list_first_entry(&chunk->dextents, struct device_extent_record,
1321                                   chunk_list);
1322         for (i = 0; i < chunk->num_stripes; i++) {
1323                 chunk->stripes[i].devid = devext->objectid;
1324                 chunk->stripes[i].offset = devext->offset;
1325                 device = btrfs_find_device_by_devid(rc->fs_devices,
1326                                                     devext->objectid,
1327                                                     0);
1328                 if (!device)
1329                         return -ENOENT;
1330                 BUG_ON(btrfs_find_device_by_devid(rc->fs_devices,
1331                                                   devext->objectid,
1332                                                   1));
1333                 memcpy(chunk->stripes[i].dev_uuid, device->uuid,
1334                        BTRFS_UUID_SIZE);
1335                 devext = list_next_entry(devext, chunk_list);
1336         }
1337         return 0;
1338 }
1339
1340 static int btrfs_rebuild_chunk_stripes(struct recover_control *rc,
1341                                        struct chunk_record *chunk)
1342 {
1343         int ret;
1344
1345         if (chunk->type_flags & (BTRFS_BLOCK_GROUP_RAID10 |
1346                                  BTRFS_BLOCK_GROUP_RAID0 |
1347                                  BTRFS_BLOCK_GROUP_RAID5 |
1348                                  BTRFS_BLOCK_GROUP_RAID6))
1349                 BUG_ON(1);      /* Fixme: implement in the next patch */
1350         else
1351                 ret = btrfs_rebuild_unordered_chunk_stripes(rc, chunk);
1352
1353         return ret;
1354 }
1355
1356 static int btrfs_recover_chunks(struct recover_control *rc)
1357 {
1358         struct chunk_record *chunk;
1359         struct block_group_record *bg;
1360         struct block_group_record *next;
1361         LIST_HEAD(new_chunks);
1362         LIST_HEAD(devexts);
1363         int nstripes;
1364         int ret;
1365
1366         /* create the chunk by block group */
1367         list_for_each_entry_safe(bg, next, &rc->bg.block_groups, list) {
1368                 nstripes = btrfs_get_device_extents(bg->objectid,
1369                                                     &rc->devext.no_chunk_orphans,
1370                                                     &devexts);
1371                 chunk = malloc(btrfs_chunk_record_size(nstripes));
1372                 if (!chunk)
1373                         return -ENOMEM;
1374                 memset(chunk, 0, btrfs_chunk_record_size(nstripes));
1375                 INIT_LIST_HEAD(&chunk->dextents);
1376                 chunk->bg_rec = bg;
1377                 chunk->cache.start = bg->objectid;
1378                 chunk->cache.size = bg->offset;
1379                 chunk->objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1380                 chunk->type = BTRFS_CHUNK_ITEM_KEY;
1381                 chunk->offset = bg->objectid;
1382                 chunk->generation = bg->generation;
1383                 chunk->length = bg->offset;
1384                 chunk->owner = BTRFS_CHUNK_TREE_OBJECTID;
1385                 chunk->stripe_len = BTRFS_STRIPE_LEN;
1386                 chunk->type_flags = bg->flags;
1387                 chunk->io_width = BTRFS_STRIPE_LEN;
1388                 chunk->io_align = BTRFS_STRIPE_LEN;
1389                 chunk->sector_size = rc->sectorsize;
1390                 chunk->sub_stripes = calc_sub_nstripes(bg->flags);
1391
1392                 ret = insert_cache_extent(&rc->chunk, &chunk->cache);
1393                 BUG_ON(ret);
1394
1395                 if (!nstripes) {
1396                         list_add_tail(&chunk->list, &rc->bad_chunks);
1397                         continue;
1398                 }
1399
1400                 list_splice_init(&devexts, &chunk->dextents);
1401
1402                 ret = btrfs_verify_device_extents(bg, &devexts, nstripes);
1403                 if (ret) {
1404                         list_add_tail(&chunk->list, &rc->bad_chunks);
1405                         continue;
1406                 }
1407
1408                 chunk->num_stripes = nstripes;
1409                 ret = btrfs_rebuild_chunk_stripes(rc, chunk);
1410                 if (ret)
1411                         list_add_tail(&chunk->list, &rc->bad_chunks);
1412                 else
1413                         list_add_tail(&chunk->list, &rc->good_chunks);
1414         }
1415         /*
1416          * Don't worry about the lost orphan device extents, they don't
1417          * have its chunk and block group, they must be the old ones that
1418          * we have dropped.
1419          */
1420         return 0;
1421 }
1422
1423 static int btrfs_recover_chunk_tree(char *path, int verbose, int yes)
1424 {
1425         int ret = 0;
1426         struct btrfs_root *root = NULL;
1427         struct btrfs_trans_handle *trans;
1428         struct recover_control rc;
1429
1430         init_recover_control(&rc, verbose, yes);
1431
1432         ret = recover_prepare(&rc, path);
1433         if (ret) {
1434                 fprintf(stderr, "recover prepare error\n");
1435                 return ret;
1436         }
1437
1438         ret = scan_devices(&rc);
1439         if (ret) {
1440                 fprintf(stderr, "scan chunk headers error\n");
1441                 goto fail_rc;
1442         }
1443
1444         if (cache_tree_empty(&rc.chunk) &&
1445             cache_tree_empty(&rc.bg.tree) &&
1446             cache_tree_empty(&rc.devext.tree)) {
1447                 fprintf(stderr, "no recoverable chunk\n");
1448                 goto fail_rc;
1449         }
1450
1451         print_scan_result(&rc);
1452
1453         ret = check_chunks(&rc.chunk, &rc.bg, &rc.devext, &rc.good_chunks,
1454                            &rc.bad_chunks, 1);
1455         print_check_result(&rc);
1456         if (ret) {
1457                 if (!list_empty(&rc.bg.block_groups) ||
1458                     !list_empty(&rc.devext.no_chunk_orphans)) {
1459                         ret = btrfs_recover_chunks(&rc);
1460                         if (ret)
1461                                 goto fail_rc;
1462                 }
1463                 /*
1464                  * If the chunk is healthy, its block group item and device
1465                  * extent item should be written on the disks. So, it is very
1466                  * likely that the bad chunk is a old one that has been
1467                  * droppped from the fs. Don't deal with them now, we will
1468                  * check it after the fs is opened.
1469                  */
1470         }
1471
1472         root = open_ctree_with_broken_chunk(&rc);
1473         if (IS_ERR(root)) {
1474                 fprintf(stderr, "open with broken chunk error\n");
1475                 ret = PTR_ERR(root);
1476                 goto fail_rc;
1477         }
1478
1479         ret = check_all_chunks_by_metadata(&rc, root);
1480         if (ret) {
1481                 fprintf(stderr, "The chunks in memory can not match the metadata of the fs. Repair failed.\n");
1482                 goto fail_close_ctree;
1483         }
1484
1485         if (!rc.yes) {
1486                 ret = ask_user("We are going to rebuild the chunk tree on disk, it might destroy the old metadata on the disk, Are you sure",
1487                                0);
1488                 if (!ret) {
1489                         ret = BTRFS_CHUNK_TREE_REBUILD_ABORTED;
1490                         goto fail_close_ctree;
1491                 }
1492         }
1493
1494         trans = btrfs_start_transaction(root, 1);
1495         ret = remove_chunk_extent_item(trans, &rc, root);
1496         BUG_ON(ret);
1497
1498         ret = rebuild_chunk_tree(trans, &rc, root);
1499         BUG_ON(ret);
1500
1501         ret = rebuild_sys_array(&rc, root);
1502         BUG_ON(ret);
1503
1504         btrfs_commit_transaction(trans, root);
1505 fail_close_ctree:
1506         close_ctree(root);
1507 fail_rc:
1508         free_recover_control(&rc);
1509         return ret;
1510 }
1511
/*
 * Usage text of "btrfs chunk-recover": synopsis line, short description, a
 * blank separator and one line per option, terminated by NULL.
 */
const char * const cmd_chunk_recover_usage[] = {
	"btrfs chunk-recover [options] <device>",
	"Recover the chunk tree by scanning the devices one by one.",
	"",
	"-y	Assume an answer of `yes' to all questions",
	"-v	Verbose mode",
	"-h	Help",
	NULL
};
1521
1522 int cmd_chunk_recover(int argc, char *argv[])
1523 {
1524         int ret = 0;
1525         char *file;
1526         int yes = 0;
1527         int verbose = 0;
1528
1529         while (1) {
1530                 int c = getopt(argc, argv, "yvh");
1531                 if (c < 0)
1532                         break;
1533                 switch (c) {
1534                 case 'y':
1535                         yes = 1;
1536                         break;
1537                 case 'v':
1538                         verbose = 1;
1539                         break;
1540                 case 'h':
1541                 default:
1542                         usage(cmd_chunk_recover_usage);
1543                 }
1544         }
1545
1546         argc = argc - optind;
1547         if (argc == 0)
1548                 usage(cmd_chunk_recover_usage);
1549
1550         file = argv[optind];
1551
1552         ret = check_mounted(file);
1553         if (ret) {
1554                 fprintf(stderr, "the device is busy\n");
1555                 return ret;
1556         }
1557
1558         ret = btrfs_recover_chunk_tree(file, verbose, yes);
1559         if (!ret) {
1560                 fprintf(stdout, "Recover the chunk tree successfully.\n");
1561         } else if (ret == BTRFS_CHUNK_TREE_REBUILD_ABORTED) {
1562                 ret = 0;
1563                 fprintf(stdout, "Abort to rebuild the on-disk chunk tree.\n");
1564         } else {
1565                 fprintf(stdout, "Fail to recover the chunk tree.\n");
1566         }
1567         return ret;
1568 }