btrfs-progs: inspect: use btrfs_open_dir for btrfs inspect command
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
/*
 * Phases reported by the progress indicator.
 *
 * The value is used as an index into the message table of
 * print_status_check(), which has no entry for TASK_NOTHING, so
 * TASK_NOTHING must stay the final enumerator.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* sentinel, must be the last element */
};
50
51 struct task_ctx {
52         int progress_enabled;
53         enum task_position tp;
54
55         struct task_info *info;
56 };
57
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
/*
 * Finish the progress line: write a newline to stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
109
110 struct extent_backref {
111         struct list_head list;
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
119 struct data_backref {
120         struct extent_backref node;
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, just removed the undetermined members
136  * and change it to use list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
149 struct tree_backref {
150         struct extent_backref node;
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
157 struct extent_record {
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
/*
 * Error bits for a back reference (e.g. inode_backref::errors);
 * print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
222
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
421 struct inode_record {
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
431         int errors;
432
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
443         struct rb_root holes;
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
/*
 * Error bits for inode_record::errors; print_inode_error() turns them
 * into text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
497         struct inode_record *current;
498         u32 refs;
499 };
500
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
/* Forward declaration; the definition is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct orphan_data_extent *src_orphan;
570         struct orphan_data_extent *dst_orphan;
571         size_t size;
572         int ret;
573
574         rec = malloc(sizeof(*rec));
575         memcpy(rec, orig_rec, sizeof(*rec));
576         rec->refs = 1;
577         INIT_LIST_HEAD(&rec->backrefs);
578         INIT_LIST_HEAD(&rec->orphan_extents);
579         rec->holes = RB_ROOT;
580
581         list_for_each_entry(orig, &orig_rec->backrefs, list) {
582                 size = sizeof(*orig) + orig->namelen + 1;
583                 backref = malloc(size);
584                 memcpy(backref, orig, size);
585                 list_add_tail(&backref->list, &rec->backrefs);
586         }
587         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588                 dst_orphan = malloc(sizeof(*dst_orphan));
589                 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
590                 BUG_ON(!dst_orphan);
591                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
593         }
594         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
595         BUG_ON(ret < 0);
596
597         return rec;
598 }
599
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
601                                       u64 objectid)
602 {
603         struct orphan_data_extent *orphan;
604
605         if (list_empty(orphan_extents))
606                 return;
607         printf("The following data extent is lost in tree %llu:\n",
608                objectid);
609         list_for_each_entry(orphan, orphan_extents, list) {
610                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
612                        orphan->disk_len);
613         }
614 }
615
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
617 {
618         u64 root_objectid = root->root_key.objectid;
619         int errors = rec->errors;
620
621         if (!errors)
622                 return;
623         /* reloc root errors, we print its corresponding fs root objectid*/
624         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625                 root_objectid = root->root_key.offset;
626                 fprintf(stderr, "reloc");
627         }
628         fprintf(stderr, "root %llu inode %llu errors %x",
629                 (unsigned long long) root_objectid,
630                 (unsigned long long) rec->ino, rec->errors);
631
632         if (errors & I_ERR_NO_INODE_ITEM)
633                 fprintf(stderr, ", no inode item");
634         if (errors & I_ERR_NO_ORPHAN_ITEM)
635                 fprintf(stderr, ", no orphan item");
636         if (errors & I_ERR_DUP_INODE_ITEM)
637                 fprintf(stderr, ", dup inode item");
638         if (errors & I_ERR_DUP_DIR_INDEX)
639                 fprintf(stderr, ", dup dir index");
640         if (errors & I_ERR_ODD_DIR_ITEM)
641                 fprintf(stderr, ", odd dir item");
642         if (errors & I_ERR_ODD_FILE_EXTENT)
643                 fprintf(stderr, ", odd file extent");
644         if (errors & I_ERR_BAD_FILE_EXTENT)
645                 fprintf(stderr, ", bad file extent");
646         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647                 fprintf(stderr, ", file extent overlap");
648         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649                 fprintf(stderr, ", file extent discount");
650         if (errors & I_ERR_DIR_ISIZE_WRONG)
651                 fprintf(stderr, ", dir isize wrong");
652         if (errors & I_ERR_FILE_NBYTES_WRONG)
653                 fprintf(stderr, ", nbytes wrong");
654         if (errors & I_ERR_ODD_CSUM_ITEM)
655                 fprintf(stderr, ", odd csum item");
656         if (errors & I_ERR_SOME_CSUM_MISSING)
657                 fprintf(stderr, ", some csum missing");
658         if (errors & I_ERR_LINK_COUNT_WRONG)
659                 fprintf(stderr, ", link count wrong");
660         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661                 fprintf(stderr, ", orphan file extent");
662         fprintf(stderr, "\n");
663         /* Print the orphan extents if needed */
664         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
666
667         /* Print the holes if needed */
668         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669                 struct file_extent_hole *hole;
670                 struct rb_node *node;
671                 int found = 0;
672
673                 node = rb_first(&rec->holes);
674                 fprintf(stderr, "Found file extent holes:\n");
675                 while (node) {
676                         found = 1;
677                         hole = rb_entry(node, struct file_extent_hole, node);
678                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
679                                 hole->start, hole->len);
680                         node = rb_next(node);
681                 }
682                 if (!found)
683                         fprintf(stderr, "\tstart: 0, len: %llu\n",
684                                 round_up(rec->isize, root->sectorsize));
685         }
686 }
687
688 static void print_ref_error(int errors)
689 {
690         if (errors & REF_ERR_NO_DIR_ITEM)
691                 fprintf(stderr, ", no dir item");
692         if (errors & REF_ERR_NO_DIR_INDEX)
693                 fprintf(stderr, ", no dir index");
694         if (errors & REF_ERR_NO_INODE_REF)
695                 fprintf(stderr, ", no inode ref");
696         if (errors & REF_ERR_DUP_DIR_ITEM)
697                 fprintf(stderr, ", dup dir item");
698         if (errors & REF_ERR_DUP_DIR_INDEX)
699                 fprintf(stderr, ", dup dir index");
700         if (errors & REF_ERR_DUP_INODE_REF)
701                 fprintf(stderr, ", dup inode ref");
702         if (errors & REF_ERR_INDEX_UNMATCH)
703                 fprintf(stderr, ", index unmatch");
704         if (errors & REF_ERR_FILETYPE_UNMATCH)
705                 fprintf(stderr, ", filetype unmatch");
706         if (errors & REF_ERR_NAME_TOO_LONG)
707                 fprintf(stderr, ", name too long");
708         if (errors & REF_ERR_NO_ROOT_REF)
709                 fprintf(stderr, ", no root ref");
710         if (errors & REF_ERR_NO_ROOT_BACKREF)
711                 fprintf(stderr, ", no root backref");
712         if (errors & REF_ERR_DUP_ROOT_REF)
713                 fprintf(stderr, ", dup root ref");
714         if (errors & REF_ERR_DUP_ROOT_BACKREF)
715                 fprintf(stderr, ", dup root backref");
716         fprintf(stderr, "\n");
717 }
718
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
720                                           u64 ino, int mod)
721 {
722         struct ptr_node *node;
723         struct cache_extent *cache;
724         struct inode_record *rec = NULL;
725         int ret;
726
727         cache = lookup_cache_extent(inode_cache, ino, 1);
728         if (cache) {
729                 node = container_of(cache, struct ptr_node, cache);
730                 rec = node->data;
731                 if (mod && rec->refs > 1) {
732                         node->data = clone_inode_rec(rec);
733                         rec->refs--;
734                         rec = node->data;
735                 }
736         } else if (mod) {
737                 rec = calloc(1, sizeof(*rec));
738                 rec->ino = ino;
739                 rec->extent_start = (u64)-1;
740                 rec->refs = 1;
741                 INIT_LIST_HEAD(&rec->backrefs);
742                 INIT_LIST_HEAD(&rec->orphan_extents);
743                 rec->holes = RB_ROOT;
744
745                 node = malloc(sizeof(*node));
746                 node->cache.start = ino;
747                 node->cache.size = 1;
748                 node->data = rec;
749
750                 if (ino == BTRFS_FREE_INO_OBJECTID)
751                         rec->found_link = 1;
752
753                 ret = insert_cache_extent(inode_cache, &node->cache);
754                 BUG_ON(ret);
755         }
756         return rec;
757 }
758
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
760 {
761         struct orphan_data_extent *orphan;
762
763         while (!list_empty(orphan_extents)) {
764                 orphan = list_entry(orphan_extents->next,
765                                     struct orphan_data_extent, list);
766                 list_del(&orphan->list);
767                 free(orphan);
768         }
769 }
770
771 static void free_inode_rec(struct inode_record *rec)
772 {
773         struct inode_backref *backref;
774
775         if (--rec->refs > 0)
776                 return;
777
778         while (!list_empty(&rec->backrefs)) {
779                 backref = list_entry(rec->backrefs.next,
780                                      struct inode_backref, list);
781                 list_del(&backref->list);
782                 free(backref);
783         }
784         free_orphan_data_extents(&rec->orphan_extents);
785         free_file_extent_holes(&rec->holes);
786         free(rec);
787 }
788
789 static int can_free_inode_rec(struct inode_record *rec)
790 {
791         if (!rec->errors && rec->checked && rec->found_inode_item &&
792             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
793                 return 1;
794         return 0;
795 }
796
797 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
798                                  struct inode_record *rec)
799 {
800         struct cache_extent *cache;
801         struct inode_backref *tmp, *backref;
802         struct ptr_node *node;
803         unsigned char filetype;
804
805         if (!rec->found_inode_item)
806                 return;
807
808         filetype = imode_to_type(rec->imode);
809         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
810                 if (backref->found_dir_item && backref->found_dir_index) {
811                         if (backref->filetype != filetype)
812                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
813                         if (!backref->errors && backref->found_inode_ref) {
814                                 list_del(&backref->list);
815                                 free(backref);
816                         }
817                 }
818         }
819
820         if (!rec->checked || rec->merging)
821                 return;
822
823         if (S_ISDIR(rec->imode)) {
824                 if (rec->found_size != rec->isize)
825                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
826                 if (rec->found_file_extent)
827                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
828         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
829                 if (rec->found_dir_item)
830                         rec->errors |= I_ERR_ODD_DIR_ITEM;
831                 if (rec->found_size != rec->nbytes)
832                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
833                 if (rec->nlink > 0 && !no_holes &&
834                     (rec->extent_end < rec->isize ||
835                      first_extent_gap(&rec->holes) < rec->isize))
836                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
837         }
838
839         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
840                 if (rec->found_csum_item && rec->nodatasum)
841                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
842                 if (rec->some_csum_missing && !rec->nodatasum)
843                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
844         }
845
846         BUG_ON(rec->refs != 1);
847         if (can_free_inode_rec(rec)) {
848                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
849                 node = container_of(cache, struct ptr_node, cache);
850                 BUG_ON(node->data != rec);
851                 remove_cache_extent(inode_cache, &node->cache);
852                 free(node);
853                 free_inode_rec(rec);
854         }
855 }
856
857 static int check_orphan_item(struct btrfs_root *root, u64 ino)
858 {
859         struct btrfs_path path;
860         struct btrfs_key key;
861         int ret;
862
863         key.objectid = BTRFS_ORPHAN_OBJECTID;
864         key.type = BTRFS_ORPHAN_ITEM_KEY;
865         key.offset = ino;
866
867         btrfs_init_path(&path);
868         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
869         btrfs_release_path(&path);
870         if (ret > 0)
871                 ret = -ENOENT;
872         return ret;
873 }
874
875 static int process_inode_item(struct extent_buffer *eb,
876                               int slot, struct btrfs_key *key,
877                               struct shared_node *active_node)
878 {
879         struct inode_record *rec;
880         struct btrfs_inode_item *item;
881
882         rec = active_node->current;
883         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
884         if (rec->found_inode_item) {
885                 rec->errors |= I_ERR_DUP_INODE_ITEM;
886                 return 1;
887         }
888         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
889         rec->nlink = btrfs_inode_nlink(eb, item);
890         rec->isize = btrfs_inode_size(eb, item);
891         rec->nbytes = btrfs_inode_nbytes(eb, item);
892         rec->imode = btrfs_inode_mode(eb, item);
893         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
894                 rec->nodatasum = 1;
895         rec->found_inode_item = 1;
896         if (rec->nlink == 0)
897                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
898         maybe_free_inode_rec(&active_node->inode_cache, rec);
899         return 0;
900 }
901
902 static struct inode_backref *get_inode_backref(struct inode_record *rec,
903                                                 const char *name,
904                                                 int namelen, u64 dir)
905 {
906         struct inode_backref *backref;
907
908         list_for_each_entry(backref, &rec->backrefs, list) {
909                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
910                         break;
911                 if (backref->dir != dir || backref->namelen != namelen)
912                         continue;
913                 if (memcmp(name, backref->name, namelen))
914                         continue;
915                 return backref;
916         }
917
918         backref = malloc(sizeof(*backref) + namelen + 1);
919         memset(backref, 0, sizeof(*backref));
920         backref->dir = dir;
921         backref->namelen = namelen;
922         memcpy(backref->name, name, namelen);
923         backref->name[namelen] = '\0';
924         list_add_tail(&backref->list, &rec->backrefs);
925         return backref;
926 }
927
928 static int add_inode_backref(struct cache_tree *inode_cache,
929                              u64 ino, u64 dir, u64 index,
930                              const char *name, int namelen,
931                              int filetype, int itemtype, int errors)
932 {
933         struct inode_record *rec;
934         struct inode_backref *backref;
935
936         rec = get_inode_rec(inode_cache, ino, 1);
937         backref = get_inode_backref(rec, name, namelen, dir);
938         if (errors)
939                 backref->errors |= errors;
940         if (itemtype == BTRFS_DIR_INDEX_KEY) {
941                 if (backref->found_dir_index)
942                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
943                 if (backref->found_inode_ref && backref->index != index)
944                         backref->errors |= REF_ERR_INDEX_UNMATCH;
945                 if (backref->found_dir_item && backref->filetype != filetype)
946                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
947
948                 backref->index = index;
949                 backref->filetype = filetype;
950                 backref->found_dir_index = 1;
951         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
952                 rec->found_link++;
953                 if (backref->found_dir_item)
954                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
955                 if (backref->found_dir_index && backref->filetype != filetype)
956                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
957
958                 backref->filetype = filetype;
959                 backref->found_dir_item = 1;
960         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
961                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
962                 if (backref->found_inode_ref)
963                         backref->errors |= REF_ERR_DUP_INODE_REF;
964                 if (backref->found_dir_index && backref->index != index)
965                         backref->errors |= REF_ERR_INDEX_UNMATCH;
966                 else
967                         backref->index = index;
968
969                 backref->ref_type = itemtype;
970                 backref->found_inode_ref = 1;
971         } else {
972                 BUG_ON(1);
973         }
974
975         maybe_free_inode_rec(inode_cache, rec);
976         return 0;
977 }
978
979 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
980                             struct cache_tree *dst_cache)
981 {
982         struct inode_backref *backref;
983         u32 dir_count = 0;
984         int ret = 0;
985
986         dst->merging = 1;
987         list_for_each_entry(backref, &src->backrefs, list) {
988                 if (backref->found_dir_index) {
989                         add_inode_backref(dst_cache, dst->ino, backref->dir,
990                                         backref->index, backref->name,
991                                         backref->namelen, backref->filetype,
992                                         BTRFS_DIR_INDEX_KEY, backref->errors);
993                 }
994                 if (backref->found_dir_item) {
995                         dir_count++;
996                         add_inode_backref(dst_cache, dst->ino,
997                                         backref->dir, 0, backref->name,
998                                         backref->namelen, backref->filetype,
999                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1000                 }
1001                 if (backref->found_inode_ref) {
1002                         add_inode_backref(dst_cache, dst->ino,
1003                                         backref->dir, backref->index,
1004                                         backref->name, backref->namelen, 0,
1005                                         backref->ref_type, backref->errors);
1006                 }
1007         }
1008
1009         if (src->found_dir_item)
1010                 dst->found_dir_item = 1;
1011         if (src->found_file_extent)
1012                 dst->found_file_extent = 1;
1013         if (src->found_csum_item)
1014                 dst->found_csum_item = 1;
1015         if (src->some_csum_missing)
1016                 dst->some_csum_missing = 1;
1017         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1018                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1019                 if (ret < 0)
1020                         return ret;
1021         }
1022
1023         BUG_ON(src->found_link < dir_count);
1024         dst->found_link += src->found_link - dir_count;
1025         dst->found_size += src->found_size;
1026         if (src->extent_start != (u64)-1) {
1027                 if (dst->extent_start == (u64)-1) {
1028                         dst->extent_start = src->extent_start;
1029                         dst->extent_end = src->extent_end;
1030                 } else {
1031                         if (dst->extent_end > src->extent_start)
1032                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1033                         else if (dst->extent_end < src->extent_start) {
1034                                 ret = add_file_extent_hole(&dst->holes,
1035                                         dst->extent_end,
1036                                         src->extent_start - dst->extent_end);
1037                         }
1038                         if (dst->extent_end < src->extent_end)
1039                                 dst->extent_end = src->extent_end;
1040                 }
1041         }
1042
1043         dst->errors |= src->errors;
1044         if (src->found_inode_item) {
1045                 if (!dst->found_inode_item) {
1046                         dst->nlink = src->nlink;
1047                         dst->isize = src->isize;
1048                         dst->nbytes = src->nbytes;
1049                         dst->imode = src->imode;
1050                         dst->nodatasum = src->nodatasum;
1051                         dst->found_inode_item = 1;
1052                 } else {
1053                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1054                 }
1055         }
1056         dst->merging = 0;
1057
1058         return 0;
1059 }
1060
1061 static int splice_shared_node(struct shared_node *src_node,
1062                               struct shared_node *dst_node)
1063 {
1064         struct cache_extent *cache;
1065         struct ptr_node *node, *ins;
1066         struct cache_tree *src, *dst;
1067         struct inode_record *rec, *conflict;
1068         u64 current_ino = 0;
1069         int splice = 0;
1070         int ret;
1071
1072         if (--src_node->refs == 0)
1073                 splice = 1;
1074         if (src_node->current)
1075                 current_ino = src_node->current->ino;
1076
1077         src = &src_node->root_cache;
1078         dst = &dst_node->root_cache;
1079 again:
1080         cache = search_cache_extent(src, 0);
1081         while (cache) {
1082                 node = container_of(cache, struct ptr_node, cache);
1083                 rec = node->data;
1084                 cache = next_cache_extent(cache);
1085
1086                 if (splice) {
1087                         remove_cache_extent(src, &node->cache);
1088                         ins = node;
1089                 } else {
1090                         ins = malloc(sizeof(*ins));
1091                         ins->cache.start = node->cache.start;
1092                         ins->cache.size = node->cache.size;
1093                         ins->data = rec;
1094                         rec->refs++;
1095                 }
1096                 ret = insert_cache_extent(dst, &ins->cache);
1097                 if (ret == -EEXIST) {
1098                         conflict = get_inode_rec(dst, rec->ino, 1);
1099                         merge_inode_recs(rec, conflict, dst);
1100                         if (rec->checked) {
1101                                 conflict->checked = 1;
1102                                 if (dst_node->current == conflict)
1103                                         dst_node->current = NULL;
1104                         }
1105                         maybe_free_inode_rec(dst, conflict);
1106                         free_inode_rec(rec);
1107                         free(ins);
1108                 } else {
1109                         BUG_ON(ret);
1110                 }
1111         }
1112
1113         if (src == &src_node->root_cache) {
1114                 src = &src_node->inode_cache;
1115                 dst = &dst_node->inode_cache;
1116                 goto again;
1117         }
1118
1119         if (current_ino > 0 && (!dst_node->current ||
1120             current_ino > dst_node->current->ino)) {
1121                 if (dst_node->current) {
1122                         dst_node->current->checked = 1;
1123                         maybe_free_inode_rec(dst, dst_node->current);
1124                 }
1125                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1126         }
1127         return 0;
1128 }
1129
1130 static void free_inode_ptr(struct cache_extent *cache)
1131 {
1132         struct ptr_node *node;
1133         struct inode_record *rec;
1134
1135         node = container_of(cache, struct ptr_node, cache);
1136         rec = node->data;
1137         free_inode_rec(rec);
1138         free(node);
1139 }
1140
1141 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1142
1143 static struct shared_node *find_shared_node(struct cache_tree *shared,
1144                                             u64 bytenr)
1145 {
1146         struct cache_extent *cache;
1147         struct shared_node *node;
1148
1149         cache = lookup_cache_extent(shared, bytenr, 1);
1150         if (cache) {
1151                 node = container_of(cache, struct shared_node, cache);
1152                 return node;
1153         }
1154         return NULL;
1155 }
1156
1157 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1158 {
1159         int ret;
1160         struct shared_node *node;
1161
1162         node = calloc(1, sizeof(*node));
1163         node->cache.start = bytenr;
1164         node->cache.size = 1;
1165         cache_tree_init(&node->root_cache);
1166         cache_tree_init(&node->inode_cache);
1167         node->refs = refs;
1168
1169         ret = insert_cache_extent(shared, &node->cache);
1170         BUG_ON(ret);
1171         return 0;
1172 }
1173
1174 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1175                              struct walk_control *wc, int level)
1176 {
1177         struct shared_node *node;
1178         struct shared_node *dest;
1179
1180         if (level == wc->active_node)
1181                 return 0;
1182
1183         BUG_ON(wc->active_node <= level);
1184         node = find_shared_node(&wc->shared, bytenr);
1185         if (!node) {
1186                 add_shared_node(&wc->shared, bytenr, refs);
1187                 node = find_shared_node(&wc->shared, bytenr);
1188                 wc->nodes[level] = node;
1189                 wc->active_node = level;
1190                 return 0;
1191         }
1192
1193         if (wc->root_level == wc->active_node &&
1194             btrfs_root_refs(&root->root_item) == 0) {
1195                 if (--node->refs == 0) {
1196                         free_inode_recs_tree(&node->root_cache);
1197                         free_inode_recs_tree(&node->inode_cache);
1198                         remove_cache_extent(&wc->shared, &node->cache);
1199                         free(node);
1200                 }
1201                 return 1;
1202         }
1203
1204         dest = wc->nodes[wc->active_node];
1205         splice_shared_node(node, dest);
1206         if (node->refs == 0) {
1207                 remove_cache_extent(&wc->shared, &node->cache);
1208                 free(node);
1209         }
1210         return 1;
1211 }
1212
1213 static int leave_shared_node(struct btrfs_root *root,
1214                              struct walk_control *wc, int level)
1215 {
1216         struct shared_node *node;
1217         struct shared_node *dest;
1218         int i;
1219
1220         if (level == wc->root_level)
1221                 return 0;
1222
1223         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1224                 if (wc->nodes[i])
1225                         break;
1226         }
1227         BUG_ON(i >= BTRFS_MAX_LEVEL);
1228
1229         node = wc->nodes[wc->active_node];
1230         wc->nodes[wc->active_node] = NULL;
1231         wc->active_node = i;
1232
1233         dest = wc->nodes[wc->active_node];
1234         if (wc->active_node < wc->root_level ||
1235             btrfs_root_refs(&root->root_item) > 0) {
1236                 BUG_ON(node->refs <= 1);
1237                 splice_shared_node(node, dest);
1238         } else {
1239                 BUG_ON(node->refs < 2);
1240                 node->refs--;
1241         }
1242         return 0;
1243 }
1244
1245 /*
1246  * Returns:
1247  * < 0 - on error
1248  * 1   - if the root with id child_root_id is a child of root parent_root_id
1249  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1250  *       has other root(s) as parent(s)
1251  * 2   - if the root child_root_id doesn't have any parent roots
1252  */
1253 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1254                          u64 child_root_id)
1255 {
1256         struct btrfs_path path;
1257         struct btrfs_key key;
1258         struct extent_buffer *leaf;
1259         int has_parent = 0;
1260         int ret;
1261
1262         btrfs_init_path(&path);
1263
1264         key.objectid = parent_root_id;
1265         key.type = BTRFS_ROOT_REF_KEY;
1266         key.offset = child_root_id;
1267         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1268                                 0, 0);
1269         if (ret < 0)
1270                 return ret;
1271         btrfs_release_path(&path);
1272         if (!ret)
1273                 return 1;
1274
1275         key.objectid = child_root_id;
1276         key.type = BTRFS_ROOT_BACKREF_KEY;
1277         key.offset = 0;
1278         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1279                                 0, 0);
1280         if (ret < 0)
1281                 goto out;
1282
1283         while (1) {
1284                 leaf = path.nodes[0];
1285                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1286                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1287                         if (ret)
1288                                 break;
1289                         leaf = path.nodes[0];
1290                 }
1291
1292                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1293                 if (key.objectid != child_root_id ||
1294                     key.type != BTRFS_ROOT_BACKREF_KEY)
1295                         break;
1296
1297                 has_parent = 1;
1298
1299                 if (key.offset == parent_root_id) {
1300                         btrfs_release_path(&path);
1301                         return 1;
1302                 }
1303
1304                 path.slots[0]++;
1305         }
1306 out:
1307         btrfs_release_path(&path);
1308         if (ret < 0)
1309                 return ret;
1310         return has_parent ? 0 : 2;
1311 }
1312
1313 static int process_dir_item(struct btrfs_root *root,
1314                             struct extent_buffer *eb,
1315                             int slot, struct btrfs_key *key,
1316                             struct shared_node *active_node)
1317 {
1318         u32 total;
1319         u32 cur = 0;
1320         u32 len;
1321         u32 name_len;
1322         u32 data_len;
1323         int error;
1324         int nritems = 0;
1325         int filetype;
1326         struct btrfs_dir_item *di;
1327         struct inode_record *rec;
1328         struct cache_tree *root_cache;
1329         struct cache_tree *inode_cache;
1330         struct btrfs_key location;
1331         char namebuf[BTRFS_NAME_LEN];
1332
1333         root_cache = &active_node->root_cache;
1334         inode_cache = &active_node->inode_cache;
1335         rec = active_node->current;
1336         rec->found_dir_item = 1;
1337
1338         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1339         total = btrfs_item_size_nr(eb, slot);
1340         while (cur < total) {
1341                 nritems++;
1342                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1343                 name_len = btrfs_dir_name_len(eb, di);
1344                 data_len = btrfs_dir_data_len(eb, di);
1345                 filetype = btrfs_dir_type(eb, di);
1346
1347                 rec->found_size += name_len;
1348                 if (name_len <= BTRFS_NAME_LEN) {
1349                         len = name_len;
1350                         error = 0;
1351                 } else {
1352                         len = BTRFS_NAME_LEN;
1353                         error = REF_ERR_NAME_TOO_LONG;
1354                 }
1355                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1356
1357                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1358                         add_inode_backref(inode_cache, location.objectid,
1359                                           key->objectid, key->offset, namebuf,
1360                                           len, filetype, key->type, error);
1361                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1362                         add_inode_backref(root_cache, location.objectid,
1363                                           key->objectid, key->offset,
1364                                           namebuf, len, filetype,
1365                                           key->type, error);
1366                 } else {
1367                         fprintf(stderr, "invalid location in dir item %u\n",
1368                                 location.type);
1369                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1370                                           key->objectid, key->offset, namebuf,
1371                                           len, filetype, key->type, error);
1372                 }
1373
1374                 len = sizeof(*di) + name_len + data_len;
1375                 di = (struct btrfs_dir_item *)((char *)di + len);
1376                 cur += len;
1377         }
1378         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1379                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1380
1381         return 0;
1382 }
1383
1384 static int process_inode_ref(struct extent_buffer *eb,
1385                              int slot, struct btrfs_key *key,
1386                              struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         int error;
1394         struct cache_tree *inode_cache;
1395         struct btrfs_inode_ref *ref;
1396         char namebuf[BTRFS_NAME_LEN];
1397
1398         inode_cache = &active_node->inode_cache;
1399
1400         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1401         total = btrfs_item_size_nr(eb, slot);
1402         while (cur < total) {
1403                 name_len = btrfs_inode_ref_name_len(eb, ref);
1404                 index = btrfs_inode_ref_index(eb, ref);
1405                 if (name_len <= BTRFS_NAME_LEN) {
1406                         len = name_len;
1407                         error = 0;
1408                 } else {
1409                         len = BTRFS_NAME_LEN;
1410                         error = REF_ERR_NAME_TOO_LONG;
1411                 }
1412                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1413                 add_inode_backref(inode_cache, key->objectid, key->offset,
1414                                   index, namebuf, len, 0, key->type, error);
1415
1416                 len = sizeof(*ref) + name_len;
1417                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1418                 cur += len;
1419         }
1420         return 0;
1421 }
1422
1423 static int process_inode_extref(struct extent_buffer *eb,
1424                                 int slot, struct btrfs_key *key,
1425                                 struct shared_node *active_node)
1426 {
1427         u32 total;
1428         u32 cur = 0;
1429         u32 len;
1430         u32 name_len;
1431         u64 index;
1432         u64 parent;
1433         int error;
1434         struct cache_tree *inode_cache;
1435         struct btrfs_inode_extref *extref;
1436         char namebuf[BTRFS_NAME_LEN];
1437
1438         inode_cache = &active_node->inode_cache;
1439
1440         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1441         total = btrfs_item_size_nr(eb, slot);
1442         while (cur < total) {
1443                 name_len = btrfs_inode_extref_name_len(eb, extref);
1444                 index = btrfs_inode_extref_index(eb, extref);
1445                 parent = btrfs_inode_extref_parent(eb, extref);
1446                 if (name_len <= BTRFS_NAME_LEN) {
1447                         len = name_len;
1448                         error = 0;
1449                 } else {
1450                         len = BTRFS_NAME_LEN;
1451                         error = REF_ERR_NAME_TOO_LONG;
1452                 }
1453                 read_extent_buffer(eb, namebuf,
1454                                    (unsigned long)(extref + 1), len);
1455                 add_inode_backref(inode_cache, key->objectid, parent,
1456                                   index, namebuf, len, 0, key->type, error);
1457
1458                 len = sizeof(*extref) + name_len;
1459                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1460                 cur += len;
1461         }
1462         return 0;
1463
1464 }
1465
1466 static int count_csum_range(struct btrfs_root *root, u64 start,
1467                             u64 len, u64 *found)
1468 {
1469         struct btrfs_key key;
1470         struct btrfs_path path;
1471         struct extent_buffer *leaf;
1472         int ret;
1473         size_t size;
1474         *found = 0;
1475         u64 csum_end;
1476         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1477
1478         btrfs_init_path(&path);
1479
1480         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1481         key.offset = start;
1482         key.type = BTRFS_EXTENT_CSUM_KEY;
1483
1484         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1485                                 &key, &path, 0, 0);
1486         if (ret < 0)
1487                 goto out;
1488         if (ret > 0 && path.slots[0] > 0) {
1489                 leaf = path.nodes[0];
1490                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1491                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1492                     key.type == BTRFS_EXTENT_CSUM_KEY)
1493                         path.slots[0]--;
1494         }
1495
1496         while (len > 0) {
1497                 leaf = path.nodes[0];
1498                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1499                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1500                         if (ret > 0)
1501                                 break;
1502                         else if (ret < 0)
1503                                 goto out;
1504                         leaf = path.nodes[0];
1505                 }
1506
1507                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1508                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1509                     key.type != BTRFS_EXTENT_CSUM_KEY)
1510                         break;
1511
1512                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1513                 if (key.offset >= start + len)
1514                         break;
1515
1516                 if (key.offset > start)
1517                         start = key.offset;
1518
1519                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1520                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1521                 if (csum_end > start) {
1522                         size = min(csum_end - start, len);
1523                         len -= size;
1524                         start += size;
1525                         *found += size;
1526                 }
1527
1528                 path.slots[0]++;
1529         }
1530 out:
1531         btrfs_release_path(&path);
1532         if (ret < 0)
1533                 return ret;
1534         return 0;
1535 }
1536
1537 static int process_file_extent(struct btrfs_root *root,
1538                                 struct extent_buffer *eb,
1539                                 int slot, struct btrfs_key *key,
1540                                 struct shared_node *active_node)
1541 {
1542         struct inode_record *rec;
1543         struct btrfs_file_extent_item *fi;
1544         u64 num_bytes = 0;
1545         u64 disk_bytenr = 0;
1546         u64 extent_offset = 0;
1547         u64 mask = root->sectorsize - 1;
1548         int extent_type;
1549         int ret;
1550
1551         rec = active_node->current;
1552         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1553         rec->found_file_extent = 1;
1554
1555         if (rec->extent_start == (u64)-1) {
1556                 rec->extent_start = key->offset;
1557                 rec->extent_end = key->offset;
1558         }
1559
1560         if (rec->extent_end > key->offset)
1561                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1562         else if (rec->extent_end < key->offset) {
1563                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1564                                            key->offset - rec->extent_end);
1565                 if (ret < 0)
1566                         return ret;
1567         }
1568
1569         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1570         extent_type = btrfs_file_extent_type(eb, fi);
1571
1572         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1573                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1574                 if (num_bytes == 0)
1575                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1576                 rec->found_size += num_bytes;
1577                 num_bytes = (num_bytes + mask) & ~mask;
1578         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1579                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1580                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1581                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1582                 extent_offset = btrfs_file_extent_offset(eb, fi);
1583                 if (num_bytes == 0 || (num_bytes & mask))
1584                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1585                 if (num_bytes + extent_offset >
1586                     btrfs_file_extent_ram_bytes(eb, fi))
1587                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1588                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1589                     (btrfs_file_extent_compression(eb, fi) ||
1590                      btrfs_file_extent_encryption(eb, fi) ||
1591                      btrfs_file_extent_other_encoding(eb, fi)))
1592                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1593                 if (disk_bytenr > 0)
1594                         rec->found_size += num_bytes;
1595         } else {
1596                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1597         }
1598         rec->extent_end = key->offset + num_bytes;
1599
1600         /*
1601          * The data reloc tree will copy full extents into its inode and then
1602          * copy the corresponding csums.  Because the extent it copied could be
1603          * a preallocated extent that hasn't been written to yet there may be no
1604          * csums to copy, ergo we won't have csums for our file extent.  This is
1605          * ok so just don't bother checking csums if the inode belongs to the
1606          * data reloc tree.
1607          */
1608         if (disk_bytenr > 0 &&
1609             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1610                 u64 found;
1611                 if (btrfs_file_extent_compression(eb, fi))
1612                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1613                 else
1614                         disk_bytenr += extent_offset;
1615
1616                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1617                 if (ret < 0)
1618                         return ret;
1619                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1620                         if (found > 0)
1621                                 rec->found_csum_item = 1;
1622                         if (found < num_bytes)
1623                                 rec->some_csum_missing = 1;
1624                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1625                         if (found > 0)
1626                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1627                 }
1628         }
1629         return 0;
1630 }
1631
1632 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1633                             struct walk_control *wc)
1634 {
1635         struct btrfs_key key;
1636         u32 nritems;
1637         int i;
1638         int ret = 0;
1639         struct cache_tree *inode_cache;
1640         struct shared_node *active_node;
1641
1642         if (wc->root_level == wc->active_node &&
1643             btrfs_root_refs(&root->root_item) == 0)
1644                 return 0;
1645
1646         active_node = wc->nodes[wc->active_node];
1647         inode_cache = &active_node->inode_cache;
1648         nritems = btrfs_header_nritems(eb);
1649         for (i = 0; i < nritems; i++) {
1650                 btrfs_item_key_to_cpu(eb, &key, i);
1651
1652                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1653                         continue;
1654                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1655                         continue;
1656
1657                 if (active_node->current == NULL ||
1658                     active_node->current->ino < key.objectid) {
1659                         if (active_node->current) {
1660                                 active_node->current->checked = 1;
1661                                 maybe_free_inode_rec(inode_cache,
1662                                                      active_node->current);
1663                         }
1664                         active_node->current = get_inode_rec(inode_cache,
1665                                                              key.objectid, 1);
1666                 }
1667                 switch (key.type) {
1668                 case BTRFS_DIR_ITEM_KEY:
1669                 case BTRFS_DIR_INDEX_KEY:
1670                         ret = process_dir_item(root, eb, i, &key, active_node);
1671                         break;
1672                 case BTRFS_INODE_REF_KEY:
1673                         ret = process_inode_ref(eb, i, &key, active_node);
1674                         break;
1675                 case BTRFS_INODE_EXTREF_KEY:
1676                         ret = process_inode_extref(eb, i, &key, active_node);
1677                         break;
1678                 case BTRFS_INODE_ITEM_KEY:
1679                         ret = process_inode_item(eb, i, &key, active_node);
1680                         break;
1681                 case BTRFS_EXTENT_DATA_KEY:
1682                         ret = process_file_extent(root, eb, i, &key,
1683                                                   active_node);
1684                         break;
1685                 default:
1686                         break;
1687                 };
1688         }
1689         return ret;
1690 }
1691
1692 static void reada_walk_down(struct btrfs_root *root,
1693                             struct extent_buffer *node, int slot)
1694 {
1695         u64 bytenr;
1696         u64 ptr_gen;
1697         u32 nritems;
1698         u32 blocksize;
1699         int i;
1700         int level;
1701
1702         level = btrfs_header_level(node);
1703         if (level != 1)
1704                 return;
1705
1706         nritems = btrfs_header_nritems(node);
1707         blocksize = btrfs_level_size(root, level - 1);
1708         for (i = slot; i < nritems; i++) {
1709                 bytenr = btrfs_node_blockptr(node, i);
1710                 ptr_gen = btrfs_node_ptr_generation(node, i);
1711                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1712         }
1713 }
1714
1715 /*
1716  * Check the child node/leaf by the following condition:
1717  * 1. the first item key of the node/leaf should be the same with the one
1718  *    in parent.
1719  * 2. block in parent node should match the child node/leaf.
1720  * 3. generation of parent node and child's header should be consistent.
1721  *
1722  * Or the child node/leaf pointed by the key in parent is not valid.
1723  *
1724  * We hope to check leaf owner too, but since subvol may share leaves,
1725  * which makes leaf owner check not so strong, key check should be
1726  * sufficient enough for that case.
1727  */
1728 static int check_child_node(struct btrfs_root *root,
1729                             struct extent_buffer *parent, int slot,
1730                             struct extent_buffer *child)
1731 {
1732         struct btrfs_key parent_key;
1733         struct btrfs_key child_key;
1734         int ret = 0;
1735
1736         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1737         if (btrfs_header_level(child) == 0)
1738                 btrfs_item_key_to_cpu(child, &child_key, 0);
1739         else
1740                 btrfs_node_key_to_cpu(child, &child_key, 0);
1741
1742         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1743                 ret = -EINVAL;
1744                 fprintf(stderr,
1745                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1746                         parent_key.objectid, parent_key.type, parent_key.offset,
1747                         child_key.objectid, child_key.type, child_key.offset);
1748         }
1749         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1750                 ret = -EINVAL;
1751                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1752                         btrfs_node_blockptr(parent, slot),
1753                         btrfs_header_bytenr(child));
1754         }
1755         if (btrfs_node_ptr_generation(parent, slot) !=
1756             btrfs_header_generation(child)) {
1757                 ret = -EINVAL;
1758                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1759                         btrfs_header_generation(child),
1760                         btrfs_node_ptr_generation(parent, slot));
1761         }
1762         return ret;
1763 }
1764
1765 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1766                           struct walk_control *wc, int *level)
1767 {
1768         enum btrfs_tree_block_status status;
1769         u64 bytenr;
1770         u64 ptr_gen;
1771         struct extent_buffer *next;
1772         struct extent_buffer *cur;
1773         u32 blocksize;
1774         int ret, err = 0;
1775         u64 refs;
1776
1777         WARN_ON(*level < 0);
1778         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1779         ret = btrfs_lookup_extent_info(NULL, root,
1780                                        path->nodes[*level]->start,
1781                                        *level, 1, &refs, NULL);
1782         if (ret < 0) {
1783                 err = ret;
1784                 goto out;
1785         }
1786
1787         if (refs > 1) {
1788                 ret = enter_shared_node(root, path->nodes[*level]->start,
1789                                         refs, wc, *level);
1790                 if (ret > 0) {
1791                         err = ret;
1792                         goto out;
1793                 }
1794         }
1795
1796         while (*level >= 0) {
1797                 WARN_ON(*level < 0);
1798                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1799                 cur = path->nodes[*level];
1800
1801                 if (btrfs_header_level(cur) != *level)
1802                         WARN_ON(1);
1803
1804                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1805                         break;
1806                 if (*level == 0) {
1807                         ret = process_one_leaf(root, cur, wc);
1808                         if (ret < 0)
1809                                 err = ret;
1810                         break;
1811                 }
1812                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1813                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1814                 blocksize = btrfs_level_size(root, *level - 1);
1815                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1816                                                1, &refs, NULL);
1817                 if (ret < 0)
1818                         refs = 0;
1819
1820                 if (refs > 1) {
1821                         ret = enter_shared_node(root, bytenr, refs,
1822                                                 wc, *level - 1);
1823                         if (ret > 0) {
1824                                 path->slots[*level]++;
1825                                 continue;
1826                         }
1827                 }
1828
1829                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1830                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1831                         free_extent_buffer(next);
1832                         reada_walk_down(root, cur, path->slots[*level]);
1833                         next = read_tree_block(root, bytenr, blocksize,
1834                                                ptr_gen);
1835                         if (!extent_buffer_uptodate(next)) {
1836                                 struct btrfs_key node_key;
1837
1838                                 btrfs_node_key_to_cpu(path->nodes[*level],
1839                                                       &node_key,
1840                                                       path->slots[*level]);
1841                                 btrfs_add_corrupt_extent_record(root->fs_info,
1842                                                 &node_key,
1843                                                 path->nodes[*level]->start,
1844                                                 root->leafsize, *level);
1845                                 err = -EIO;
1846                                 goto out;
1847                         }
1848                 }
1849
1850                 ret = check_child_node(root, cur, path->slots[*level], next);
1851                 if (ret) {
1852                         err = ret;
1853                         goto out;
1854                 }
1855
1856                 if (btrfs_is_leaf(next))
1857                         status = btrfs_check_leaf(root, NULL, next);
1858                 else
1859                         status = btrfs_check_node(root, NULL, next);
1860                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1861                         free_extent_buffer(next);
1862                         err = -EIO;
1863                         goto out;
1864                 }
1865
1866                 *level = *level - 1;
1867                 free_extent_buffer(path->nodes[*level]);
1868                 path->nodes[*level] = next;
1869                 path->slots[*level] = 0;
1870         }
1871 out:
1872         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1873         return err;
1874 }
1875
1876 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1877                         struct walk_control *wc, int *level)
1878 {
1879         int i;
1880         struct extent_buffer *leaf;
1881
1882         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1883                 leaf = path->nodes[i];
1884                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1885                         path->slots[i]++;
1886                         *level = i;
1887                         return 0;
1888                 } else {
1889                         free_extent_buffer(path->nodes[*level]);
1890                         path->nodes[*level] = NULL;
1891                         BUG_ON(*level > wc->active_node);
1892                         if (*level == wc->active_node)
1893                                 leave_shared_node(root, wc, *level);
1894                         *level = i + 1;
1895                 }
1896         }
1897         return 1;
1898 }
1899
1900 static int check_root_dir(struct inode_record *rec)
1901 {
1902         struct inode_backref *backref;
1903         int ret = -1;
1904
1905         if (!rec->found_inode_item || rec->errors)
1906                 goto out;
1907         if (rec->nlink != 1 || rec->found_link != 0)
1908                 goto out;
1909         if (list_empty(&rec->backrefs))
1910                 goto out;
1911         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1912         if (!backref->found_inode_ref)
1913                 goto out;
1914         if (backref->index != 0 || backref->namelen != 2 ||
1915             memcmp(backref->name, "..", 2))
1916                 goto out;
1917         if (backref->found_dir_index || backref->found_dir_item)
1918                 goto out;
1919         ret = 0;
1920 out:
1921         return ret;
1922 }
1923
1924 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1925                               struct btrfs_root *root, struct btrfs_path *path,
1926                               struct inode_record *rec)
1927 {
1928         struct btrfs_inode_item *ei;
1929         struct btrfs_key key;
1930         int ret;
1931
1932         key.objectid = rec->ino;
1933         key.type = BTRFS_INODE_ITEM_KEY;
1934         key.offset = (u64)-1;
1935
1936         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1937         if (ret < 0)
1938                 goto out;
1939         if (ret) {
1940                 if (!path->slots[0]) {
1941                         ret = -ENOENT;
1942                         goto out;
1943                 }
1944                 path->slots[0]--;
1945                 ret = 0;
1946         }
1947         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1948         if (key.objectid != rec->ino) {
1949                 ret = -ENOENT;
1950                 goto out;
1951         }
1952
1953         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1954                             struct btrfs_inode_item);
1955         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1956         btrfs_mark_buffer_dirty(path->nodes[0]);
1957         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1958         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1959                root->root_key.objectid);
1960 out:
1961         btrfs_release_path(path);
1962         return ret;
1963 }
1964
1965 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1966                                     struct btrfs_root *root,
1967                                     struct btrfs_path *path,
1968                                     struct inode_record *rec)
1969 {
1970         int ret;
1971
1972         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1973         btrfs_release_path(path);
1974         if (!ret)
1975                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1976         return ret;
1977 }
1978
1979 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1980                                struct btrfs_root *root,
1981                                struct btrfs_path *path,
1982                                struct inode_record *rec)
1983 {
1984         struct btrfs_inode_item *ei;
1985         struct btrfs_key key;
1986         int ret = 0;
1987
1988         key.objectid = rec->ino;
1989         key.type = BTRFS_INODE_ITEM_KEY;
1990         key.offset = 0;
1991
1992         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1993         if (ret) {
1994                 if (ret > 0)
1995                         ret = -ENOENT;
1996                 goto out;
1997         }
1998
1999         /* Since ret == 0, no need to check anything */
2000         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2001                             struct btrfs_inode_item);
2002         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2003         btrfs_mark_buffer_dirty(path->nodes[0]);
2004         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2005         printf("reset nbytes for ino %llu root %llu\n",
2006                rec->ino, root->root_key.objectid);
2007 out:
2008         btrfs_release_path(path);
2009         return ret;
2010 }
2011
2012 static int add_missing_dir_index(struct btrfs_root *root,
2013                                  struct cache_tree *inode_cache,
2014                                  struct inode_record *rec,
2015                                  struct inode_backref *backref)
2016 {
2017         struct btrfs_path *path;
2018         struct btrfs_trans_handle *trans;
2019         struct btrfs_dir_item *dir_item;
2020         struct extent_buffer *leaf;
2021         struct btrfs_key key;
2022         struct btrfs_disk_key disk_key;
2023         struct inode_record *dir_rec;
2024         unsigned long name_ptr;
2025         u32 data_size = sizeof(*dir_item) + backref->namelen;
2026         int ret;
2027
2028         path = btrfs_alloc_path();
2029         if (!path)
2030                 return -ENOMEM;
2031
2032         trans = btrfs_start_transaction(root, 1);
2033         if (IS_ERR(trans)) {
2034                 btrfs_free_path(path);
2035                 return PTR_ERR(trans);
2036         }
2037
2038         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2039                 (unsigned long long)rec->ino);
2040         key.objectid = backref->dir;
2041         key.type = BTRFS_DIR_INDEX_KEY;
2042         key.offset = backref->index;
2043
2044         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2045         BUG_ON(ret);
2046
2047         leaf = path->nodes[0];
2048         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2049
2050         disk_key.objectid = cpu_to_le64(rec->ino);
2051         disk_key.type = BTRFS_INODE_ITEM_KEY;
2052         disk_key.offset = 0;
2053
2054         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2055         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2056         btrfs_set_dir_data_len(leaf, dir_item, 0);
2057         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2058         name_ptr = (unsigned long)(dir_item + 1);
2059         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2060         btrfs_mark_buffer_dirty(leaf);
2061         btrfs_free_path(path);
2062         btrfs_commit_transaction(trans, root);
2063
2064         backref->found_dir_index = 1;
2065         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2066         if (!dir_rec)
2067                 return 0;
2068         dir_rec->found_size += backref->namelen;
2069         if (dir_rec->found_size == dir_rec->isize &&
2070             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2071                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2072         if (dir_rec->found_size != dir_rec->isize)
2073                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2074
2075         return 0;
2076 }
2077
2078 static int delete_dir_index(struct btrfs_root *root,
2079                             struct cache_tree *inode_cache,
2080                             struct inode_record *rec,
2081                             struct inode_backref *backref)
2082 {
2083         struct btrfs_trans_handle *trans;
2084         struct btrfs_dir_item *di;
2085         struct btrfs_path *path;
2086         int ret = 0;
2087
2088         path = btrfs_alloc_path();
2089         if (!path)
2090                 return -ENOMEM;
2091
2092         trans = btrfs_start_transaction(root, 1);
2093         if (IS_ERR(trans)) {
2094                 btrfs_free_path(path);
2095                 return PTR_ERR(trans);
2096         }
2097
2098
2099         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2100                 (unsigned long long)backref->dir,
2101                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2102                 (unsigned long long)root->objectid);
2103
2104         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2105                                     backref->name, backref->namelen,
2106                                     backref->index, -1);
2107         if (IS_ERR(di)) {
2108                 ret = PTR_ERR(di);
2109                 btrfs_free_path(path);
2110                 btrfs_commit_transaction(trans, root);
2111                 if (ret == -ENOENT)
2112                         return 0;
2113                 return ret;
2114         }
2115
2116         if (!di)
2117                 ret = btrfs_del_item(trans, root, path);
2118         else
2119                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2120         BUG_ON(ret);
2121         btrfs_free_path(path);
2122         btrfs_commit_transaction(trans, root);
2123         return ret;
2124 }
2125
2126 static int create_inode_item(struct btrfs_root *root,
2127                              struct inode_record *rec,
2128                              struct inode_backref *backref, int root_dir)
2129 {
2130         struct btrfs_trans_handle *trans;
2131         struct btrfs_inode_item inode_item;
2132         time_t now = time(NULL);
2133         int ret;
2134
2135         trans = btrfs_start_transaction(root, 1);
2136         if (IS_ERR(trans)) {
2137                 ret = PTR_ERR(trans);
2138                 return ret;
2139         }
2140
2141         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2142                 "be incomplete, please check permissions and content after "
2143                 "the fsck completes.\n", (unsigned long long)root->objectid,
2144                 (unsigned long long)rec->ino);
2145
2146         memset(&inode_item, 0, sizeof(inode_item));
2147         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2148         if (root_dir)
2149                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2150         else
2151                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2152         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2153         if (rec->found_dir_item) {
2154                 if (rec->found_file_extent)
2155                         fprintf(stderr, "root %llu inode %llu has both a dir "
2156                                 "item and extents, unsure if it is a dir or a "
2157                                 "regular file so setting it as a directory\n",
2158                                 (unsigned long long)root->objectid,
2159                                 (unsigned long long)rec->ino);
2160                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2161                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2162         } else if (!rec->found_dir_item) {
2163                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2164                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2165         }
2166         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2167         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2168         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2169         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2170         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2171         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2172         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2173         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2174
2175         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2176         BUG_ON(ret);
2177         btrfs_commit_transaction(trans, root);
2178         return 0;
2179 }
2180
2181 static int repair_inode_backrefs(struct btrfs_root *root,
2182                                  struct inode_record *rec,
2183                                  struct cache_tree *inode_cache,
2184                                  int delete)
2185 {
2186         struct inode_backref *tmp, *backref;
2187         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2188         int ret = 0;
2189         int repaired = 0;
2190
2191         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2192                 if (!delete && rec->ino == root_dirid) {
2193                         if (!rec->found_inode_item) {
2194                                 ret = create_inode_item(root, rec, backref, 1);
2195                                 if (ret)
2196                                         break;
2197                                 repaired++;
2198                         }
2199                 }
2200
2201                 /* Index 0 for root dir's are special, don't mess with it */
2202                 if (rec->ino == root_dirid && backref->index == 0)
2203                         continue;
2204
2205                 if (delete &&
2206                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2207                      (backref->found_dir_index && backref->found_inode_ref &&
2208                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2209                         ret = delete_dir_index(root, inode_cache, rec, backref);
2210                         if (ret)
2211                                 break;
2212                         repaired++;
2213                         list_del(&backref->list);
2214                         free(backref);
2215                 }
2216
2217                 if (!delete && !backref->found_dir_index &&
2218                     backref->found_dir_item && backref->found_inode_ref) {
2219                         ret = add_missing_dir_index(root, inode_cache, rec,
2220                                                     backref);
2221                         if (ret)
2222                                 break;
2223                         repaired++;
2224                         if (backref->found_dir_item &&
2225                             backref->found_dir_index &&
2226                             backref->found_dir_index) {
2227                                 if (!backref->errors &&
2228                                     backref->found_inode_ref) {
2229                                         list_del(&backref->list);
2230                                         free(backref);
2231                                 }
2232                         }
2233                 }
2234
2235                 if (!delete && (!backref->found_dir_index &&
2236                                 !backref->found_dir_item &&
2237                                 backref->found_inode_ref)) {
2238                         struct btrfs_trans_handle *trans;
2239                         struct btrfs_key location;
2240
2241                         ret = check_dir_conflict(root, backref->name,
2242                                                  backref->namelen,
2243                                                  backref->dir,
2244                                                  backref->index);
2245                         if (ret) {
2246                                 /*
2247                                  * let nlink fixing routine to handle it,
2248                                  * which can do it better.
2249                                  */
2250                                 ret = 0;
2251                                 break;
2252                         }
2253                         location.objectid = rec->ino;
2254                         location.type = BTRFS_INODE_ITEM_KEY;
2255                         location.offset = 0;
2256
2257                         trans = btrfs_start_transaction(root, 1);
2258                         if (IS_ERR(trans)) {
2259                                 ret = PTR_ERR(trans);
2260                                 break;
2261                         }
2262                         fprintf(stderr, "adding missing dir index/item pair "
2263                                 "for inode %llu\n",
2264                                 (unsigned long long)rec->ino);
2265                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2266                                                     backref->namelen,
2267                                                     backref->dir, &location,
2268                                                     imode_to_type(rec->imode),
2269                                                     backref->index);
2270                         BUG_ON(ret);
2271                         btrfs_commit_transaction(trans, root);
2272                         repaired++;
2273                 }
2274
2275                 if (!delete && (backref->found_inode_ref &&
2276                                 backref->found_dir_index &&
2277                                 backref->found_dir_item &&
2278                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2279                                 !rec->found_inode_item)) {
2280                         ret = create_inode_item(root, rec, backref, 0);
2281                         if (ret)
2282                                 break;
2283                         repaired++;
2284                 }
2285
2286         }
2287         return ret ? ret : repaired;
2288 }
2289
2290 /*
2291  * To determine the file type for nlink/inode_item repair
2292  *
2293  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2294  * Return -ENOENT if file type is not found.
2295  */
2296 static int find_file_type(struct inode_record *rec, u8 *type)
2297 {
2298         struct inode_backref *backref;
2299
2300         /* For inode item recovered case */
2301         if (rec->found_inode_item) {
2302                 *type = imode_to_type(rec->imode);
2303                 return 0;
2304         }
2305
2306         list_for_each_entry(backref, &rec->backrefs, list) {
2307                 if (backref->found_dir_index || backref->found_dir_item) {
2308                         *type = backref->filetype;
2309                         return 0;
2310                 }
2311         }
2312         return -ENOENT;
2313 }
2314
2315 /*
2316  * To determine the file name for nlink repair
2317  *
2318  * Return 0 if file name is found, set name and namelen.
2319  * Return -ENOENT if file name is not found.
2320  */
2321 static int find_file_name(struct inode_record *rec,
2322                           char *name, int *namelen)
2323 {
2324         struct inode_backref *backref;
2325
2326         list_for_each_entry(backref, &rec->backrefs, list) {
2327                 if (backref->found_dir_index || backref->found_dir_item ||
2328                     backref->found_inode_ref) {
2329                         memcpy(name, backref->name, backref->namelen);
2330                         *namelen = backref->namelen;
2331                         return 0;
2332                 }
2333         }
2334         return -ENOENT;
2335 }
2336
2337 /* Reset the nlink of the inode to the correct one */
2338 static int reset_nlink(struct btrfs_trans_handle *trans,
2339                        struct btrfs_root *root,
2340                        struct btrfs_path *path,
2341                        struct inode_record *rec)
2342 {
2343         struct inode_backref *backref;
2344         struct inode_backref *tmp;
2345         struct btrfs_key key;
2346         struct btrfs_inode_item *inode_item;
2347         int ret = 0;
2348
2349         /* We don't believe this either, reset it and iterate backref */
2350         rec->found_link = 0;
2351
2352         /* Remove all backref including the valid ones */
2353         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2355                                    backref->index, backref->name,
2356                                    backref->namelen, 0);
2357                 if (ret < 0)
2358                         goto out;
2359
2360                 /* remove invalid backref, so it won't be added back */
2361                 if (!(backref->found_dir_index &&
2362                       backref->found_dir_item &&
2363                       backref->found_inode_ref)) {
2364                         list_del(&backref->list);
2365                         free(backref);
2366                 } else {
2367                         rec->found_link++;
2368                 }
2369         }
2370
2371         /* Set nlink to 0 */
2372         key.objectid = rec->ino;
2373         key.type = BTRFS_INODE_ITEM_KEY;
2374         key.offset = 0;
2375         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2376         if (ret < 0)
2377                 goto out;
2378         if (ret > 0) {
2379                 ret = -ENOENT;
2380                 goto out;
2381         }
2382         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2383                                     struct btrfs_inode_item);
2384         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2385         btrfs_mark_buffer_dirty(path->nodes[0]);
2386         btrfs_release_path(path);
2387
2388         /*
2389          * Add back valid inode_ref/dir_item/dir_index,
2390          * add_link() will handle the nlink inc, so new nlink must be correct
2391          */
2392         list_for_each_entry(backref, &rec->backrefs, list) {
2393                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2394                                      backref->name, backref->namelen,
2395                                      backref->ref_type, &backref->index, 1);
2396                 if (ret < 0)
2397                         goto out;
2398         }
2399 out:
2400         btrfs_release_path(path);
2401         return ret;
2402 }
2403
2404 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2405                                struct btrfs_root *root,
2406                                struct btrfs_path *path,
2407                                struct inode_record *rec)
2408 {
2409         char *dir_name = "lost+found";
2410         char namebuf[BTRFS_NAME_LEN] = {0};
2411         u64 lost_found_ino;
2412         u32 mode = 0700;
2413         u8 type = 0;
2414         int namelen = 0;
2415         int name_recovered = 0;
2416         int type_recovered = 0;
2417         int ret = 0;
2418
2419         /*
2420          * Get file name and type first before these invalid inode ref
2421          * are deleted by remove_all_invalid_backref()
2422          */
2423         name_recovered = !find_file_name(rec, namebuf, &namelen);
2424         type_recovered = !find_file_type(rec, &type);
2425
2426         if (!name_recovered) {
2427                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2428                        rec->ino, rec->ino);
2429                 namelen = count_digits(rec->ino);
2430                 sprintf(namebuf, "%llu", rec->ino);
2431                 name_recovered = 1;
2432         }
2433         if (!type_recovered) {
2434                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2435                        rec->ino);
2436                 type = BTRFS_FT_REG_FILE;
2437                 type_recovered = 1;
2438         }
2439
2440         ret = reset_nlink(trans, root, path, rec);
2441         if (ret < 0) {
2442                 fprintf(stderr,
2443                         "Failed to reset nlink for inode %llu: %s\n",
2444                         rec->ino, strerror(-ret));
2445                 goto out;
2446         }
2447
2448         if (rec->found_link == 0) {
2449                 lost_found_ino = root->highest_inode;
2450                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2451                         ret = -EOVERFLOW;
2452                         goto out;
2453                 }
2454                 lost_found_ino++;
2455                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2456                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2457                                   mode);
2458                 if (ret < 0) {
2459                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2460                                 dir_name, strerror(-ret));
2461                         goto out;
2462                 }
2463                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2464                                      namebuf, namelen, type, NULL, 1);
2465                 /*
2466                  * Add ".INO" suffix several times to handle case where
2467                  * "FILENAME.INO" is already taken by another file.
2468                  */
2469                 while (ret == -EEXIST) {
2470                         /*
2471                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2472                          */
2473                         if (namelen + count_digits(rec->ino) + 1 >
2474                             BTRFS_NAME_LEN) {
2475                                 ret = -EFBIG;
2476                                 goto out;
2477                         }
2478                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2479                                  ".%llu", rec->ino);
2480                         namelen += count_digits(rec->ino) + 1;
2481                         ret = btrfs_add_link(trans, root, rec->ino,
2482                                              lost_found_ino, namebuf,
2483                                              namelen, type, NULL, 1);
2484                 }
2485                 if (ret < 0) {
2486                         fprintf(stderr,
2487                                 "Failed to link the inode %llu to %s dir: %s\n",
2488                                 rec->ino, dir_name, strerror(-ret));
2489                         goto out;
2490                 }
2491                 /*
2492                  * Just increase the found_link, don't actually add the
2493                  * backref. This will make things easier and this inode
2494                  * record will be freed after the repair is done.
2495                  * So fsck will not report problem about this inode.
2496                  */
2497                 rec->found_link++;
2498                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2499                        namelen, namebuf, dir_name);
2500         }
2501         printf("Fixed the nlink of inode %llu\n", rec->ino);
2502 out:
2503         /*
2504          * Clear the flag anyway, or we will loop forever for the same inode
2505          * as it will not be removed from the bad inode list and the dead loop
2506          * happens.
2507          */
2508         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2509         btrfs_release_path(path);
2510         return ret;
2511 }
2512
2513 /*
2514  * Check if there is any normal(reg or prealloc) file extent for given
2515  * ino.
2516  * This is used to determine the file type when neither its dir_index/item or
2517  * inode_item exists.
2518  *
2519  * This will *NOT* report error, if any error happens, just consider it does
2520  * not have any normal file extent.
2521  */
2522 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2523 {
2524         struct btrfs_path *path;
2525         struct btrfs_key key;
2526         struct btrfs_key found_key;
2527         struct btrfs_file_extent_item *fi;
2528         u8 type;
2529         int ret = 0;
2530
2531         path = btrfs_alloc_path();
2532         if (!path)
2533                 goto out;
2534         key.objectid = ino;
2535         key.type = BTRFS_EXTENT_DATA_KEY;
2536         key.offset = 0;
2537
2538         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2539         if (ret < 0) {
2540                 ret = 0;
2541                 goto out;
2542         }
2543         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2544                 ret = btrfs_next_leaf(root, path);
2545                 if (ret) {
2546                         ret = 0;
2547                         goto out;
2548                 }
2549         }
2550         while (1) {
2551                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2552                                       path->slots[0]);
2553                 if (found_key.objectid != ino ||
2554                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2555                         break;
2556                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2557                                     struct btrfs_file_extent_item);
2558                 type = btrfs_file_extent_type(path->nodes[0], fi);
2559                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2560                         ret = 1;
2561                         goto out;
2562                 }
2563         }
2564 out:
2565         btrfs_free_path(path);
2566         return ret;
2567 }
2568
2569 static u32 btrfs_type_to_imode(u8 type)
2570 {
2571         static u32 imode_by_btrfs_type[] = {
2572                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2573                 [BTRFS_FT_DIR]          = S_IFDIR,
2574                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2575                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2576                 [BTRFS_FT_FIFO]         = S_IFIFO,
2577                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2578                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2579         };
2580
2581         return imode_by_btrfs_type[(type)];
2582 }
2583
2584 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2585                                 struct btrfs_root *root,
2586                                 struct btrfs_path *path,
2587                                 struct inode_record *rec)
2588 {
2589         u8 filetype;
2590         u32 mode = 0700;
2591         int type_recovered = 0;
2592         int ret = 0;
2593
2594         printf("Trying to rebuild inode:%llu\n", rec->ino);
2595
2596         type_recovered = !find_file_type(rec, &filetype);
2597
2598         /*
2599          * Try to determine inode type if type not found.
2600          *
2601          * For found regular file extent, it must be FILE.
2602          * For found dir_item/index, it must be DIR.
2603          *
2604          * For undetermined one, use FILE as fallback.
2605          *
2606          * TODO:
2607          * 1. If found backref(inode_index/item is already handled) to it,
2608          *    it must be DIR.
2609          *    Need new inode-inode ref structure to allow search for that.
2610          */
2611         if (!type_recovered) {
2612                 if (rec->found_file_extent &&
2613                     find_normal_file_extent(root, rec->ino)) {
2614                         type_recovered = 1;
2615                         filetype = BTRFS_FT_REG_FILE;
2616                 } else if (rec->found_dir_item) {
2617                         type_recovered = 1;
2618                         filetype = BTRFS_FT_DIR;
2619                 } else if (!list_empty(&rec->orphan_extents)) {
2620                         type_recovered = 1;
2621                         filetype = BTRFS_FT_REG_FILE;
2622                 } else{
2623                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2624                                rec->ino);
2625                         type_recovered = 1;
2626                         filetype = BTRFS_FT_REG_FILE;
2627                 }
2628         }
2629
2630         ret = btrfs_new_inode(trans, root, rec->ino,
2631                               mode | btrfs_type_to_imode(filetype));
2632         if (ret < 0)
2633                 goto out;
2634
2635         /*
2636          * Here inode rebuild is done, we only rebuild the inode item,
2637          * don't repair the nlink(like move to lost+found).
2638          * That is the job of nlink repair.
2639          *
2640          * We just fill the record and return
2641          */
2642         rec->found_dir_item = 1;
2643         rec->imode = mode | btrfs_type_to_imode(filetype);
2644         rec->nlink = 0;
2645         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2646         /* Ensure the inode_nlinks repair function will be called */
2647         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2648 out:
2649         return ret;
2650 }
2651
2652 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2653                                       struct btrfs_root *root,
2654                                       struct btrfs_path *path,
2655                                       struct inode_record *rec)
2656 {
2657         struct orphan_data_extent *orphan;
2658         struct orphan_data_extent *tmp;
2659         int ret = 0;
2660
2661         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2662                 /*
2663                  * Check for conflicting file extents
2664                  *
2665                  * Here we don't know whether the extents is compressed or not,
2666                  * so we can only assume it not compressed nor data offset,
2667                  * and use its disk_len as extent length.
2668                  */
2669                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2670                                        orphan->offset, orphan->disk_len, 0);
2671                 btrfs_release_path(path);
2672                 if (ret < 0)
2673                         goto out;
2674                 if (!ret) {
2675                         fprintf(stderr,
2676                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2677                                 orphan->disk_bytenr, orphan->disk_len);
2678                         ret = btrfs_free_extent(trans,
2679                                         root->fs_info->extent_root,
2680                                         orphan->disk_bytenr, orphan->disk_len,
2681                                         0, root->objectid, orphan->objectid,
2682                                         orphan->offset);
2683                         if (ret < 0)
2684                                 goto out;
2685                 }
2686                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2687                                 orphan->offset, orphan->disk_bytenr,
2688                                 orphan->disk_len, orphan->disk_len);
2689                 if (ret < 0)
2690                         goto out;
2691
2692                 /* Update file size info */
2693                 rec->found_size += orphan->disk_len;
2694                 if (rec->found_size == rec->nbytes)
2695                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2696
2697                 /* Update the file extent hole info too */
2698                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2699                                            orphan->disk_len);
2700                 if (ret < 0)
2701                         goto out;
2702                 if (RB_EMPTY_ROOT(&rec->holes))
2703                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2704
2705                 list_del(&orphan->list);
2706                 free(orphan);
2707         }
2708         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2709 out:
2710         return ret;
2711 }
2712
2713 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2714                                         struct btrfs_root *root,
2715                                         struct btrfs_path *path,
2716                                         struct inode_record *rec)
2717 {
2718         struct rb_node *node;
2719         struct file_extent_hole *hole;
2720         int found = 0;
2721         int ret = 0;
2722
2723         node = rb_first(&rec->holes);
2724
2725         while (node) {
2726                 found = 1;
2727                 hole = rb_entry(node, struct file_extent_hole, node);
2728                 ret = btrfs_punch_hole(trans, root, rec->ino,
2729                                        hole->start, hole->len);
2730                 if (ret < 0)
2731                         goto out;
2732                 ret = del_file_extent_hole(&rec->holes, hole->start,
2733                                            hole->len);
2734                 if (ret < 0)
2735                         goto out;
2736                 if (RB_EMPTY_ROOT(&rec->holes))
2737                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2738                 node = rb_first(&rec->holes);
2739         }
2740         /* special case for a file losing all its file extent */
2741         if (!found) {
2742                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2743                                        round_up(rec->isize, root->sectorsize));
2744                 if (ret < 0)
2745                         goto out;
2746         }
2747         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2748                rec->ino, root->objectid);
2749 out:
2750         return ret;
2751 }
2752
2753 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2754 {
2755         struct btrfs_trans_handle *trans;
2756         struct btrfs_path *path;
2757         int ret = 0;
2758
2759         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2760                              I_ERR_NO_ORPHAN_ITEM |
2761                              I_ERR_LINK_COUNT_WRONG |
2762                              I_ERR_NO_INODE_ITEM |
2763                              I_ERR_FILE_EXTENT_ORPHAN |
2764                              I_ERR_FILE_EXTENT_DISCOUNT|
2765                              I_ERR_FILE_NBYTES_WRONG)))
2766                 return rec->errors;
2767
2768         path = btrfs_alloc_path();
2769         if (!path)
2770                 return -ENOMEM;
2771
2772         /*
2773          * For nlink repair, it may create a dir and add link, so
2774          * 2 for parent(256)'s dir_index and dir_item
2775          * 2 for lost+found dir's inode_item and inode_ref
2776          * 1 for the new inode_ref of the file
2777          * 2 for lost+found dir's dir_index and dir_item for the file
2778          */
2779         trans = btrfs_start_transaction(root, 7);
2780         if (IS_ERR(trans)) {
2781                 btrfs_free_path(path);
2782                 return PTR_ERR(trans);
2783         }
2784
2785         if (rec->errors & I_ERR_NO_INODE_ITEM)
2786                 ret = repair_inode_no_item(trans, root, path, rec);
2787         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2788                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2789         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2790                 ret = repair_inode_discount_extent(trans, root, path, rec);
2791         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2792                 ret = repair_inode_isize(trans, root, path, rec);
2793         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2794                 ret = repair_inode_orphan_item(trans, root, path, rec);
2795         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2796                 ret = repair_inode_nlinks(trans, root, path, rec);
2797         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2798                 ret = repair_inode_nbytes(trans, root, path, rec);
2799         btrfs_commit_transaction(trans, root);
2800         btrfs_free_path(path);
2801         return ret;
2802 }
2803
2804 static int check_inode_recs(struct btrfs_root *root,
2805                             struct cache_tree *inode_cache)
2806 {
2807         struct cache_extent *cache;
2808         struct ptr_node *node;
2809         struct inode_record *rec;
2810         struct inode_backref *backref;
2811         int stage = 0;
2812         int ret = 0;
2813         int err = 0;
2814         u64 error = 0;
2815         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2816
2817         if (btrfs_root_refs(&root->root_item) == 0) {
2818                 if (!cache_tree_empty(inode_cache))
2819                         fprintf(stderr, "warning line %d\n", __LINE__);
2820                 return 0;
2821         }
2822
2823         /*
2824          * We need to record the highest inode number for later 'lost+found'
2825          * dir creation.
2826          * We must select a ino not used/refered by any existing inode, or
2827          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2828          * this may cause 'lost+found' dir has wrong nlinks.
2829          */
2830         cache = last_cache_extent(inode_cache);
2831         if (cache) {
2832                 node = container_of(cache, struct ptr_node, cache);
2833                 rec = node->data;
2834                 if (rec->ino > root->highest_inode)
2835                         root->highest_inode = rec->ino;
2836         }
2837
2838         /*
2839          * We need to repair backrefs first because we could change some of the
2840          * errors in the inode recs.
2841          *
2842          * We also need to go through and delete invalid backrefs first and then
2843          * add the correct ones second.  We do this because we may get EEXIST
2844          * when adding back the correct index because we hadn't yet deleted the
2845          * invalid index.
2846          *
2847          * For example, if we were missing a dir index then the directories
2848          * isize would be wrong, so if we fixed the isize to what we thought it
2849          * would be and then fixed the backref we'd still have a invalid fs, so
2850          * we need to add back the dir index and then check to see if the isize
2851          * is still wrong.
2852          */
2853         while (stage < 3) {
2854                 stage++;
2855                 if (stage == 3 && !err)
2856                         break;
2857
2858                 cache = search_cache_extent(inode_cache, 0);
2859                 while (repair && cache) {
2860                         node = container_of(cache, struct ptr_node, cache);
2861                         rec = node->data;
2862                         cache = next_cache_extent(cache);
2863
2864                         /* Need to free everything up and rescan */
2865                         if (stage == 3) {
2866                                 remove_cache_extent(inode_cache, &node->cache);
2867                                 free(node);
2868                                 free_inode_rec(rec);
2869                                 continue;
2870                         }
2871
2872                         if (list_empty(&rec->backrefs))
2873                                 continue;
2874
2875                         ret = repair_inode_backrefs(root, rec, inode_cache,
2876                                                     stage == 1);
2877                         if (ret < 0) {
2878                                 err = ret;
2879                                 stage = 2;
2880                                 break;
2881                         } if (ret > 0) {
2882                                 err = -EAGAIN;
2883                         }
2884                 }
2885         }
2886         if (err)
2887                 return err;
2888
2889         rec = get_inode_rec(inode_cache, root_dirid, 0);
2890         if (rec) {
2891                 ret = check_root_dir(rec);
2892                 if (ret) {
2893                         fprintf(stderr, "root %llu root dir %llu error\n",
2894                                 (unsigned long long)root->root_key.objectid,
2895                                 (unsigned long long)root_dirid);
2896                         print_inode_error(root, rec);
2897                         error++;
2898                 }
2899         } else {
2900                 if (repair) {
2901                         struct btrfs_trans_handle *trans;
2902
2903                         trans = btrfs_start_transaction(root, 1);
2904                         if (IS_ERR(trans)) {
2905                                 err = PTR_ERR(trans);
2906                                 return err;
2907                         }
2908
2909                         fprintf(stderr,
2910                                 "root %llu missing its root dir, recreating\n",
2911                                 (unsigned long long)root->objectid);
2912
2913                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2914                         BUG_ON(ret);
2915
2916                         btrfs_commit_transaction(trans, root);
2917                         return -EAGAIN;
2918                 }
2919
2920                 fprintf(stderr, "root %llu root dir %llu not found\n",
2921                         (unsigned long long)root->root_key.objectid,
2922                         (unsigned long long)root_dirid);
2923         }
2924
2925         while (1) {
2926                 cache = search_cache_extent(inode_cache, 0);
2927                 if (!cache)
2928                         break;
2929                 node = container_of(cache, struct ptr_node, cache);
2930                 rec = node->data;
2931                 remove_cache_extent(inode_cache, &node->cache);
2932                 free(node);
2933                 if (rec->ino == root_dirid ||
2934                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2935                         free_inode_rec(rec);
2936                         continue;
2937                 }
2938
2939                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2940                         ret = check_orphan_item(root, rec->ino);
2941                         if (ret == 0)
2942                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2943                         if (can_free_inode_rec(rec)) {
2944                                 free_inode_rec(rec);
2945                                 continue;
2946                         }
2947                 }
2948
2949                 if (!rec->found_inode_item)
2950                         rec->errors |= I_ERR_NO_INODE_ITEM;
2951                 if (rec->found_link != rec->nlink)
2952                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2953                 if (repair) {
2954                         ret = try_repair_inode(root, rec);
2955                         if (ret == 0 && can_free_inode_rec(rec)) {
2956                                 free_inode_rec(rec);
2957                                 continue;
2958                         }
2959                         ret = 0;
2960                 }
2961
2962                 if (!(repair && ret == 0))
2963                         error++;
2964                 print_inode_error(root, rec);
2965                 list_for_each_entry(backref, &rec->backrefs, list) {
2966                         if (!backref->found_dir_item)
2967                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2968                         if (!backref->found_dir_index)
2969                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2970                         if (!backref->found_inode_ref)
2971                                 backref->errors |= REF_ERR_NO_INODE_REF;
2972                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2973                                 " namelen %u name %s filetype %d errors %x",
2974                                 (unsigned long long)backref->dir,
2975                                 (unsigned long long)backref->index,
2976                                 backref->namelen, backref->name,
2977                                 backref->filetype, backref->errors);
2978                         print_ref_error(backref->errors);
2979                 }
2980                 free_inode_rec(rec);
2981         }
2982         return (error > 0) ? -1 : 0;
2983 }
2984
2985 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2986                                         u64 objectid)
2987 {
2988         struct cache_extent *cache;
2989         struct root_record *rec = NULL;
2990         int ret;
2991
2992         cache = lookup_cache_extent(root_cache, objectid, 1);
2993         if (cache) {
2994                 rec = container_of(cache, struct root_record, cache);
2995         } else {
2996                 rec = calloc(1, sizeof(*rec));
2997                 rec->objectid = objectid;
2998                 INIT_LIST_HEAD(&rec->backrefs);
2999                 rec->cache.start = objectid;
3000                 rec->cache.size = 1;
3001
3002                 ret = insert_cache_extent(root_cache, &rec->cache);
3003                 BUG_ON(ret);
3004         }
3005         return rec;
3006 }
3007
3008 static struct root_backref *get_root_backref(struct root_record *rec,
3009                                              u64 ref_root, u64 dir, u64 index,
3010                                              const char *name, int namelen)
3011 {
3012         struct root_backref *backref;
3013
3014         list_for_each_entry(backref, &rec->backrefs, list) {
3015                 if (backref->ref_root != ref_root || backref->dir != dir ||
3016                     backref->namelen != namelen)
3017                         continue;
3018                 if (memcmp(name, backref->name, namelen))
3019                         continue;
3020                 return backref;
3021         }
3022
3023         backref = calloc(1, sizeof(*backref) + namelen + 1);
3024         backref->ref_root = ref_root;
3025         backref->dir = dir;
3026         backref->index = index;
3027         backref->namelen = namelen;
3028         memcpy(backref->name, name, namelen);
3029         backref->name[namelen] = '\0';
3030         list_add_tail(&backref->list, &rec->backrefs);
3031         return backref;
3032 }
3033
3034 static void free_root_record(struct cache_extent *cache)
3035 {
3036         struct root_record *rec;
3037         struct root_backref *backref;
3038
3039         rec = container_of(cache, struct root_record, cache);
3040         while (!list_empty(&rec->backrefs)) {
3041                 backref = list_entry(rec->backrefs.next,
3042                                      struct root_backref, list);
3043                 list_del(&backref->list);
3044                 free(backref);
3045         }
3046
3047         kfree(rec);
3048 }
3049
3050 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3051
3052 static int add_root_backref(struct cache_tree *root_cache,
3053                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3054                             const char *name, int namelen,
3055                             int item_type, int errors)
3056 {
3057         struct root_record *rec;
3058         struct root_backref *backref;
3059
3060         rec = get_root_rec(root_cache, root_id);
3061         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3062
3063         backref->errors |= errors;
3064
3065         if (item_type != BTRFS_DIR_ITEM_KEY) {
3066                 if (backref->found_dir_index || backref->found_back_ref ||
3067                     backref->found_forward_ref) {
3068                         if (backref->index != index)
3069                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3070                 } else {
3071                         backref->index = index;
3072                 }
3073         }
3074
3075         if (item_type == BTRFS_DIR_ITEM_KEY) {
3076                 if (backref->found_forward_ref)
3077                         rec->found_ref++;
3078                 backref->found_dir_item = 1;
3079         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3080                 backref->found_dir_index = 1;
3081         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3082                 if (backref->found_forward_ref)
3083                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3084                 else if (backref->found_dir_item)
3085                         rec->found_ref++;
3086                 backref->found_forward_ref = 1;
3087         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3088                 if (backref->found_back_ref)
3089                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3090                 backref->found_back_ref = 1;
3091         } else {
3092                 BUG_ON(1);
3093         }
3094
3095         if (backref->found_forward_ref && backref->found_dir_item)
3096                 backref->reachable = 1;
3097         return 0;
3098 }
3099
3100 static int merge_root_recs(struct btrfs_root *root,
3101                            struct cache_tree *src_cache,
3102                            struct cache_tree *dst_cache)
3103 {
3104         struct cache_extent *cache;
3105         struct ptr_node *node;
3106         struct inode_record *rec;
3107         struct inode_backref *backref;
3108         int ret = 0;
3109
3110         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3111                 free_inode_recs_tree(src_cache);
3112                 return 0;
3113         }
3114
3115         while (1) {
3116                 cache = search_cache_extent(src_cache, 0);
3117                 if (!cache)
3118                         break;
3119                 node = container_of(cache, struct ptr_node, cache);
3120                 rec = node->data;
3121                 remove_cache_extent(src_cache, &node->cache);
3122                 free(node);
3123
3124                 ret = is_child_root(root, root->objectid, rec->ino);
3125                 if (ret < 0)
3126                         break;
3127                 else if (ret == 0)
3128                         goto skip;
3129
3130                 list_for_each_entry(backref, &rec->backrefs, list) {
3131                         BUG_ON(backref->found_inode_ref);
3132                         if (backref->found_dir_item)
3133                                 add_root_backref(dst_cache, rec->ino,
3134                                         root->root_key.objectid, backref->dir,
3135                                         backref->index, backref->name,
3136                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3137                                         backref->errors);
3138                         if (backref->found_dir_index)
3139                                 add_root_backref(dst_cache, rec->ino,
3140                                         root->root_key.objectid, backref->dir,
3141                                         backref->index, backref->name,
3142                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3143                                         backref->errors);
3144                 }
3145 skip:
3146                 free_inode_rec(rec);
3147         }
3148         if (ret < 0)
3149                 return ret;
3150         return 0;
3151 }
3152
3153 static int check_root_refs(struct btrfs_root *root,
3154                            struct cache_tree *root_cache)
3155 {
3156         struct root_record *rec;
3157         struct root_record *ref_root;
3158         struct root_backref *backref;
3159         struct cache_extent *cache;
3160         int loop = 1;
3161         int ret;
3162         int error;
3163         int errors = 0;
3164
3165         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3166         rec->found_ref = 1;
3167
3168         /* fixme: this can not detect circular references */
3169         while (loop) {
3170                 loop = 0;
3171                 cache = search_cache_extent(root_cache, 0);
3172                 while (1) {
3173                         if (!cache)
3174                                 break;
3175                         rec = container_of(cache, struct root_record, cache);
3176                         cache = next_cache_extent(cache);
3177
3178                         if (rec->found_ref == 0)
3179                                 continue;
3180
3181                         list_for_each_entry(backref, &rec->backrefs, list) {
3182                                 if (!backref->reachable)
3183                                         continue;
3184
3185                                 ref_root = get_root_rec(root_cache,
3186                                                         backref->ref_root);
3187                                 if (ref_root->found_ref > 0)
3188                                         continue;
3189
3190                                 backref->reachable = 0;
3191                                 rec->found_ref--;
3192                                 if (rec->found_ref == 0)
3193                                         loop = 1;
3194                         }
3195                 }
3196         }
3197
3198         cache = search_cache_extent(root_cache, 0);
3199         while (1) {
3200                 if (!cache)
3201                         break;
3202                 rec = container_of(cache, struct root_record, cache);
3203                 cache = next_cache_extent(cache);
3204
3205                 if (rec->found_ref == 0 &&
3206                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3207                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3208                         ret = check_orphan_item(root->fs_info->tree_root,
3209                                                 rec->objectid);
3210                         if (ret == 0)
3211                                 continue;
3212
3213                         /*
3214                          * If we don't have a root item then we likely just have
3215                          * a dir item in a snapshot for this root but no actual
3216                          * ref key or anything so it's meaningless.
3217                          */
3218                         if (!rec->found_root_item)
3219                                 continue;
3220                         errors++;
3221                         fprintf(stderr, "fs tree %llu not referenced\n",
3222                                 (unsigned long long)rec->objectid);
3223                 }
3224
3225                 error = 0;
3226                 if (rec->found_ref > 0 && !rec->found_root_item)
3227                         error = 1;
3228                 list_for_each_entry(backref, &rec->backrefs, list) {
3229                         if (!backref->found_dir_item)
3230                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3231                         if (!backref->found_dir_index)
3232                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3233                         if (!backref->found_back_ref)
3234                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3235                         if (!backref->found_forward_ref)
3236                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3237                         if (backref->reachable && backref->errors)
3238                                 error = 1;
3239                 }
3240                 if (!error)
3241                         continue;
3242
3243                 errors++;
3244                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3245                         (unsigned long long)rec->objectid, rec->found_ref,
3246                          rec->found_root_item ? "" : "not found");
3247
3248                 list_for_each_entry(backref, &rec->backrefs, list) {
3249                         if (!backref->reachable)
3250                                 continue;
3251                         if (!backref->errors && rec->found_root_item)
3252                                 continue;
3253                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3254                                 " index %llu namelen %u name %s errors %x\n",
3255                                 (unsigned long long)backref->ref_root,
3256                                 (unsigned long long)backref->dir,
3257                                 (unsigned long long)backref->index,
3258                                 backref->namelen, backref->name,
3259                                 backref->errors);
3260                         print_ref_error(backref->errors);
3261                 }
3262         }
3263         return errors > 0 ? 1 : 0;
3264 }
3265
3266 static int process_root_ref(struct extent_buffer *eb, int slot,
3267                             struct btrfs_key *key,
3268                             struct cache_tree *root_cache)
3269 {
3270         u64 dirid;
3271         u64 index;
3272         u32 len;
3273         u32 name_len;
3274         struct btrfs_root_ref *ref;
3275         char namebuf[BTRFS_NAME_LEN];
3276         int error;
3277
3278         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3279
3280         dirid = btrfs_root_ref_dirid(eb, ref);
3281         index = btrfs_root_ref_sequence(eb, ref);
3282         name_len = btrfs_root_ref_name_len(eb, ref);
3283
3284         if (name_len <= BTRFS_NAME_LEN) {
3285                 len = name_len;
3286                 error = 0;
3287         } else {
3288                 len = BTRFS_NAME_LEN;
3289                 error = REF_ERR_NAME_TOO_LONG;
3290         }
3291         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3292
3293         if (key->type == BTRFS_ROOT_REF_KEY) {
3294                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3295                                  index, namebuf, len, key->type, error);
3296         } else {
3297                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3298                                  index, namebuf, len, key->type, error);
3299         }
3300         return 0;
3301 }
3302
3303 static void free_corrupt_block(struct cache_extent *cache)
3304 {
3305         struct btrfs_corrupt_block *corrupt;
3306
3307         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3308         free(corrupt);
3309 }
3310
3311 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3312
3313 /*
3314  * Repair the btree of the given root.
3315  *
3316  * The fix is to remove the node key in corrupt_blocks cache_tree.
3317  * and rebalance the tree.
3318  * After the fix, the btree should be writeable.
3319  */
3320 static int repair_btree(struct btrfs_root *root,
3321                         struct cache_tree *corrupt_blocks)
3322 {
3323         struct btrfs_trans_handle *trans;
3324         struct btrfs_path *path;
3325         struct btrfs_corrupt_block *corrupt;
3326         struct cache_extent *cache;
3327         struct btrfs_key key;
3328         u64 offset;
3329         int level;
3330         int ret = 0;
3331
3332         if (cache_tree_empty(corrupt_blocks))
3333                 return 0;
3334
3335         path = btrfs_alloc_path();
3336         if (!path)
3337                 return -ENOMEM;
3338
3339         trans = btrfs_start_transaction(root, 1);
3340         if (IS_ERR(trans)) {
3341                 ret = PTR_ERR(trans);
3342                 fprintf(stderr, "Error starting transaction: %s\n",
3343                         strerror(-ret));
3344                 goto out_free_path;
3345         }
3346         cache = first_cache_extent(corrupt_blocks);
3347         while (cache) {
3348                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3349                                        cache);
3350                 level = corrupt->level;
3351                 path->lowest_level = level;
3352                 key.objectid = corrupt->key.objectid;
3353                 key.type = corrupt->key.type;
3354                 key.offset = corrupt->key.offset;
3355
3356                 /*
3357                  * Here we don't want to do any tree balance, since it may
3358                  * cause a balance with corrupted brother leaf/node,
3359                  * so ins_len set to 0 here.
3360                  * Balance will be done after all corrupt node/leaf is deleted.
3361                  */
3362                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3363                 if (ret < 0)
3364                         goto out;
3365                 offset = btrfs_node_blockptr(path->nodes[level],
3366                                              path->slots[level]);
3367
3368                 /* Remove the ptr */
3369                 ret = btrfs_del_ptr(trans, root, path, level,
3370                                     path->slots[level]);
3371                 if (ret < 0)
3372                         goto out;
3373                 /*
3374                  * Remove the corresponding extent
3375                  * return value is not concerned.
3376                  */
3377                 btrfs_release_path(path);
3378                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3379                                         0, root->root_key.objectid,
3380                                         level - 1, 0);
3381                 cache = next_cache_extent(cache);
3382         }
3383
3384         /* Balance the btree using btrfs_search_slot() */
3385         cache = first_cache_extent(corrupt_blocks);
3386         while (cache) {
3387                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3388                                        cache);
3389                 memcpy(&key, &corrupt->key, sizeof(key));
3390                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3391                 if (ret < 0)
3392                         goto out;
3393                 /* return will always >0 since it won't find the item */
3394                 ret = 0;
3395                 btrfs_release_path(path);
3396                 cache = next_cache_extent(cache);
3397         }
3398 out:
3399         btrfs_commit_transaction(trans, root);
3400 out_free_path:
3401         btrfs_free_path(path);
3402         return ret;
3403 }
3404
3405 static int check_fs_root(struct btrfs_root *root,
3406                          struct cache_tree *root_cache,
3407                          struct walk_control *wc)
3408 {
3409         int ret = 0;
3410         int err = 0;
3411         int wret;
3412         int level;
3413         struct btrfs_path path;
3414         struct shared_node root_node;
3415         struct root_record *rec;
3416         struct btrfs_root_item *root_item = &root->root_item;
3417         struct cache_tree corrupt_blocks;
3418         struct orphan_data_extent *orphan;
3419         struct orphan_data_extent *tmp;
3420         enum btrfs_tree_block_status status;
3421
3422         /*
3423          * Reuse the corrupt_block cache tree to record corrupted tree block
3424          *
3425          * Unlike the usage in extent tree check, here we do it in a per
3426          * fs/subvol tree base.
3427          */
3428         cache_tree_init(&corrupt_blocks);
3429         root->fs_info->corrupt_blocks = &corrupt_blocks;
3430
3431         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3432                 rec = get_root_rec(root_cache, root->root_key.objectid);
3433                 if (btrfs_root_refs(root_item) > 0)
3434                         rec->found_root_item = 1;
3435         }
3436
3437         btrfs_init_path(&path);
3438         memset(&root_node, 0, sizeof(root_node));
3439         cache_tree_init(&root_node.root_cache);
3440         cache_tree_init(&root_node.inode_cache);
3441
3442         /* Move the orphan extent record to corresponding inode_record */
3443         list_for_each_entry_safe(orphan, tmp,
3444                                  &root->orphan_data_extents, list) {
3445                 struct inode_record *inode;
3446
3447                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3448                                       1);
3449                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3450                 list_move(&orphan->list, &inode->orphan_extents);
3451         }
3452
3453         level = btrfs_header_level(root->node);
3454         memset(wc->nodes, 0, sizeof(wc->nodes));
3455         wc->nodes[level] = &root_node;
3456         wc->active_node = level;
3457         wc->root_level = level;
3458
3459         /* We may not have checked the root block, lets do that now */
3460         if (btrfs_is_leaf(root->node))
3461                 status = btrfs_check_leaf(root, NULL, root->node);
3462         else
3463                 status = btrfs_check_node(root, NULL, root->node);
3464         if (status != BTRFS_TREE_BLOCK_CLEAN)
3465                 return -EIO;
3466
3467         if (btrfs_root_refs(root_item) > 0 ||
3468             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3469                 path.nodes[level] = root->node;
3470                 extent_buffer_get(root->node);
3471                 path.slots[level] = 0;
3472         } else {
3473                 struct btrfs_key key;
3474                 struct btrfs_disk_key found_key;
3475
3476                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3477                 level = root_item->drop_level;
3478                 path.lowest_level = level;
3479                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3480                 if (wret < 0)
3481                         goto skip_walking;
3482                 btrfs_node_key(path.nodes[level], &found_key,
3483                                 path.slots[level]);
3484                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3485                                         sizeof(found_key)));
3486         }
3487
3488         while (1) {
3489                 wret = walk_down_tree(root, &path, wc, &level);
3490                 if (wret < 0)
3491                         ret = wret;
3492                 if (wret != 0)
3493                         break;
3494
3495                 wret = walk_up_tree(root, &path, wc, &level);
3496                 if (wret < 0)
3497                         ret = wret;
3498                 if (wret != 0)
3499                         break;
3500         }
3501 skip_walking:
3502         btrfs_release_path(&path);
3503
3504         if (!cache_tree_empty(&corrupt_blocks)) {
3505                 struct cache_extent *cache;
3506                 struct btrfs_corrupt_block *corrupt;
3507
3508                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3509                        root->root_key.objectid);
3510                 cache = first_cache_extent(&corrupt_blocks);
3511                 while (cache) {
3512                         corrupt = container_of(cache,
3513                                                struct btrfs_corrupt_block,
3514                                                cache);
3515                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3516                                cache->start, corrupt->level,
3517                                corrupt->key.objectid, corrupt->key.type,
3518                                corrupt->key.offset);
3519                         cache = next_cache_extent(cache);
3520                 }
3521                 if (repair) {
3522                         printf("Try to repair the btree for root %llu\n",
3523                                root->root_key.objectid);
3524                         ret = repair_btree(root, &corrupt_blocks);
3525                         if (ret < 0)
3526                                 fprintf(stderr, "Failed to repair btree: %s\n",
3527                                         strerror(-ret));
3528                         if (!ret)
3529                                 printf("Btree for root %llu is fixed\n",
3530                                        root->root_key.objectid);
3531                 }
3532         }
3533
3534         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3535         if (err < 0)
3536                 ret = err;
3537
3538         if (root_node.current) {
3539                 root_node.current->checked = 1;
3540                 maybe_free_inode_rec(&root_node.inode_cache,
3541                                 root_node.current);
3542         }
3543
3544         err = check_inode_recs(root, &root_node.inode_cache);
3545         if (!ret)
3546                 ret = err;
3547
3548         free_corrupt_blocks_tree(&corrupt_blocks);
3549         root->fs_info->corrupt_blocks = NULL;
3550         free_orphan_data_extents(&root->orphan_data_extents);
3551         return ret;
3552 }
3553
3554 static int fs_root_objectid(u64 objectid)
3555 {
3556         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3557             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3558                 return 1;
3559         return is_fstree(objectid);
3560 }
3561
3562 static int check_fs_roots(struct btrfs_root *root,
3563                           struct cache_tree *root_cache)
3564 {
3565         struct btrfs_path path;
3566         struct btrfs_key key;
3567         struct walk_control wc;
3568         struct extent_buffer *leaf, *tree_node;
3569         struct btrfs_root *tmp_root;
3570         struct btrfs_root *tree_root = root->fs_info->tree_root;
3571         int ret;
3572         int err = 0;
3573
3574         if (ctx.progress_enabled) {
3575                 ctx.tp = TASK_FS_ROOTS;
3576                 task_start(ctx.info);
3577         }
3578
3579         /*
3580          * Just in case we made any changes to the extent tree that weren't
3581          * reflected into the free space cache yet.
3582          */
3583         if (repair)
3584                 reset_cached_block_groups(root->fs_info);
3585         memset(&wc, 0, sizeof(wc));
3586         cache_tree_init(&wc.shared);
3587         btrfs_init_path(&path);
3588
3589 again:
3590         key.offset = 0;
3591         key.objectid = 0;
3592         key.type = BTRFS_ROOT_ITEM_KEY;
3593         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3594         if (ret < 0) {
3595                 err = 1;
3596                 goto out;
3597         }
3598         tree_node = tree_root->node;
3599         while (1) {
3600                 if (tree_node != tree_root->node) {
3601                         free_root_recs_tree(root_cache);
3602                         btrfs_release_path(&path);
3603                         goto again;
3604                 }
3605                 leaf = path.nodes[0];
3606                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3607                         ret = btrfs_next_leaf(tree_root, &path);
3608                         if (ret) {
3609                                 if (ret < 0)
3610                                         err = 1;
3611                                 break;
3612                         }
3613                         leaf = path.nodes[0];
3614                 }
3615                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3616                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3617                     fs_root_objectid(key.objectid)) {
3618                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3619                                 tmp_root = btrfs_read_fs_root_no_cache(
3620                                                 root->fs_info, &key);
3621                         } else {
3622                                 key.offset = (u64)-1;
3623                                 tmp_root = btrfs_read_fs_root(
3624                                                 root->fs_info, &key);
3625                         }
3626                         if (IS_ERR(tmp_root)) {
3627                                 err = 1;
3628                                 goto next;
3629                         }
3630                         ret = check_fs_root(tmp_root, root_cache, &wc);
3631                         if (ret == -EAGAIN) {
3632                                 free_root_recs_tree(root_cache);
3633                                 btrfs_release_path(&path);
3634                                 goto again;
3635                         }
3636                         if (ret)
3637                                 err = 1;
3638                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3639                                 btrfs_free_fs_root(tmp_root);
3640                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3641                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3642                         process_root_ref(leaf, path.slots[0], &key,
3643                                          root_cache);
3644                 }
3645 next:
3646                 path.slots[0]++;
3647         }
3648 out:
3649         btrfs_release_path(&path);
3650         if (err)
3651                 free_extent_cache_tree(&wc.shared);
3652         if (!cache_tree_empty(&wc.shared))
3653                 fprintf(stderr, "warning line %d\n", __LINE__);
3654
3655         task_stop(ctx.info);
3656
3657         return err;
3658 }
3659
3660 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3661 {
3662         struct list_head *cur = rec->backrefs.next;
3663         struct extent_backref *back;
3664         struct tree_backref *tback;
3665         struct data_backref *dback;
3666         u64 found = 0;
3667         int err = 0;
3668
3669         while(cur != &rec->backrefs) {
3670                 back = list_entry(cur, struct extent_backref, list);
3671                 cur = cur->next;
3672                 if (!back->found_extent_tree) {
3673                         err = 1;
3674                         if (!print_errs)
3675                                 goto out;
3676                         if (back->is_data) {
3677                                 dback = (struct data_backref *)back;
3678                                 fprintf(stderr, "Backref %llu %s %llu"
3679                                         " owner %llu offset %llu num_refs %lu"
3680                                         " not found in extent tree\n",
3681                                         (unsigned long long)rec->start,
3682                                         back->full_backref ?
3683                                         "parent" : "root",
3684                                         back->full_backref ?
3685                                         (unsigned long long)dback->parent:
3686                                         (unsigned long long)dback->root,
3687                                         (unsigned long long)dback->owner,
3688                                         (unsigned long long)dback->offset,
3689                                         (unsigned long)dback->num_refs);
3690                         } else {
3691                                 tback = (struct tree_backref *)back;
3692                                 fprintf(stderr, "Backref %llu parent %llu"
3693                                         " root %llu not found in extent tree\n",
3694                                         (unsigned long long)rec->start,
3695                                         (unsigned long long)tback->parent,
3696                                         (unsigned long long)tback->root);
3697                         }
3698                 }
3699                 if (!back->is_data && !back->found_ref) {
3700                         err = 1;
3701                         if (!print_errs)
3702                                 goto out;
3703                         tback = (struct tree_backref *)back;
3704                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3705                                 (unsigned long long)rec->start,
3706                                 back->full_backref ? "parent" : "root",
3707                                 back->full_backref ?
3708                                 (unsigned long long)tback->parent :
3709                                 (unsigned long long)tback->root, back);
3710                 }
3711                 if (back->is_data) {
3712                         dback = (struct data_backref *)back;
3713                         if (dback->found_ref != dback->num_refs) {
3714                                 err = 1;
3715                                 if (!print_errs)
3716                                         goto out;
3717                                 fprintf(stderr, "Incorrect local backref count"
3718                                         " on %llu %s %llu owner %llu"
3719                                         " offset %llu found %u wanted %u back %p\n",
3720                                         (unsigned long long)rec->start,
3721                                         back->full_backref ?
3722                                         "parent" : "root",
3723                                         back->full_backref ?
3724                                         (unsigned long long)dback->parent:
3725                                         (unsigned long long)dback->root,
3726                                         (unsigned long long)dback->owner,
3727                                         (unsigned long long)dback->offset,
3728                                         dback->found_ref, dback->num_refs, back);
3729                         }
3730                         if (dback->disk_bytenr != rec->start) {
3731                                 err = 1;
3732                                 if (!print_errs)
3733                                         goto out;
3734                                 fprintf(stderr, "Backref disk bytenr does not"
3735                                         " match extent record, bytenr=%llu, "
3736                                         "ref bytenr=%llu\n",
3737                                         (unsigned long long)rec->start,
3738                                         (unsigned long long)dback->disk_bytenr);
3739                         }
3740
3741                         if (dback->bytes != rec->nr) {
3742                                 err = 1;
3743                                 if (!print_errs)
3744                                         goto out;
3745                                 fprintf(stderr, "Backref bytes do not match "
3746                                         "extent backref, bytenr=%llu, ref "
3747                                         "bytes=%llu, backref bytes=%llu\n",
3748                                         (unsigned long long)rec->start,
3749                                         (unsigned long long)rec->nr,
3750                                         (unsigned long long)dback->bytes);
3751                         }
3752                 }
3753                 if (!back->is_data) {
3754                         found += 1;
3755                 } else {
3756                         dback = (struct data_backref *)back;
3757                         found += dback->found_ref;
3758                 }
3759         }
3760         if (found != rec->refs) {
3761                 err = 1;
3762                 if (!print_errs)
3763                         goto out;
3764                 fprintf(stderr, "Incorrect global backref count "
3765                         "on %llu found %llu wanted %llu\n",
3766                         (unsigned long long)rec->start,
3767                         (unsigned long long)found,
3768                         (unsigned long long)rec->refs);
3769         }
3770 out:
3771         return err;
3772 }
3773
3774 static int free_all_extent_backrefs(struct extent_record *rec)
3775 {
3776         struct extent_backref *back;
3777         struct list_head *cur;
3778         while (!list_empty(&rec->backrefs)) {
3779                 cur = rec->backrefs.next;
3780                 back = list_entry(cur, struct extent_backref, list);
3781                 list_del(cur);
3782                 free(back);
3783         }
3784         return 0;
3785 }
3786
3787 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3788                                      struct cache_tree *extent_cache)
3789 {
3790         struct cache_extent *cache;
3791         struct extent_record *rec;
3792
3793         while (1) {
3794                 cache = first_cache_extent(extent_cache);
3795                 if (!cache)
3796                         break;
3797                 rec = container_of(cache, struct extent_record, cache);
3798                 remove_cache_extent(extent_cache, cache);
3799                 free_all_extent_backrefs(rec);
3800                 free(rec);
3801         }
3802 }
3803
3804 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3805                                  struct extent_record *rec)
3806 {
3807         if (rec->content_checked && rec->owner_ref_checked &&
3808             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3809             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3810             !rec->bad_full_backref && !rec->crossing_stripes &&
3811             !rec->wrong_chunk_type) {
3812                 remove_cache_extent(extent_cache, &rec->cache);
3813                 free_all_extent_backrefs(rec);
3814                 list_del_init(&rec->list);
3815                 free(rec);
3816         }
3817         return 0;
3818 }
3819
3820 static int check_owner_ref(struct btrfs_root *root,
3821                             struct extent_record *rec,
3822                             struct extent_buffer *buf)
3823 {
3824         struct extent_backref *node;
3825         struct tree_backref *back;
3826         struct btrfs_root *ref_root;
3827         struct btrfs_key key;
3828         struct btrfs_path path;
3829         struct extent_buffer *parent;
3830         int level;
3831         int found = 0;
3832         int ret;
3833
3834         list_for_each_entry(node, &rec->backrefs, list) {
3835                 if (node->is_data)
3836                         continue;
3837                 if (!node->found_ref)
3838                         continue;
3839                 if (node->full_backref)
3840                         continue;
3841                 back = (struct tree_backref *)node;
3842                 if (btrfs_header_owner(buf) == back->root)
3843                         return 0;
3844         }
3845         BUG_ON(rec->is_root);
3846
3847         /* try to find the block by search corresponding fs tree */
3848         key.objectid = btrfs_header_owner(buf);
3849         key.type = BTRFS_ROOT_ITEM_KEY;
3850         key.offset = (u64)-1;
3851
3852         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3853         if (IS_ERR(ref_root))
3854                 return 1;
3855
3856         level = btrfs_header_level(buf);
3857         if (level == 0)
3858                 btrfs_item_key_to_cpu(buf, &key, 0);
3859         else
3860                 btrfs_node_key_to_cpu(buf, &key, 0);
3861
3862         btrfs_init_path(&path);
3863         path.lowest_level = level + 1;
3864         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3865         if (ret < 0)
3866                 return 0;
3867
3868         parent = path.nodes[level + 1];
3869         if (parent && buf->start == btrfs_node_blockptr(parent,
3870                                                         path.slots[level + 1]))
3871                 found = 1;
3872
3873         btrfs_release_path(&path);
3874         return found ? 0 : 1;
3875 }
3876
3877 static int is_extent_tree_record(struct extent_record *rec)
3878 {
3879         struct list_head *cur = rec->backrefs.next;
3880         struct extent_backref *node;
3881         struct tree_backref *back;
3882         int is_extent = 0;
3883
3884         while(cur != &rec->backrefs) {
3885                 node = list_entry(cur, struct extent_backref, list);
3886                 cur = cur->next;
3887                 if (node->is_data)
3888                         return 0;
3889                 back = (struct tree_backref *)node;
3890                 if (node->full_backref)
3891                         return 0;
3892                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3893                         is_extent = 1;
3894         }
3895         return is_extent;
3896 }
3897
3898
3899 static int record_bad_block_io(struct btrfs_fs_info *info,
3900                                struct cache_tree *extent_cache,
3901                                u64 start, u64 len)
3902 {
3903         struct extent_record *rec;
3904         struct cache_extent *cache;
3905         struct btrfs_key key;
3906
3907         cache = lookup_cache_extent(extent_cache, start, len);
3908         if (!cache)
3909                 return 0;
3910
3911         rec = container_of(cache, struct extent_record, cache);
3912         if (!is_extent_tree_record(rec))
3913                 return 0;
3914
3915         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3916         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3917 }
3918
3919 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3920                        struct extent_buffer *buf, int slot)
3921 {
3922         if (btrfs_header_level(buf)) {
3923                 struct btrfs_key_ptr ptr1, ptr2;
3924
3925                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3926                                    sizeof(struct btrfs_key_ptr));
3927                 read_extent_buffer(buf, &ptr2,
3928                                    btrfs_node_key_ptr_offset(slot + 1),
3929                                    sizeof(struct btrfs_key_ptr));
3930                 write_extent_buffer(buf, &ptr1,
3931                                     btrfs_node_key_ptr_offset(slot + 1),
3932                                     sizeof(struct btrfs_key_ptr));
3933                 write_extent_buffer(buf, &ptr2,
3934                                     btrfs_node_key_ptr_offset(slot),
3935                                     sizeof(struct btrfs_key_ptr));
3936                 if (slot == 0) {
3937                         struct btrfs_disk_key key;
3938                         btrfs_node_key(buf, &key, 0);
3939                         btrfs_fixup_low_keys(root, path, &key,
3940                                              btrfs_header_level(buf) + 1);
3941                 }
3942         } else {
3943                 struct btrfs_item *item1, *item2;
3944                 struct btrfs_key k1, k2;
3945                 char *item1_data, *item2_data;
3946                 u32 item1_offset, item2_offset, item1_size, item2_size;
3947
3948                 item1 = btrfs_item_nr(slot);
3949                 item2 = btrfs_item_nr(slot + 1);
3950                 btrfs_item_key_to_cpu(buf, &k1, slot);
3951                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3952                 item1_offset = btrfs_item_offset(buf, item1);
3953                 item2_offset = btrfs_item_offset(buf, item2);
3954                 item1_size = btrfs_item_size(buf, item1);
3955                 item2_size = btrfs_item_size(buf, item2);
3956
3957                 item1_data = malloc(item1_size);
3958                 if (!item1_data)
3959                         return -ENOMEM;
3960                 item2_data = malloc(item2_size);
3961                 if (!item2_data) {
3962                         free(item1_data);
3963                         return -ENOMEM;
3964                 }
3965
3966                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3967                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3968
3969                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3970                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3971                 free(item1_data);
3972                 free(item2_data);
3973
3974                 btrfs_set_item_offset(buf, item1, item2_offset);
3975                 btrfs_set_item_offset(buf, item2, item1_offset);
3976                 btrfs_set_item_size(buf, item1, item2_size);
3977                 btrfs_set_item_size(buf, item2, item1_size);
3978
3979                 path->slots[0] = slot;
3980                 btrfs_set_item_key_unsafe(root, path, &k2);
3981                 path->slots[0] = slot + 1;
3982                 btrfs_set_item_key_unsafe(root, path, &k1);
3983         }
3984         return 0;
3985 }
3986
3987 static int fix_key_order(struct btrfs_trans_handle *trans,
3988                          struct btrfs_root *root,
3989                          struct btrfs_path *path)
3990 {
3991         struct extent_buffer *buf;
3992         struct btrfs_key k1, k2;
3993         int i;
3994         int level = path->lowest_level;
3995         int ret = -EIO;
3996
3997         buf = path->nodes[level];
3998         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3999                 if (level) {
4000                         btrfs_node_key_to_cpu(buf, &k1, i);
4001                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4002                 } else {
4003                         btrfs_item_key_to_cpu(buf, &k1, i);
4004                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4005                 }
4006                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4007                         continue;
4008                 ret = swap_values(root, path, buf, i);
4009                 if (ret)
4010                         break;
4011                 btrfs_mark_buffer_dirty(buf);
4012                 i = 0;
4013         }
4014         return ret;
4015 }
4016
4017 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4018                              struct btrfs_root *root,
4019                              struct btrfs_path *path,
4020                              struct extent_buffer *buf, int slot)
4021 {
4022         struct btrfs_key key;
4023         int nritems = btrfs_header_nritems(buf);
4024
4025         btrfs_item_key_to_cpu(buf, &key, slot);
4026
4027         /* These are all the keys we can deal with missing. */
4028         if (key.type != BTRFS_DIR_INDEX_KEY &&
4029             key.type != BTRFS_EXTENT_ITEM_KEY &&
4030             key.type != BTRFS_METADATA_ITEM_KEY &&
4031             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4032             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4033                 return -1;
4034
4035         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4036                (unsigned long long)key.objectid, key.type,
4037                (unsigned long long)key.offset, slot, buf->start);
4038         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4039                               btrfs_item_nr_offset(slot + 1),
4040                               sizeof(struct btrfs_item) *
4041                               (nritems - slot - 1));
4042         btrfs_set_header_nritems(buf, nritems - 1);
4043         if (slot == 0) {
4044                 struct btrfs_disk_key disk_key;
4045
4046                 btrfs_item_key(buf, &disk_key, 0);
4047                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4048         }
4049         btrfs_mark_buffer_dirty(buf);
4050         return 0;
4051 }
4052
4053 static int fix_item_offset(struct btrfs_trans_handle *trans,
4054                            struct btrfs_root *root,
4055                            struct btrfs_path *path)
4056 {
4057         struct extent_buffer *buf;
4058         int i;
4059         int ret = 0;
4060
4061         /* We should only get this for leaves */
4062         BUG_ON(path->lowest_level);
4063         buf = path->nodes[0];
4064 again:
4065         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4066                 unsigned int shift = 0, offset;
4067
4068                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4069                     BTRFS_LEAF_DATA_SIZE(root)) {
4070                         if (btrfs_item_end_nr(buf, i) >
4071                             BTRFS_LEAF_DATA_SIZE(root)) {
4072                                 ret = delete_bogus_item(trans, root, path,
4073                                                         buf, i);
4074                                 if (!ret)
4075                                         goto again;
4076                                 fprintf(stderr, "item is off the end of the "
4077                                         "leaf, can't fix\n");
4078                                 ret = -EIO;
4079                                 break;
4080                         }
4081                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4082                                 btrfs_item_end_nr(buf, i);
4083                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4084                            btrfs_item_offset_nr(buf, i - 1)) {
4085                         if (btrfs_item_end_nr(buf, i) >
4086                             btrfs_item_offset_nr(buf, i - 1)) {
4087                                 ret = delete_bogus_item(trans, root, path,
4088                                                         buf, i);
4089                                 if (!ret)
4090                                         goto again;
4091                                 fprintf(stderr, "items overlap, can't fix\n");
4092                                 ret = -EIO;
4093                                 break;
4094                         }
4095                         shift = btrfs_item_offset_nr(buf, i - 1) -
4096                                 btrfs_item_end_nr(buf, i);
4097                 }
4098                 if (!shift)
4099                         continue;
4100
4101                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4102                        i, shift, (unsigned long long)buf->start);
4103                 offset = btrfs_item_offset_nr(buf, i);
4104                 memmove_extent_buffer(buf,
4105                                       btrfs_leaf_data(buf) + offset + shift,
4106                                       btrfs_leaf_data(buf) + offset,
4107                                       btrfs_item_size_nr(buf, i));
4108                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4109                                       offset + shift);
4110                 btrfs_mark_buffer_dirty(buf);
4111         }
4112
4113         /*
4114          * We may have moved things, in which case we want to exit so we don't
4115          * write those changes out.  Once we have proper abort functionality in
4116          * progs this can be changed to something nicer.
4117          */
4118         BUG_ON(ret);
4119         return ret;
4120 }
4121
4122 /*
4123  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4124  * then just return -EIO.
4125  */
4126 static int try_to_fix_bad_block(struct btrfs_root *root,
4127                                 struct extent_buffer *buf,
4128                                 enum btrfs_tree_block_status status)
4129 {
4130         struct btrfs_trans_handle *trans;
4131         struct ulist *roots;
4132         struct ulist_node *node;
4133         struct btrfs_root *search_root;
4134         struct btrfs_path *path;
4135         struct ulist_iterator iter;
4136         struct btrfs_key root_key, key;
4137         int ret;
4138
4139         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4140             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4141                 return -EIO;
4142
4143         path = btrfs_alloc_path();
4144         if (!path)
4145                 return -EIO;
4146
4147         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4148                                    0, &roots);
4149         if (ret) {
4150                 btrfs_free_path(path);
4151                 return -EIO;
4152         }
4153
4154         ULIST_ITER_INIT(&iter);
4155         while ((node = ulist_next(roots, &iter))) {
4156                 root_key.objectid = node->val;
4157                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4158                 root_key.offset = (u64)-1;
4159
4160                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4161                 if (IS_ERR(root)) {
4162                         ret = -EIO;
4163                         break;
4164                 }
4165
4166
4167                 trans = btrfs_start_transaction(search_root, 0);
4168                 if (IS_ERR(trans)) {
4169                         ret = PTR_ERR(trans);
4170                         break;
4171                 }
4172
4173                 path->lowest_level = btrfs_header_level(buf);
4174                 path->skip_check_block = 1;
4175                 if (path->lowest_level)
4176                         btrfs_node_key_to_cpu(buf, &key, 0);
4177                 else
4178                         btrfs_item_key_to_cpu(buf, &key, 0);
4179                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4180                 if (ret) {
4181                         ret = -EIO;
4182                         btrfs_commit_transaction(trans, search_root);
4183                         break;
4184                 }
4185                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4186                         ret = fix_key_order(trans, search_root, path);
4187                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4188                         ret = fix_item_offset(trans, search_root, path);
4189                 if (ret) {
4190                         btrfs_commit_transaction(trans, search_root);
4191                         break;
4192                 }
4193                 btrfs_release_path(path);
4194                 btrfs_commit_transaction(trans, search_root);
4195         }
4196         ulist_free(roots);
4197         btrfs_free_path(path);
4198         return ret;
4199 }
4200
4201 static int check_block(struct btrfs_root *root,
4202                        struct cache_tree *extent_cache,
4203                        struct extent_buffer *buf, u64 flags)
4204 {
4205         struct extent_record *rec;
4206         struct cache_extent *cache;
4207         struct btrfs_key key;
4208         enum btrfs_tree_block_status status;
4209         int ret = 0;
4210         int level;
4211
4212         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4213         if (!cache)
4214                 return 1;
4215         rec = container_of(cache, struct extent_record, cache);
4216         rec->generation = btrfs_header_generation(buf);
4217
4218         level = btrfs_header_level(buf);
4219         if (btrfs_header_nritems(buf) > 0) {
4220
4221                 if (level == 0)
4222                         btrfs_item_key_to_cpu(buf, &key, 0);
4223                 else
4224                         btrfs_node_key_to_cpu(buf, &key, 0);
4225
4226                 rec->info_objectid = key.objectid;
4227         }
4228         rec->info_level = level;
4229
4230         if (btrfs_is_leaf(buf))
4231                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4232         else
4233                 status = btrfs_check_node(root, &rec->parent_key, buf);
4234
4235         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4236                 if (repair)
4237                         status = try_to_fix_bad_block(root, buf, status);
4238                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4239                         ret = -EIO;
4240                         fprintf(stderr, "bad block %llu\n",
4241                                 (unsigned long long)buf->start);
4242                 } else {
4243                         /*
4244                          * Signal to callers we need to start the scan over
4245                          * again since we'll have cow'ed blocks.
4246                          */
4247                         ret = -EAGAIN;
4248                 }
4249         } else {
4250                 rec->content_checked = 1;
4251                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4252                         rec->owner_ref_checked = 1;
4253                 else {
4254                         ret = check_owner_ref(root, rec, buf);
4255                         if (!ret)
4256                                 rec->owner_ref_checked = 1;
4257                 }
4258         }
4259         if (!ret)
4260                 maybe_free_extent_rec(extent_cache, rec);
4261         return ret;
4262 }
4263
4264 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4265                                                 u64 parent, u64 root)
4266 {
4267         struct list_head *cur = rec->backrefs.next;
4268         struct extent_backref *node;
4269         struct tree_backref *back;
4270
4271         while(cur != &rec->backrefs) {
4272                 node = list_entry(cur, struct extent_backref, list);
4273                 cur = cur->next;
4274                 if (node->is_data)
4275                         continue;
4276                 back = (struct tree_backref *)node;
4277                 if (parent > 0) {
4278                         if (!node->full_backref)
4279                                 continue;
4280                         if (parent == back->parent)
4281                                 return back;
4282                 } else {
4283                         if (node->full_backref)
4284                                 continue;
4285                         if (back->root == root)
4286                                 return back;
4287                 }
4288         }
4289         return NULL;
4290 }
4291
4292 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4293                                                 u64 parent, u64 root)
4294 {
4295         struct tree_backref *ref = malloc(sizeof(*ref));
4296         memset(&ref->node, 0, sizeof(ref->node));
4297         if (parent > 0) {
4298                 ref->parent = parent;
4299                 ref->node.full_backref = 1;
4300         } else {
4301                 ref->root = root;
4302                 ref->node.full_backref = 0;
4303         }
4304         list_add_tail(&ref->node.list, &rec->backrefs);
4305
4306         return ref;
4307 }
4308
4309 static struct data_backref *find_data_backref(struct extent_record *rec,
4310                                                 u64 parent, u64 root,
4311                                                 u64 owner, u64 offset,
4312                                                 int found_ref,
4313                                                 u64 disk_bytenr, u64 bytes)
4314 {
4315         struct list_head *cur = rec->backrefs.next;
4316         struct extent_backref *node;
4317         struct data_backref *back;
4318
4319         while(cur != &rec->backrefs) {
4320                 node = list_entry(cur, struct extent_backref, list);
4321                 cur = cur->next;
4322                 if (!node->is_data)
4323                         continue;
4324                 back = (struct data_backref *)node;
4325                 if (parent > 0) {
4326                         if (!node->full_backref)
4327                                 continue;
4328                         if (parent == back->parent)
4329                                 return back;
4330                 } else {
4331                         if (node->full_backref)
4332                                 continue;
4333                         if (back->root == root && back->owner == owner &&
4334                             back->offset == offset) {
4335                                 if (found_ref && node->found_ref &&
4336                                     (back->bytes != bytes ||
4337                                     back->disk_bytenr != disk_bytenr))
4338                                         continue;
4339                                 return back;
4340                         }
4341                 }
4342         }
4343         return NULL;
4344 }
4345
4346 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4347                                                 u64 parent, u64 root,
4348                                                 u64 owner, u64 offset,
4349                                                 u64 max_size)
4350 {
4351         struct data_backref *ref = malloc(sizeof(*ref));
4352         memset(&ref->node, 0, sizeof(ref->node));
4353         ref->node.is_data = 1;
4354
4355         if (parent > 0) {
4356                 ref->parent = parent;
4357                 ref->owner = 0;
4358                 ref->offset = 0;
4359                 ref->node.full_backref = 1;
4360         } else {
4361                 ref->root = root;
4362                 ref->owner = owner;
4363                 ref->offset = offset;
4364                 ref->node.full_backref = 0;
4365         }
4366         ref->bytes = max_size;
4367         ref->found_ref = 0;
4368         ref->num_refs = 0;
4369         list_add_tail(&ref->node.list, &rec->backrefs);
4370         if (max_size > rec->max_size)
4371                 rec->max_size = max_size;
4372         return ref;
4373 }
4374
4375 /* Check if the type of extent matches with its chunk */
4376 static void check_extent_type(struct extent_record *rec)
4377 {
4378         struct btrfs_block_group_cache *bg_cache;
4379
4380         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4381         if (!bg_cache)
4382                 return;
4383
4384         /* data extent, check chunk directly*/
4385         if (!rec->metadata) {
4386                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4387                         rec->wrong_chunk_type = 1;
4388                 return;
4389         }
4390
4391         /* metadata extent, check the obvious case first */
4392         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4393                                  BTRFS_BLOCK_GROUP_METADATA))) {
4394                 rec->wrong_chunk_type = 1;
4395                 return;
4396         }
4397
4398         /*
4399          * Check SYSTEM extent, as it's also marked as metadata, we can only
4400          * make sure it's a SYSTEM extent by its backref
4401          */
4402         if (!list_empty(&rec->backrefs)) {
4403                 struct extent_backref *node;
4404                 struct tree_backref *tback;
4405                 u64 bg_type;
4406
4407                 node = list_entry(rec->backrefs.next, struct extent_backref,
4408                                   list);
4409                 if (node->is_data) {
4410                         /* tree block shouldn't have data backref */
4411                         rec->wrong_chunk_type = 1;
4412                         return;
4413                 }
4414                 tback = container_of(node, struct tree_backref, node);
4415
4416                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4417                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4418                 else
4419                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4420                 if (!(bg_cache->flags & bg_type))
4421                         rec->wrong_chunk_type = 1;
4422         }
4423 }
4424
4425 static int add_extent_rec(struct cache_tree *extent_cache,
4426                           struct btrfs_key *parent_key, u64 parent_gen,
4427                           u64 start, u64 nr, u64 extent_item_refs,
4428                           int is_root, int inc_ref, int set_checked,
4429                           int metadata, int extent_rec, u64 max_size)
4430 {
4431         struct extent_record *rec;
4432         struct cache_extent *cache;
4433         int ret = 0;
4434         int dup = 0;
4435
4436         cache = lookup_cache_extent(extent_cache, start, nr);
4437         if (cache) {
4438                 rec = container_of(cache, struct extent_record, cache);
4439                 if (inc_ref)
4440                         rec->refs++;
4441                 if (rec->nr == 1)
4442                         rec->nr = max(nr, max_size);
4443
4444                 /*
4445                  * We need to make sure to reset nr to whatever the extent
4446                  * record says was the real size, this way we can compare it to
4447                  * the backrefs.
4448                  */
4449                 if (extent_rec) {
4450                         if (start != rec->start || rec->found_rec) {
4451                                 struct extent_record *tmp;
4452
4453                                 dup = 1;
4454                                 if (list_empty(&rec->list))
4455                                         list_add_tail(&rec->list,
4456                                                       &duplicate_extents);
4457
4458                                 /*
4459                                  * We have to do this song and dance in case we
4460                                  * find an extent record that falls inside of
4461                                  * our current extent record but does not have
4462                                  * the same objectid.
4463                                  */
4464                                 tmp = malloc(sizeof(*tmp));
4465                                 if (!tmp)
4466                                         return -ENOMEM;
4467                                 tmp->start = start;
4468                                 tmp->max_size = max_size;
4469                                 tmp->nr = nr;
4470                                 tmp->found_rec = 1;
4471                                 tmp->metadata = metadata;
4472                                 tmp->extent_item_refs = extent_item_refs;
4473                                 INIT_LIST_HEAD(&tmp->list);
4474                                 list_add_tail(&tmp->list, &rec->dups);
4475                                 rec->num_duplicates++;
4476                         } else {
4477                                 rec->nr = nr;
4478                                 rec->found_rec = 1;
4479                         }
4480                 }
4481
4482                 if (extent_item_refs && !dup) {
4483                         if (rec->extent_item_refs) {
4484                                 fprintf(stderr, "block %llu rec "
4485                                         "extent_item_refs %llu, passed %llu\n",
4486                                         (unsigned long long)start,
4487                                         (unsigned long long)
4488                                                         rec->extent_item_refs,
4489                                         (unsigned long long)extent_item_refs);
4490                         }
4491                         rec->extent_item_refs = extent_item_refs;
4492                 }
4493                 if (is_root)
4494                         rec->is_root = 1;
4495                 if (set_checked) {
4496                         rec->content_checked = 1;
4497                         rec->owner_ref_checked = 1;
4498                 }
4499
4500                 if (parent_key)
4501                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4502                 if (parent_gen)
4503                         rec->parent_generation = parent_gen;
4504
4505                 if (rec->max_size < max_size)
4506                         rec->max_size = max_size;
4507
4508                 /*
4509                  * A metadata extent can't cross stripe_len boundary, otherwise
4510                  * kernel scrub won't be able to handle it.
4511                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4512                  * it.
4513                  */
4514                 if (metadata && check_crossing_stripes(rec->start,
4515                                                        rec->max_size))
4516                                 rec->crossing_stripes = 1;
4517                 check_extent_type(rec);
4518                 maybe_free_extent_rec(extent_cache, rec);
4519                 return ret;
4520         }
4521         rec = malloc(sizeof(*rec));
4522         rec->start = start;
4523         rec->max_size = max_size;
4524         rec->nr = max(nr, max_size);
4525         rec->found_rec = !!extent_rec;
4526         rec->content_checked = 0;
4527         rec->owner_ref_checked = 0;
4528         rec->num_duplicates = 0;
4529         rec->metadata = metadata;
4530         rec->flag_block_full_backref = -1;
4531         rec->bad_full_backref = 0;
4532         rec->crossing_stripes = 0;
4533         rec->wrong_chunk_type = 0;
4534         INIT_LIST_HEAD(&rec->backrefs);
4535         INIT_LIST_HEAD(&rec->dups);
4536         INIT_LIST_HEAD(&rec->list);
4537
4538         if (is_root)
4539                 rec->is_root = 1;
4540         else
4541                 rec->is_root = 0;
4542
4543         if (inc_ref)
4544                 rec->refs = 1;
4545         else
4546                 rec->refs = 0;
4547
4548         if (extent_item_refs)
4549                 rec->extent_item_refs = extent_item_refs;
4550         else
4551                 rec->extent_item_refs = 0;
4552
4553         if (parent_key)
4554                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4555         else
4556                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4557
4558         if (parent_gen)
4559                 rec->parent_generation = parent_gen;
4560         else
4561                 rec->parent_generation = 0;
4562
4563         rec->cache.start = start;
4564         rec->cache.size = nr;
4565         ret = insert_cache_extent(extent_cache, &rec->cache);
4566         BUG_ON(ret);
4567         bytes_used += nr;
4568         if (set_checked) {
4569                 rec->content_checked = 1;
4570                 rec->owner_ref_checked = 1;
4571         }
4572
4573         if (metadata)
4574                 if (check_crossing_stripes(rec->start, rec->max_size))
4575                         rec->crossing_stripes = 1;
4576         check_extent_type(rec);
4577         return ret;
4578 }
4579
4580 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4581                             u64 parent, u64 root, int found_ref)
4582 {
4583         struct extent_record *rec;
4584         struct tree_backref *back;
4585         struct cache_extent *cache;
4586
4587         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4588         if (!cache) {
4589                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4590                                1, 0, 0, 0, 0, 1, 0, 0);
4591                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4592                 if (!cache)
4593                         abort();
4594         }
4595
4596         rec = container_of(cache, struct extent_record, cache);
4597         if (rec->start != bytenr) {
4598                 abort();
4599         }
4600
4601         back = find_tree_backref(rec, parent, root);
4602         if (!back)
4603                 back = alloc_tree_backref(rec, parent, root);
4604
4605         if (found_ref) {
4606                 if (back->node.found_ref) {
4607                         fprintf(stderr, "Extent back ref already exists "
4608                                 "for %llu parent %llu root %llu \n",
4609                                 (unsigned long long)bytenr,
4610                                 (unsigned long long)parent,
4611                                 (unsigned long long)root);
4612                 }
4613                 back->node.found_ref = 1;
4614         } else {
4615                 if (back->node.found_extent_tree) {
4616                         fprintf(stderr, "Extent back ref already exists "
4617                                 "for %llu parent %llu root %llu \n",
4618                                 (unsigned long long)bytenr,
4619                                 (unsigned long long)parent,
4620                                 (unsigned long long)root);
4621                 }
4622                 back->node.found_extent_tree = 1;
4623         }
4624         check_extent_type(rec);
4625         maybe_free_extent_rec(extent_cache, rec);
4626         return 0;
4627 }
4628
4629 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4630                             u64 parent, u64 root, u64 owner, u64 offset,
4631                             u32 num_refs, int found_ref, u64 max_size)
4632 {
4633         struct extent_record *rec;
4634         struct data_backref *back;
4635         struct cache_extent *cache;
4636
4637         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4638         if (!cache) {
4639                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4640                                0, 0, max_size);
4641                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4642                 if (!cache)
4643                         abort();
4644         }
4645
4646         rec = container_of(cache, struct extent_record, cache);
4647         if (rec->max_size < max_size)
4648                 rec->max_size = max_size;
4649
4650         /*
4651          * If found_ref is set then max_size is the real size and must match the
4652          * existing refs.  So if we have already found a ref then we need to
4653          * make sure that this ref matches the existing one, otherwise we need
4654          * to add a new backref so we can notice that the backrefs don't match
4655          * and we need to figure out who is telling the truth.  This is to
4656          * account for that awful fsync bug I introduced where we'd end up with
4657          * a btrfs_file_extent_item that would have its length include multiple
4658          * prealloc extents or point inside of a prealloc extent.
4659          */
4660         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4661                                  bytenr, max_size);
4662         if (!back)
4663                 back = alloc_data_backref(rec, parent, root, owner, offset,
4664                                           max_size);
4665
4666         if (found_ref) {
4667                 BUG_ON(num_refs != 1);
4668                 if (back->node.found_ref)
4669                         BUG_ON(back->bytes != max_size);
4670                 back->node.found_ref = 1;
4671                 back->found_ref += 1;
4672                 back->bytes = max_size;
4673                 back->disk_bytenr = bytenr;
4674                 rec->refs += 1;
4675                 rec->content_checked = 1;
4676                 rec->owner_ref_checked = 1;
4677         } else {
4678                 if (back->node.found_extent_tree) {
4679                         fprintf(stderr, "Extent back ref already exists "
4680                                 "for %llu parent %llu root %llu "
4681                                 "owner %llu offset %llu num_refs %lu\n",
4682                                 (unsigned long long)bytenr,
4683                                 (unsigned long long)parent,
4684                                 (unsigned long long)root,
4685                                 (unsigned long long)owner,
4686                                 (unsigned long long)offset,
4687                                 (unsigned long)num_refs);
4688                 }
4689                 back->num_refs = num_refs;
4690                 back->node.found_extent_tree = 1;
4691         }
4692         maybe_free_extent_rec(extent_cache, rec);
4693         return 0;
4694 }
4695
4696 static int add_pending(struct cache_tree *pending,
4697                        struct cache_tree *seen, u64 bytenr, u32 size)
4698 {
4699         int ret;
4700         ret = add_cache_extent(seen, bytenr, size);
4701         if (ret)
4702                 return ret;
4703         add_cache_extent(pending, bytenr, size);
4704         return 0;
4705 }
4706
4707 static int pick_next_pending(struct cache_tree *pending,
4708                         struct cache_tree *reada,
4709                         struct cache_tree *nodes,
4710                         u64 last, struct block_info *bits, int bits_nr,
4711                         int *reada_bits)
4712 {
4713         unsigned long node_start = last;
4714         struct cache_extent *cache;
4715         int ret;
4716
4717         cache = search_cache_extent(reada, 0);
4718         if (cache) {
4719                 bits[0].start = cache->start;
4720                 bits[0].size = cache->size;
4721                 *reada_bits = 1;
4722                 return 1;
4723         }
4724         *reada_bits = 0;
4725         if (node_start > 32768)
4726                 node_start -= 32768;
4727
4728         cache = search_cache_extent(nodes, node_start);
4729         if (!cache)
4730                 cache = search_cache_extent(nodes, 0);
4731
4732         if (!cache) {
4733                  cache = search_cache_extent(pending, 0);
4734                  if (!cache)
4735                          return 0;
4736                  ret = 0;
4737                  do {
4738                          bits[ret].start = cache->start;
4739                          bits[ret].size = cache->size;
4740                          cache = next_cache_extent(cache);
4741                          ret++;
4742                  } while (cache && ret < bits_nr);
4743                  return ret;
4744         }
4745
4746         ret = 0;
4747         do {
4748                 bits[ret].start = cache->start;
4749                 bits[ret].size = cache->size;
4750                 cache = next_cache_extent(cache);
4751                 ret++;
4752         } while (cache && ret < bits_nr);
4753
4754         if (bits_nr - ret > 8) {
4755                 u64 lookup = bits[0].start + bits[0].size;
4756                 struct cache_extent *next;
4757                 next = search_cache_extent(pending, lookup);
4758                 while(next) {
4759                         if (next->start - lookup > 32768)
4760                                 break;
4761                         bits[ret].start = next->start;
4762                         bits[ret].size = next->size;
4763                         lookup = next->start + next->size;
4764                         ret++;
4765                         if (ret == bits_nr)
4766                                 break;
4767                         next = next_cache_extent(next);
4768                         if (!next)
4769                                 break;
4770                 }
4771         }
4772         return ret;
4773 }
4774
4775 static void free_chunk_record(struct cache_extent *cache)
4776 {
4777         struct chunk_record *rec;
4778
4779         rec = container_of(cache, struct chunk_record, cache);
4780         list_del_init(&rec->list);
4781         list_del_init(&rec->dextents);
4782         free(rec);
4783 }
4784
4785 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4786 {
4787         cache_tree_free_extents(chunk_cache, free_chunk_record);
4788 }
4789
4790 static void free_device_record(struct rb_node *node)
4791 {
4792         struct device_record *rec;
4793
4794         rec = container_of(node, struct device_record, node);
4795         free(rec);
4796 }
4797
4798 FREE_RB_BASED_TREE(device_cache, free_device_record);
4799
4800 int insert_block_group_record(struct block_group_tree *tree,
4801                               struct block_group_record *bg_rec)
4802 {
4803         int ret;
4804
4805         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4806         if (ret)
4807                 return ret;
4808
4809         list_add_tail(&bg_rec->list, &tree->block_groups);
4810         return 0;
4811 }
4812
4813 static void free_block_group_record(struct cache_extent *cache)
4814 {
4815         struct block_group_record *rec;
4816
4817         rec = container_of(cache, struct block_group_record, cache);
4818         list_del_init(&rec->list);
4819         free(rec);
4820 }
4821
4822 void free_block_group_tree(struct block_group_tree *tree)
4823 {
4824         cache_tree_free_extents(&tree->tree, free_block_group_record);
4825 }
4826
4827 int insert_device_extent_record(struct device_extent_tree *tree,
4828                                 struct device_extent_record *de_rec)
4829 {
4830         int ret;
4831
4832         /*
4833          * Device extent is a bit different from the other extents, because
4834          * the extents which belong to the different devices may have the
4835          * same start and size, so we need use the special extent cache
4836          * search/insert functions.
4837          */
4838         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4839         if (ret)
4840                 return ret;
4841
4842         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4843         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4844         return 0;
4845 }
4846
4847 static void free_device_extent_record(struct cache_extent *cache)
4848 {
4849         struct device_extent_record *rec;
4850
4851         rec = container_of(cache, struct device_extent_record, cache);
4852         if (!list_empty(&rec->chunk_list))
4853                 list_del_init(&rec->chunk_list);
4854         if (!list_empty(&rec->device_list))
4855                 list_del_init(&rec->device_list);
4856         free(rec);
4857 }
4858
4859 void free_device_extent_tree(struct device_extent_tree *tree)
4860 {
4861         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4862 }
4863
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent reference item found in @slot of @leaf.
 *
 * A reference whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree back reference; any other is recorded as a data back reference
 * carrying the item's v0 reference count.  In both cases the item key's
 * objectid is used as the extent start and its offset as the parent.
 *
 * Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	else
		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);

	return 0;
}
#endif
4882
4883 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4884                                             struct btrfs_key *key,
4885                                             int slot)
4886 {
4887         struct btrfs_chunk *ptr;
4888         struct chunk_record *rec;
4889         int num_stripes, i;
4890
4891         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4892         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4893
4894         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4895         if (!rec) {
4896                 fprintf(stderr, "memory allocation failed\n");
4897                 exit(-1);
4898         }
4899
4900         INIT_LIST_HEAD(&rec->list);
4901         INIT_LIST_HEAD(&rec->dextents);
4902         rec->bg_rec = NULL;
4903
4904         rec->cache.start = key->offset;
4905         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4906
4907         rec->generation = btrfs_header_generation(leaf);
4908
4909         rec->objectid = key->objectid;
4910         rec->type = key->type;
4911         rec->offset = key->offset;
4912
4913         rec->length = rec->cache.size;
4914         rec->owner = btrfs_chunk_owner(leaf, ptr);
4915         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4916         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4917         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4918         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4919         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4920         rec->num_stripes = num_stripes;
4921         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4922
4923         for (i = 0; i < rec->num_stripes; ++i) {
4924                 rec->stripes[i].devid =
4925                         btrfs_stripe_devid_nr(leaf, ptr, i);
4926                 rec->stripes[i].offset =
4927                         btrfs_stripe_offset_nr(leaf, ptr, i);
4928                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4929                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4930                                 BTRFS_UUID_SIZE);
4931         }
4932
4933         return rec;
4934 }
4935
4936 static int process_chunk_item(struct cache_tree *chunk_cache,
4937                               struct btrfs_key *key, struct extent_buffer *eb,
4938                               int slot)
4939 {
4940         struct chunk_record *rec;
4941         int ret = 0;
4942
4943         rec = btrfs_new_chunk_record(eb, key, slot);
4944         ret = insert_cache_extent(chunk_cache, &rec->cache);
4945         if (ret) {
4946                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4947                         rec->offset, rec->length);
4948                 free(rec);
4949         }
4950
4951         return ret;
4952 }
4953
4954 static int process_device_item(struct rb_root *dev_cache,
4955                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4956 {
4957         struct btrfs_dev_item *ptr;
4958         struct device_record *rec;
4959         int ret = 0;
4960
4961         ptr = btrfs_item_ptr(eb,
4962                 slot, struct btrfs_dev_item);
4963
4964         rec = malloc(sizeof(*rec));
4965         if (!rec) {
4966                 fprintf(stderr, "memory allocation failed\n");
4967                 return -ENOMEM;
4968         }
4969
4970         rec->devid = key->offset;
4971         rec->generation = btrfs_header_generation(eb);
4972
4973         rec->objectid = key->objectid;
4974         rec->type = key->type;
4975         rec->offset = key->offset;
4976
4977         rec->devid = btrfs_device_id(eb, ptr);
4978         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4979         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4980
4981         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4982         if (ret) {
4983                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4984                 free(rec);
4985         }
4986
4987         return ret;
4988 }
4989
4990 struct block_group_record *
4991 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4992                              int slot)
4993 {
4994         struct btrfs_block_group_item *ptr;
4995         struct block_group_record *rec;
4996
4997         rec = calloc(1, sizeof(*rec));
4998         if (!rec) {
4999                 fprintf(stderr, "memory allocation failed\n");
5000                 exit(-1);
5001         }
5002
5003         rec->cache.start = key->objectid;
5004         rec->cache.size = key->offset;
5005
5006         rec->generation = btrfs_header_generation(leaf);
5007
5008         rec->objectid = key->objectid;
5009         rec->type = key->type;
5010         rec->offset = key->offset;
5011
5012         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5013         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5014
5015         INIT_LIST_HEAD(&rec->list);
5016
5017         return rec;
5018 }
5019
5020 static int process_block_group_item(struct block_group_tree *block_group_cache,
5021                                     struct btrfs_key *key,
5022                                     struct extent_buffer *eb, int slot)
5023 {
5024         struct block_group_record *rec;
5025         int ret = 0;
5026
5027         rec = btrfs_new_block_group_record(eb, key, slot);
5028         ret = insert_block_group_record(block_group_cache, rec);
5029         if (ret) {
5030                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5031                         rec->objectid, rec->offset);
5032                 free(rec);
5033         }
5034
5035         return ret;
5036 }
5037
5038 struct device_extent_record *
5039 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5040                                struct btrfs_key *key, int slot)
5041 {
5042         struct device_extent_record *rec;
5043         struct btrfs_dev_extent *ptr;
5044
5045         rec = calloc(1, sizeof(*rec));
5046         if (!rec) {
5047                 fprintf(stderr, "memory allocation failed\n");
5048                 exit(-1);
5049         }
5050
5051         rec->cache.objectid = key->objectid;
5052         rec->cache.start = key->offset;
5053
5054         rec->generation = btrfs_header_generation(leaf);
5055
5056         rec->objectid = key->objectid;
5057         rec->type = key->type;
5058         rec->offset = key->offset;
5059
5060         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5061         rec->chunk_objecteid =
5062                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5063         rec->chunk_offset =
5064                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5065         rec->length = btrfs_dev_extent_length(leaf, ptr);
5066         rec->cache.size = rec->length;
5067
5068         INIT_LIST_HEAD(&rec->chunk_list);
5069         INIT_LIST_HEAD(&rec->device_list);
5070
5071         return rec;
5072 }
5073
5074 static int
5075 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5076                            struct btrfs_key *key, struct extent_buffer *eb,
5077                            int slot)
5078 {
5079         struct device_extent_record *rec;
5080         int ret;
5081
5082         rec = btrfs_new_device_extent_record(eb, key, slot);
5083         ret = insert_device_extent_record(dev_extent_cache, rec);
5084         if (ret) {
5085                 fprintf(stderr,
5086                         "Device extent[%llu, %llu, %llu] existed.\n",
5087                         rec->objectid, rec->offset, rec->length);
5088                 free(rec);
5089         }
5090
5091         return ret;
5092 }
5093
5094 static int process_extent_item(struct btrfs_root *root,
5095                                struct cache_tree *extent_cache,
5096                                struct extent_buffer *eb, int slot)
5097 {
5098         struct btrfs_extent_item *ei;
5099         struct btrfs_extent_inline_ref *iref;
5100         struct btrfs_extent_data_ref *dref;
5101         struct btrfs_shared_data_ref *sref;
5102         struct btrfs_key key;
5103         unsigned long end;
5104         unsigned long ptr;
5105         int type;
5106         u32 item_size = btrfs_item_size_nr(eb, slot);
5107         u64 refs = 0;
5108         u64 offset;
5109         u64 num_bytes;
5110         int metadata = 0;
5111
5112         btrfs_item_key_to_cpu(eb, &key, slot);
5113
5114         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5115                 metadata = 1;
5116                 num_bytes = root->leafsize;
5117         } else {
5118                 num_bytes = key.offset;
5119         }
5120
5121         if (item_size < sizeof(*ei)) {
5122 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5123                 struct btrfs_extent_item_v0 *ei0;
5124                 BUG_ON(item_size != sizeof(*ei0));
5125                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5126                 refs = btrfs_extent_refs_v0(eb, ei0);
5127 #else
5128                 BUG();
5129 #endif
5130                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5131                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5132                                       num_bytes);
5133         }
5134
5135         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5136         refs = btrfs_extent_refs(eb, ei);
5137
5138         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5139                        refs, 0, 0, 0, metadata, 1, num_bytes);
5140
5141         ptr = (unsigned long)(ei + 1);
5142         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5143             key.type == BTRFS_EXTENT_ITEM_KEY)
5144                 ptr += sizeof(struct btrfs_tree_block_info);
5145
5146         end = (unsigned long)ei + item_size;
5147         while (ptr < end) {
5148                 iref = (struct btrfs_extent_inline_ref *)ptr;
5149                 type = btrfs_extent_inline_ref_type(eb, iref);
5150                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5151                 switch (type) {
5152                 case BTRFS_TREE_BLOCK_REF_KEY:
5153                         add_tree_backref(extent_cache, key.objectid,
5154                                          0, offset, 0);
5155                         break;
5156                 case BTRFS_SHARED_BLOCK_REF_KEY:
5157                         add_tree_backref(extent_cache, key.objectid,
5158                                          offset, 0, 0);
5159                         break;
5160                 case BTRFS_EXTENT_DATA_REF_KEY:
5161                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5162                         add_data_backref(extent_cache, key.objectid, 0,
5163                                         btrfs_extent_data_ref_root(eb, dref),
5164                                         btrfs_extent_data_ref_objectid(eb,
5165                                                                        dref),
5166                                         btrfs_extent_data_ref_offset(eb, dref),
5167                                         btrfs_extent_data_ref_count(eb, dref),
5168                                         0, num_bytes);
5169                         break;
5170                 case BTRFS_SHARED_DATA_REF_KEY:
5171                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5172                         add_data_backref(extent_cache, key.objectid, offset,
5173                                         0, 0, 0,
5174                                         btrfs_shared_data_ref_count(eb, sref),
5175                                         0, num_bytes);
5176                         break;
5177                 default:
5178                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5179                                 key.objectid, key.type, num_bytes);
5180                         goto out;
5181                 }
5182                 ptr += btrfs_extent_inline_ref_size(type);
5183         }
5184         WARN_ON(ptr > end);
5185 out:
5186         return 0;
5187 }
5188
5189 static int check_cache_range(struct btrfs_root *root,
5190                              struct btrfs_block_group_cache *cache,
5191                              u64 offset, u64 bytes)
5192 {
5193         struct btrfs_free_space *entry;
5194         u64 *logical;
5195         u64 bytenr;
5196         int stripe_len;
5197         int i, nr, ret;
5198
5199         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5200                 bytenr = btrfs_sb_offset(i);
5201                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5202                                        cache->key.objectid, bytenr, 0,
5203                                        &logical, &nr, &stripe_len);
5204                 if (ret)
5205                         return ret;
5206
5207                 while (nr--) {
5208                         if (logical[nr] + stripe_len <= offset)
5209                                 continue;
5210                         if (offset + bytes <= logical[nr])
5211                                 continue;
5212                         if (logical[nr] == offset) {
5213                                 if (stripe_len >= bytes) {
5214                                         kfree(logical);
5215                                         return 0;
5216                                 }
5217                                 bytes -= stripe_len;
5218                                 offset += stripe_len;
5219                         } else if (logical[nr] < offset) {
5220                                 if (logical[nr] + stripe_len >=
5221                                     offset + bytes) {
5222                                         kfree(logical);
5223                                         return 0;
5224                                 }
5225                                 bytes = (offset + bytes) -
5226                                         (logical[nr] + stripe_len);
5227                                 offset = logical[nr] + stripe_len;
5228                         } else {
5229                                 /*
5230                                  * Could be tricky, the super may land in the
5231                                  * middle of the area we're checking.  First
5232                                  * check the easiest case, it's at the end.
5233                                  */
5234                                 if (logical[nr] + stripe_len >=
5235                                     bytes + offset) {
5236                                         bytes = logical[nr] - offset;
5237                                         continue;
5238                                 }
5239
5240                                 /* Check the left side */
5241                                 ret = check_cache_range(root, cache,
5242                                                         offset,
5243                                                         logical[nr] - offset);
5244                                 if (ret) {
5245                                         kfree(logical);
5246                                         return ret;
5247                                 }
5248
5249                                 /* Now we continue with the right side */
5250                                 bytes = (offset + bytes) -
5251                                         (logical[nr] + stripe_len);
5252                                 offset = logical[nr] + stripe_len;
5253                         }
5254                 }
5255
5256                 kfree(logical);
5257         }
5258
5259         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5260         if (!entry) {
5261                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5262                         offset, offset+bytes);
5263                 return -EINVAL;
5264         }
5265
5266         if (entry->offset != offset) {
5267                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5268                         entry->offset);
5269                 return -EINVAL;
5270         }
5271
5272         if (entry->bytes != bytes) {
5273                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5274                         bytes, entry->bytes, offset);
5275                 return -EINVAL;
5276         }
5277
5278         unlink_free_space(cache->free_space_ctl, entry);
5279         free(entry);
5280         return 0;
5281 }
5282
5283 static int verify_space_cache(struct btrfs_root *root,
5284                               struct btrfs_block_group_cache *cache)
5285 {
5286         struct btrfs_path *path;
5287         struct extent_buffer *leaf;
5288         struct btrfs_key key;
5289         u64 last;
5290         int ret = 0;
5291
5292         path = btrfs_alloc_path();
5293         if (!path)
5294                 return -ENOMEM;
5295
5296         root = root->fs_info->extent_root;
5297
5298         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5299
5300         key.objectid = last;
5301         key.offset = 0;
5302         key.type = BTRFS_EXTENT_ITEM_KEY;
5303
5304         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5305         if (ret < 0)
5306                 goto out;
5307         ret = 0;
5308         while (1) {
5309                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5310                         ret = btrfs_next_leaf(root, path);
5311                         if (ret < 0)
5312                                 goto out;
5313                         if (ret > 0) {
5314                                 ret = 0;
5315                                 break;
5316                         }
5317                 }
5318                 leaf = path->nodes[0];
5319                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5320                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5321                         break;
5322                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5323                     key.type != BTRFS_METADATA_ITEM_KEY) {
5324                         path->slots[0]++;
5325                         continue;
5326                 }
5327
5328                 if (last == key.objectid) {
5329                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5330                                 last = key.objectid + key.offset;
5331                         else
5332                                 last = key.objectid + root->leafsize;
5333                         path->slots[0]++;
5334                         continue;
5335                 }
5336
5337                 ret = check_cache_range(root, cache, last,
5338                                         key.objectid - last);
5339                 if (ret)
5340                         break;
5341                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5342                         last = key.objectid + key.offset;
5343                 else
5344                         last = key.objectid + root->leafsize;
5345                 path->slots[0]++;
5346         }
5347
5348         if (last < cache->key.objectid + cache->key.offset)
5349                 ret = check_cache_range(root, cache, last,
5350                                         cache->key.objectid +
5351                                         cache->key.offset - last);
5352
5353 out:
5354         btrfs_free_path(path);
5355
5356         if (!ret &&
5357             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5358                 fprintf(stderr, "There are still entries left in the space "
5359                         "cache\n");
5360                 ret = -EINVAL;
5361         }
5362
5363         return ret;
5364 }
5365
5366 static int check_space_cache(struct btrfs_root *root)
5367 {
5368         struct btrfs_block_group_cache *cache;
5369         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5370         int ret;
5371         int error = 0;
5372
5373         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5374             btrfs_super_generation(root->fs_info->super_copy) !=
5375             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5376                 printf("cache and super generation don't match, space cache "
5377                        "will be invalidated\n");
5378                 return 0;
5379         }
5380
5381         if (ctx.progress_enabled) {
5382                 ctx.tp = TASK_FREE_SPACE;
5383                 task_start(ctx.info);
5384         }
5385
5386         while (1) {
5387                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5388                 if (!cache)
5389                         break;
5390
5391                 start = cache->key.objectid + cache->key.offset;
5392                 if (!cache->free_space_ctl) {
5393                         if (btrfs_init_free_space_ctl(cache,
5394                                                       root->sectorsize)) {
5395                                 ret = -ENOMEM;
5396                                 break;
5397                         }
5398                 } else {
5399                         btrfs_remove_free_space_cache(cache);
5400                 }
5401
5402                 ret = load_free_space_cache(root->fs_info, cache);
5403                 if (!ret)
5404                         continue;
5405
5406                 ret = verify_space_cache(root, cache);
5407                 if (ret) {
5408                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5409                                 cache->key.objectid);
5410                         error++;
5411                 }
5412         }
5413
5414         task_stop(ctx.info);
5415
5416         return error ? -EINVAL : 0;
5417 }
5418
5419 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5420                         u64 num_bytes, unsigned long leaf_offset,
5421                         struct extent_buffer *eb) {
5422
5423         u64 offset = 0;
5424         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5425         char *data;
5426         unsigned long csum_offset;
5427         u32 csum;
5428         u32 csum_expected;
5429         u64 read_len;
5430         u64 data_checked = 0;
5431         u64 tmp;
5432         int ret = 0;
5433         int mirror;
5434         int num_copies;
5435
5436         if (num_bytes % root->sectorsize)
5437                 return -EINVAL;
5438
5439         data = malloc(num_bytes);
5440         if (!data)
5441                 return -ENOMEM;
5442
5443         while (offset < num_bytes) {
5444                 mirror = 0;
5445 again:
5446                 read_len = num_bytes - offset;
5447                 /* read as much space once a time */
5448                 ret = read_extent_data(root, data + offset,
5449                                 bytenr + offset, &read_len, mirror);
5450                 if (ret)
5451                         goto out;
5452                 data_checked = 0;
5453                 /* verify every 4k data's checksum */
5454                 while (data_checked < read_len) {
5455                         csum = ~(u32)0;
5456                         tmp = offset + data_checked;
5457
5458                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5459                                                csum, root->sectorsize);
5460                         btrfs_csum_final(csum, (char *)&csum);
5461
5462                         csum_offset = leaf_offset +
5463                                  tmp / root->sectorsize * csum_size;
5464                         read_extent_buffer(eb, (char *)&csum_expected,
5465                                            csum_offset, csum_size);
5466                         /* try another mirror */
5467                         if (csum != csum_expected) {
5468                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5469                                                 mirror, bytenr + tmp,
5470                                                 csum, csum_expected);
5471                                 num_copies = btrfs_num_copies(
5472                                                 &root->fs_info->mapping_tree,
5473                                                 bytenr, num_bytes);
5474                                 if (mirror < num_copies - 1) {
5475                                         mirror += 1;
5476                                         goto again;
5477                                 }
5478                         }
5479                         data_checked += root->sectorsize;
5480                 }
5481                 offset += read_len;
5482         }
5483 out:
5484         free(data);
5485         return ret;
5486 }
5487
5488 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5489                                u64 num_bytes)
5490 {
5491         struct btrfs_path *path;
5492         struct extent_buffer *leaf;
5493         struct btrfs_key key;
5494         int ret;
5495
5496         path = btrfs_alloc_path();
5497         if (!path) {
5498                 fprintf(stderr, "Error allocing path\n");
5499                 return -ENOMEM;
5500         }
5501
5502         key.objectid = bytenr;
5503         key.type = BTRFS_EXTENT_ITEM_KEY;
5504         key.offset = (u64)-1;
5505
5506 again:
5507         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5508                                 0, 0);
5509         if (ret < 0) {
5510                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5511                 btrfs_free_path(path);
5512                 return ret;
5513         } else if (ret) {
5514                 if (path->slots[0] > 0) {
5515                         path->slots[0]--;
5516                 } else {
5517                         ret = btrfs_prev_leaf(root, path);
5518                         if (ret < 0) {
5519                                 goto out;
5520                         } else if (ret > 0) {
5521                                 ret = 0;
5522                                 goto out;
5523                         }
5524                 }
5525         }
5526
5527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5528
5529         /*
5530          * Block group items come before extent items if they have the same
5531          * bytenr, so walk back one more just in case.  Dear future traveler,
5532          * first congrats on mastering time travel.  Now if it's not too much
5533          * trouble could you go back to 2006 and tell Chris to make the
5534          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5535          * EXTENT_ITEM_KEY please?
5536          */
5537         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5538                 if (path->slots[0] > 0) {
5539                         path->slots[0]--;
5540                 } else {
5541                         ret = btrfs_prev_leaf(root, path);
5542                         if (ret < 0) {
5543                                 goto out;
5544                         } else if (ret > 0) {
5545                                 ret = 0;
5546                                 goto out;
5547                         }
5548                 }
5549                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5550         }
5551
5552         while (num_bytes) {
5553                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5554                         ret = btrfs_next_leaf(root, path);
5555                         if (ret < 0) {
5556                                 fprintf(stderr, "Error going to next leaf "
5557                                         "%d\n", ret);
5558                                 btrfs_free_path(path);
5559                                 return ret;
5560                         } else if (ret) {
5561                                 break;
5562                         }
5563                 }
5564                 leaf = path->nodes[0];
5565                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5566                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5567                         path->slots[0]++;
5568                         continue;
5569                 }
5570                 if (key.objectid + key.offset < bytenr) {
5571                         path->slots[0]++;
5572                         continue;
5573                 }
5574                 if (key.objectid > bytenr + num_bytes)
5575                         break;
5576
5577                 if (key.objectid == bytenr) {
5578                         if (key.offset >= num_bytes) {
5579                                 num_bytes = 0;
5580                                 break;
5581                         }
5582                         num_bytes -= key.offset;
5583                         bytenr += key.offset;
5584                 } else if (key.objectid < bytenr) {
5585                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5586                                 num_bytes = 0;
5587                                 break;
5588                         }
5589                         num_bytes = (bytenr + num_bytes) -
5590                                 (key.objectid + key.offset);
5591                         bytenr = key.objectid + key.offset;
5592                 } else {
5593                         if (key.objectid + key.offset < bytenr + num_bytes) {
5594                                 u64 new_start = key.objectid + key.offset;
5595                                 u64 new_bytes = bytenr + num_bytes - new_start;
5596
5597                                 /*
5598                                  * Weird case, the extent is in the middle of
5599                                  * our range, we'll have to search one side
5600                                  * and then the other.  Not sure if this happens
5601                                  * in real life, but no harm in coding it up
5602                                  * anyway just in case.
5603                                  */
5604                                 btrfs_release_path(path);
5605                                 ret = check_extent_exists(root, new_start,
5606                                                           new_bytes);
5607                                 if (ret) {
5608                                         fprintf(stderr, "Right section didn't "
5609                                                 "have a record\n");
5610                                         break;
5611                                 }
5612                                 num_bytes = key.objectid - bytenr;
5613                                 goto again;
5614                         }
5615                         num_bytes = key.objectid - bytenr;
5616                 }
5617                 path->slots[0]++;
5618         }
5619         ret = 0;
5620
5621 out:
5622         if (num_bytes && !ret) {
5623                 fprintf(stderr, "There are no extents for csum range "
5624                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5625                 ret = 1;
5626         }
5627
5628         btrfs_free_path(path);
5629         return ret;
5630 }
5631
5632 static int check_csums(struct btrfs_root *root)
5633 {
5634         struct btrfs_path *path;
5635         struct extent_buffer *leaf;
5636         struct btrfs_key key;
5637         u64 offset = 0, num_bytes = 0;
5638         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5639         int errors = 0;
5640         int ret;
5641         u64 data_len;
5642         unsigned long leaf_offset;
5643
5644         root = root->fs_info->csum_root;
5645         if (!extent_buffer_uptodate(root->node)) {
5646                 fprintf(stderr, "No valid csum tree found\n");
5647                 return -ENOENT;
5648         }
5649
5650         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5651         key.type = BTRFS_EXTENT_CSUM_KEY;
5652         key.offset = 0;
5653
5654         path = btrfs_alloc_path();
5655         if (!path)
5656                 return -ENOMEM;
5657
5658         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5659         if (ret < 0) {
5660                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5661                 btrfs_free_path(path);
5662                 return ret;
5663         }
5664
5665         if (ret > 0 && path->slots[0])
5666                 path->slots[0]--;
5667         ret = 0;
5668
5669         while (1) {
5670                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5671                         ret = btrfs_next_leaf(root, path);
5672                         if (ret < 0) {
5673                                 fprintf(stderr, "Error going to next leaf "
5674                                         "%d\n", ret);
5675                                 break;
5676                         }
5677                         if (ret)
5678                                 break;
5679                 }
5680                 leaf = path->nodes[0];
5681
5682                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5683                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5684                         path->slots[0]++;
5685                         continue;
5686                 }
5687
5688                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5689                               csum_size) * root->sectorsize;
5690                 if (!check_data_csum)
5691                         goto skip_csum_check;
5692                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5693                 ret = check_extent_csums(root, key.offset, data_len,
5694                                          leaf_offset, leaf);
5695                 if (ret)
5696                         break;
5697 skip_csum_check:
5698                 if (!num_bytes) {
5699                         offset = key.offset;
5700                 } else if (key.offset != offset + num_bytes) {
5701                         ret = check_extent_exists(root, offset, num_bytes);
5702                         if (ret) {
5703                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5704                                         "there is no extent record\n",
5705                                         offset, offset+num_bytes);
5706                                 errors++;
5707                         }
5708                         offset = key.offset;
5709                         num_bytes = 0;
5710                 }
5711                 num_bytes += data_len;
5712                 path->slots[0]++;
5713         }
5714
5715         btrfs_free_path(path);
5716         return errors;
5717 }
5718
5719 static int is_dropped_key(struct btrfs_key *key,
5720                           struct btrfs_key *drop_key) {
5721         if (key->objectid < drop_key->objectid)
5722                 return 1;
5723         else if (key->objectid == drop_key->objectid) {
5724                 if (key->type < drop_key->type)
5725                         return 1;
5726                 else if (key->type == drop_key->type) {
5727                         if (key->offset < drop_key->offset)
5728                                 return 1;
5729                 }
5730         }
5731         return 0;
5732 }
5733
5734 /*
5735  * Here are the rules for FULL_BACKREF.
5736  *
5737  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5738  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5739  *      FULL_BACKREF set.
5740  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5741  *    if it happened after the relocation occurred since we'll have dropped the
5742  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5743  *    have no real way to know for sure.
5744  *
5745  * We process the blocks one root at a time, and we start from the lowest root
5746  * objectid and go to the highest.  So we can just lookup the owner backref for
5747  * the record and if we don't find it then we know it doesn't exist and we have
5748  * a FULL BACKREF.
5749  *
5750  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5751  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5752  * be set or not and then we can check later once we've gathered all the refs.
5753  */
5754 static int calc_extent_flag(struct btrfs_root *root,
5755                            struct cache_tree *extent_cache,
5756                            struct extent_buffer *buf,
5757                            struct root_item_record *ri,
5758                            u64 *flags)
5759 {
5760         struct extent_record *rec;
5761         struct cache_extent *cache;
5762         struct tree_backref *tback;
5763         u64 owner = 0;
5764
5765         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5766         /* we have added this extent before */
5767         BUG_ON(!cache);
5768         rec = container_of(cache, struct extent_record, cache);
5769
5770         /*
5771          * Except file/reloc tree, we can not have
5772          * FULL BACKREF MODE
5773          */
5774         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5775                 goto normal;
5776         /*
5777          * root node
5778          */
5779         if (buf->start == ri->bytenr)
5780                 goto normal;
5781
5782         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5783                 goto full_backref;
5784
5785         owner = btrfs_header_owner(buf);
5786         if (owner == ri->objectid)
5787                 goto normal;
5788
5789         tback = find_tree_backref(rec, 0, owner);
5790         if (!tback)
5791                 goto full_backref;
5792 normal:
5793         *flags = 0;
5794         if (rec->flag_block_full_backref != -1 &&
5795             rec->flag_block_full_backref != 0)
5796                 rec->bad_full_backref = 1;
5797         return 0;
5798 full_backref:
5799         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5800         if (rec->flag_block_full_backref != -1 &&
5801             rec->flag_block_full_backref != 1)
5802                 rec->bad_full_backref = 1;
5803         return 0;
5804 }
5805
5806 static int run_next_block(struct btrfs_root *root,
5807                           struct block_info *bits,
5808                           int bits_nr,
5809                           u64 *last,
5810                           struct cache_tree *pending,
5811                           struct cache_tree *seen,
5812                           struct cache_tree *reada,
5813                           struct cache_tree *nodes,
5814                           struct cache_tree *extent_cache,
5815                           struct cache_tree *chunk_cache,
5816                           struct rb_root *dev_cache,
5817                           struct block_group_tree *block_group_cache,
5818                           struct device_extent_tree *dev_extent_cache,
5819                           struct root_item_record *ri)
5820 {
5821         struct extent_buffer *buf;
5822         struct extent_record *rec = NULL;
5823         u64 bytenr;
5824         u32 size;
5825         u64 parent;
5826         u64 owner;
5827         u64 flags;
5828         u64 ptr;
5829         u64 gen = 0;
5830         int ret = 0;
5831         int i;
5832         int nritems;
5833         struct btrfs_key key;
5834         struct cache_extent *cache;
5835         int reada_bits;
5836
5837         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5838                                     bits_nr, &reada_bits);
5839         if (nritems == 0)
5840                 return 1;
5841
5842         if (!reada_bits) {
5843                 for(i = 0; i < nritems; i++) {
5844                         ret = add_cache_extent(reada, bits[i].start,
5845                                                bits[i].size);
5846                         if (ret == -EEXIST)
5847                                 continue;
5848
5849                         /* fixme, get the parent transid */
5850                         readahead_tree_block(root, bits[i].start,
5851                                              bits[i].size, 0);
5852                 }
5853         }
5854         *last = bits[0].start;
5855         bytenr = bits[0].start;
5856         size = bits[0].size;
5857
5858         cache = lookup_cache_extent(pending, bytenr, size);
5859         if (cache) {
5860                 remove_cache_extent(pending, cache);
5861                 free(cache);
5862         }
5863         cache = lookup_cache_extent(reada, bytenr, size);
5864         if (cache) {
5865                 remove_cache_extent(reada, cache);
5866                 free(cache);
5867         }
5868         cache = lookup_cache_extent(nodes, bytenr, size);
5869         if (cache) {
5870                 remove_cache_extent(nodes, cache);
5871                 free(cache);
5872         }
5873         cache = lookup_cache_extent(extent_cache, bytenr, size);
5874         if (cache) {
5875                 rec = container_of(cache, struct extent_record, cache);
5876                 gen = rec->parent_generation;
5877         }
5878
5879         /* fixme, get the real parent transid */
5880         buf = read_tree_block(root, bytenr, size, gen);
5881         if (!extent_buffer_uptodate(buf)) {
5882                 record_bad_block_io(root->fs_info,
5883                                     extent_cache, bytenr, size);
5884                 goto out;
5885         }
5886
5887         nritems = btrfs_header_nritems(buf);
5888
5889         flags = 0;
5890         if (!init_extent_tree) {
5891                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5892                                        btrfs_header_level(buf), 1, NULL,
5893                                        &flags);
5894                 if (ret < 0) {
5895                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5896                         if (ret < 0) {
5897                                 fprintf(stderr, "Couldn't calc extent flags\n");
5898                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5899                         }
5900                 }
5901         } else {
5902                 flags = 0;
5903                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5904                 if (ret < 0) {
5905                         fprintf(stderr, "Couldn't calc extent flags\n");
5906                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5907                 }
5908         }
5909
5910         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5911                 if (ri != NULL &&
5912                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5913                     ri->objectid == btrfs_header_owner(buf)) {
5914                         /*
5915                          * Ok we got to this block from it's original owner and
5916                          * we have FULL_BACKREF set.  Relocation can leave
5917                          * converted blocks over so this is altogether possible,
5918                          * however it's not possible if the generation > the
5919                          * last snapshot, so check for this case.
5920                          */
5921                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5922                             btrfs_header_generation(buf) > ri->last_snapshot) {
5923                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5924                                 rec->bad_full_backref = 1;
5925                         }
5926                 }
5927         } else {
5928                 if (ri != NULL &&
5929                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5930                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5931                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5932                         rec->bad_full_backref = 1;
5933                 }
5934         }
5935
5936         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5937                 rec->flag_block_full_backref = 1;
5938                 parent = bytenr;
5939                 owner = 0;
5940         } else {
5941                 rec->flag_block_full_backref = 0;
5942                 parent = 0;
5943                 owner = btrfs_header_owner(buf);
5944         }
5945
5946         ret = check_block(root, extent_cache, buf, flags);
5947         if (ret)
5948                 goto out;
5949
5950         if (btrfs_is_leaf(buf)) {
5951                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5952                 for (i = 0; i < nritems; i++) {
5953                         struct btrfs_file_extent_item *fi;
5954                         btrfs_item_key_to_cpu(buf, &key, i);
5955                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5956                                 process_extent_item(root, extent_cache, buf,
5957                                                     i);
5958                                 continue;
5959                         }
5960                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5961                                 process_extent_item(root, extent_cache, buf,
5962                                                     i);
5963                                 continue;
5964                         }
5965                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5966                                 total_csum_bytes +=
5967                                         btrfs_item_size_nr(buf, i);
5968                                 continue;
5969                         }
5970                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5971                                 process_chunk_item(chunk_cache, &key, buf, i);
5972                                 continue;
5973                         }
5974                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5975                                 process_device_item(dev_cache, &key, buf, i);
5976                                 continue;
5977                         }
5978                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5979                                 process_block_group_item(block_group_cache,
5980                                         &key, buf, i);
5981                                 continue;
5982                         }
5983                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5984                                 process_device_extent_item(dev_extent_cache,
5985                                         &key, buf, i);
5986                                 continue;
5987
5988                         }
5989                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5990 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5991                                 process_extent_ref_v0(extent_cache, buf, i);
5992 #else
5993                                 BUG();
5994 #endif
5995                                 continue;
5996                         }
5997
5998                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5999                                 add_tree_backref(extent_cache, key.objectid, 0,
6000                                                  key.offset, 0);
6001                                 continue;
6002                         }
6003                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6004                                 add_tree_backref(extent_cache, key.objectid,
6005                                                  key.offset, 0, 0);
6006                                 continue;
6007                         }
6008                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6009                                 struct btrfs_extent_data_ref *ref;
6010                                 ref = btrfs_item_ptr(buf, i,
6011                                                 struct btrfs_extent_data_ref);
6012                                 add_data_backref(extent_cache,
6013                                         key.objectid, 0,
6014                                         btrfs_extent_data_ref_root(buf, ref),
6015                                         btrfs_extent_data_ref_objectid(buf,
6016                                                                        ref),
6017                                         btrfs_extent_data_ref_offset(buf, ref),
6018                                         btrfs_extent_data_ref_count(buf, ref),
6019                                         0, root->sectorsize);
6020                                 continue;
6021                         }
6022                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6023                                 struct btrfs_shared_data_ref *ref;
6024                                 ref = btrfs_item_ptr(buf, i,
6025                                                 struct btrfs_shared_data_ref);
6026                                 add_data_backref(extent_cache,
6027                                         key.objectid, key.offset, 0, 0, 0,
6028                                         btrfs_shared_data_ref_count(buf, ref),
6029                                         0, root->sectorsize);
6030                                 continue;
6031                         }
6032                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6033                                 struct bad_item *bad;
6034
6035                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6036                                         continue;
6037                                 if (!owner)
6038                                         continue;
6039                                 bad = malloc(sizeof(struct bad_item));
6040                                 if (!bad)
6041                                         continue;
6042                                 INIT_LIST_HEAD(&bad->list);
6043                                 memcpy(&bad->key, &key,
6044                                        sizeof(struct btrfs_key));
6045                                 bad->root_id = owner;
6046                                 list_add_tail(&bad->list, &delete_items);
6047                                 continue;
6048                         }
6049                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6050                                 continue;
6051                         fi = btrfs_item_ptr(buf, i,
6052                                             struct btrfs_file_extent_item);
6053                         if (btrfs_file_extent_type(buf, fi) ==
6054                             BTRFS_FILE_EXTENT_INLINE)
6055                                 continue;
6056                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6057                                 continue;
6058
6059                         data_bytes_allocated +=
6060                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6061                         if (data_bytes_allocated < root->sectorsize) {
6062                                 abort();
6063                         }
6064                         data_bytes_referenced +=
6065                                 btrfs_file_extent_num_bytes(buf, fi);
6066                         add_data_backref(extent_cache,
6067                                 btrfs_file_extent_disk_bytenr(buf, fi),
6068                                 parent, owner, key.objectid, key.offset -
6069                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6070                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6071                 }
6072         } else {
6073                 int level;
6074                 struct btrfs_key first_key;
6075
6076                 first_key.objectid = 0;
6077
6078                 if (nritems > 0)
6079                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6080                 level = btrfs_header_level(buf);
6081                 for (i = 0; i < nritems; i++) {
6082                         ptr = btrfs_node_blockptr(buf, i);
6083                         size = btrfs_level_size(root, level - 1);
6084                         btrfs_node_key_to_cpu(buf, &key, i);
6085                         if (ri != NULL) {
6086                                 if ((level == ri->drop_level)
6087                                     && is_dropped_key(&key, &ri->drop_key)) {
6088                                         continue;
6089                                 }
6090                         }
6091                         ret = add_extent_rec(extent_cache, &key,
6092                                              btrfs_node_ptr_generation(buf, i),
6093                                              ptr, size, 0, 0, 1, 0, 1, 0,
6094                                              size);
6095                         BUG_ON(ret);
6096
6097                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6098
6099                         if (level > 1) {
6100                                 add_pending(nodes, seen, ptr, size);
6101                         } else {
6102                                 add_pending(pending, seen, ptr, size);
6103                         }
6104                 }
6105                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6106                                       nritems) * sizeof(struct btrfs_key_ptr);
6107         }
6108         total_btree_bytes += buf->len;
6109         if (fs_root_objectid(btrfs_header_owner(buf)))
6110                 total_fs_tree_bytes += buf->len;
6111         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6112                 total_extent_tree_bytes += buf->len;
6113         if (!found_old_backref &&
6114             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6115             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6116             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6117                 found_old_backref = 1;
6118 out:
6119         free_extent_buffer(buf);
6120         return ret;
6121 }
6122
6123 static int add_root_to_pending(struct extent_buffer *buf,
6124                                struct cache_tree *extent_cache,
6125                                struct cache_tree *pending,
6126                                struct cache_tree *seen,
6127                                struct cache_tree *nodes,
6128                                u64 objectid)
6129 {
6130         if (btrfs_header_level(buf) > 0)
6131                 add_pending(nodes, seen, buf->start, buf->len);
6132         else
6133                 add_pending(pending, seen, buf->start, buf->len);
6134         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6135                        0, 1, 1, 0, 1, 0, buf->len);
6136
6137         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6138             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6139                 add_tree_backref(extent_cache, buf->start, buf->start,
6140                                  0, 1);
6141         else
6142                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6143         return 0;
6144 }
6145
6146 /* as we fix the tree, we might be deleting blocks that
6147  * we're tracking for repair.  This hook makes sure we
6148  * remove any backrefs for blocks as we are fixing them.
6149  */
6150 static int free_extent_hook(struct btrfs_trans_handle *trans,
6151                             struct btrfs_root *root,
6152                             u64 bytenr, u64 num_bytes, u64 parent,
6153                             u64 root_objectid, u64 owner, u64 offset,
6154                             int refs_to_drop)
6155 {
6156         struct extent_record *rec;
6157         struct cache_extent *cache;
6158         int is_data;
6159         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6160
6161         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6162         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6163         if (!cache)
6164                 return 0;
6165
6166         rec = container_of(cache, struct extent_record, cache);
6167         if (is_data) {
6168                 struct data_backref *back;
6169                 back = find_data_backref(rec, parent, root_objectid, owner,
6170                                          offset, 1, bytenr, num_bytes);
6171                 if (!back)
6172                         goto out;
6173                 if (back->node.found_ref) {
6174                         back->found_ref -= refs_to_drop;
6175                         if (rec->refs)
6176                                 rec->refs -= refs_to_drop;
6177                 }
6178                 if (back->node.found_extent_tree) {
6179                         back->num_refs -= refs_to_drop;
6180                         if (rec->extent_item_refs)
6181                                 rec->extent_item_refs -= refs_to_drop;
6182                 }
6183                 if (back->found_ref == 0)
6184                         back->node.found_ref = 0;
6185                 if (back->num_refs == 0)
6186                         back->node.found_extent_tree = 0;
6187
6188                 if (!back->node.found_extent_tree && back->node.found_ref) {
6189                         list_del(&back->node.list);
6190                         free(back);
6191                 }
6192         } else {
6193                 struct tree_backref *back;
6194                 back = find_tree_backref(rec, parent, root_objectid);
6195                 if (!back)
6196                         goto out;
6197                 if (back->node.found_ref) {
6198                         if (rec->refs)
6199                                 rec->refs--;
6200                         back->node.found_ref = 0;
6201                 }
6202                 if (back->node.found_extent_tree) {
6203                         if (rec->extent_item_refs)
6204                                 rec->extent_item_refs--;
6205                         back->node.found_extent_tree = 0;
6206                 }
6207                 if (!back->node.found_extent_tree && back->node.found_ref) {
6208                         list_del(&back->node.list);
6209                         free(back);
6210                 }
6211         }
6212         maybe_free_extent_rec(extent_cache, rec);
6213 out:
6214         return 0;
6215 }
6216
6217 static int delete_extent_records(struct btrfs_trans_handle *trans,
6218                                  struct btrfs_root *root,
6219                                  struct btrfs_path *path,
6220                                  u64 bytenr, u64 new_len)
6221 {
6222         struct btrfs_key key;
6223         struct btrfs_key found_key;
6224         struct extent_buffer *leaf;
6225         int ret;
6226         int slot;
6227
6228
6229         key.objectid = bytenr;
6230         key.type = (u8)-1;
6231         key.offset = (u64)-1;
6232
6233         while(1) {
6234                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6235                                         &key, path, 0, 1);
6236                 if (ret < 0)
6237                         break;
6238
6239                 if (ret > 0) {
6240                         ret = 0;
6241                         if (path->slots[0] == 0)
6242                                 break;
6243                         path->slots[0]--;
6244                 }
6245                 ret = 0;
6246
6247                 leaf = path->nodes[0];
6248                 slot = path->slots[0];
6249
6250                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6251                 if (found_key.objectid != bytenr)
6252                         break;
6253
6254                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6255                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6256                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6257                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6258                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6259                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6260                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6261                         btrfs_release_path(path);
6262                         if (found_key.type == 0) {
6263                                 if (found_key.offset == 0)
6264                                         break;
6265                                 key.offset = found_key.offset - 1;
6266                                 key.type = found_key.type;
6267                         }
6268                         key.type = found_key.type - 1;
6269                         key.offset = (u64)-1;
6270                         continue;
6271                 }
6272
6273                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6274                         found_key.objectid, found_key.type, found_key.offset);
6275
6276                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6277                 if (ret)
6278                         break;
6279                 btrfs_release_path(path);
6280
6281                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6282                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6283                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6284                                 found_key.offset : root->leafsize;
6285
6286                         ret = btrfs_update_block_group(trans, root, bytenr,
6287                                                        bytes, 0, 0);
6288                         if (ret)
6289                                 break;
6290                 }
6291         }
6292
6293         btrfs_release_path(path);
6294         return ret;
6295 }
6296
6297 /*
6298  * for a single backref, this will allocate a new extent
6299  * and add the backref to it.
6300  */
6301 static int record_extent(struct btrfs_trans_handle *trans,
6302                          struct btrfs_fs_info *info,
6303                          struct btrfs_path *path,
6304                          struct extent_record *rec,
6305                          struct extent_backref *back,
6306                          int allocated, u64 flags)
6307 {
6308         int ret;
6309         struct btrfs_root *extent_root = info->extent_root;
6310         struct extent_buffer *leaf;
6311         struct btrfs_key ins_key;
6312         struct btrfs_extent_item *ei;
6313         struct tree_backref *tback;
6314         struct data_backref *dback;
6315         struct btrfs_tree_block_info *bi;
6316
6317         if (!back->is_data)
6318                 rec->max_size = max_t(u64, rec->max_size,
6319                                     info->extent_root->leafsize);
6320
6321         if (!allocated) {
6322                 u32 item_size = sizeof(*ei);
6323
6324                 if (!back->is_data)
6325                         item_size += sizeof(*bi);
6326
6327                 ins_key.objectid = rec->start;
6328                 ins_key.offset = rec->max_size;
6329                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6330
6331                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6332                                         &ins_key, item_size);
6333                 if (ret)
6334                         goto fail;
6335
6336                 leaf = path->nodes[0];
6337                 ei = btrfs_item_ptr(leaf, path->slots[0],
6338                                     struct btrfs_extent_item);
6339
6340                 btrfs_set_extent_refs(leaf, ei, 0);
6341                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6342
6343                 if (back->is_data) {
6344                         btrfs_set_extent_flags(leaf, ei,
6345                                                BTRFS_EXTENT_FLAG_DATA);
6346                 } else {
6347                         struct btrfs_disk_key copy_key;;
6348
6349                         tback = (struct tree_backref *)back;
6350                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6351                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6352                                              sizeof(*bi));
6353
6354                         btrfs_set_disk_key_objectid(&copy_key,
6355                                                     rec->info_objectid);
6356                         btrfs_set_disk_key_type(&copy_key, 0);
6357                         btrfs_set_disk_key_offset(&copy_key, 0);
6358
6359                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6360                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6361
6362                         btrfs_set_extent_flags(leaf, ei,
6363                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6364                 }
6365
6366                 btrfs_mark_buffer_dirty(leaf);
6367                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6368                                                rec->max_size, 1, 0);
6369                 if (ret)
6370                         goto fail;
6371                 btrfs_release_path(path);
6372         }
6373
6374         if (back->is_data) {
6375                 u64 parent;
6376                 int i;
6377
6378                 dback = (struct data_backref *)back;
6379                 if (back->full_backref)
6380                         parent = dback->parent;
6381                 else
6382                         parent = 0;
6383
6384                 for (i = 0; i < dback->found_ref; i++) {
6385                         /* if parent != 0, we're doing a full backref
6386                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6387                          * just makes the backref allocator create a data
6388                          * backref
6389                          */
6390                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6391                                                    rec->start, rec->max_size,
6392                                                    parent,
6393                                                    dback->root,
6394                                                    parent ?
6395                                                    BTRFS_FIRST_FREE_OBJECTID :
6396                                                    dback->owner,
6397                                                    dback->offset);
6398                         if (ret)
6399                                 break;
6400                 }
6401                 fprintf(stderr, "adding new data backref"
6402                                 " on %llu %s %llu owner %llu"
6403                                 " offset %llu found %d\n",
6404                                 (unsigned long long)rec->start,
6405                                 back->full_backref ?
6406                                 "parent" : "root",
6407                                 back->full_backref ?
6408                                 (unsigned long long)parent :
6409                                 (unsigned long long)dback->root,
6410                                 (unsigned long long)dback->owner,
6411                                 (unsigned long long)dback->offset,
6412                                 dback->found_ref);
6413         } else {
6414                 u64 parent;
6415
6416                 tback = (struct tree_backref *)back;
6417                 if (back->full_backref)
6418                         parent = tback->parent;
6419                 else
6420                         parent = 0;
6421
6422                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6423                                            rec->start, rec->max_size,
6424                                            parent, tback->root, 0, 0);
6425                 fprintf(stderr, "adding new tree backref on "
6426                         "start %llu len %llu parent %llu root %llu\n",
6427                         rec->start, rec->max_size, parent, tback->root);
6428         }
6429         if (ret)
6430                 goto fail;
6431 fail:
6432         btrfs_release_path(path);
6433         return ret;
6434 }
6435
/*
 * One candidate (bytenr, bytes) description of a data extent, used while
 * verify_backrefs() lets the backrefs vote on which description is right.
 */
struct extent_entry {
        /* disk bytenr claimed for the extent */
        u64 bytenr;
        /* on-disk length claimed for the extent */
        u64 bytes;
        /* number of votes for this (bytenr, bytes) pair */
        int count;
        /* how many of the voting backrefs were flagged broken */
        int broken;
        /* link in the caller's list of candidates */
        struct list_head list;
};
6443
6444 static struct extent_entry *find_entry(struct list_head *entries,
6445                                        u64 bytenr, u64 bytes)
6446 {
6447         struct extent_entry *entry = NULL;
6448
6449         list_for_each_entry(entry, entries, list) {
6450                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6451                         return entry;
6452         }
6453
6454         return NULL;
6455 }
6456
6457 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6458 {
6459         struct extent_entry *entry, *best = NULL, *prev = NULL;
6460
6461         list_for_each_entry(entry, entries, list) {
6462                 if (!prev) {
6463                         prev = entry;
6464                         continue;
6465                 }
6466
6467                 /*
6468                  * If there are as many broken entries as entries then we know
6469                  * not to trust this particular entry.
6470                  */
6471                 if (entry->broken == entry->count)
6472                         continue;
6473
6474                 /*
6475                  * If our current entry == best then we can't be sure our best
6476                  * is really the best, so we need to keep searching.
6477                  */
6478                 if (best && best->count == entry->count) {
6479                         prev = entry;
6480                         best = NULL;
6481                         continue;
6482                 }
6483
6484                 /* Prev == entry, not good enough, have to keep searching */
6485                 if (!prev->broken && prev->count == entry->count)
6486                         continue;
6487
6488                 if (!best)
6489                         best = (prev->count > entry->count) ? prev : entry;
6490                 else if (best->count < entry->count)
6491                         best = entry;
6492                 prev = entry;
6493         }
6494
6495         return best;
6496 }
6497
6498 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6499                       struct data_backref *dback, struct extent_entry *entry)
6500 {
6501         struct btrfs_trans_handle *trans;
6502         struct btrfs_root *root;
6503         struct btrfs_file_extent_item *fi;
6504         struct extent_buffer *leaf;
6505         struct btrfs_key key;
6506         u64 bytenr, bytes;
6507         int ret, err;
6508
6509         key.objectid = dback->root;
6510         key.type = BTRFS_ROOT_ITEM_KEY;
6511         key.offset = (u64)-1;
6512         root = btrfs_read_fs_root(info, &key);
6513         if (IS_ERR(root)) {
6514                 fprintf(stderr, "Couldn't find root for our ref\n");
6515                 return -EINVAL;
6516         }
6517
6518         /*
6519          * The backref points to the original offset of the extent if it was
6520          * split, so we need to search down to the offset we have and then walk
6521          * forward until we find the backref we're looking for.
6522          */
6523         key.objectid = dback->owner;
6524         key.type = BTRFS_EXTENT_DATA_KEY;
6525         key.offset = dback->offset;
6526         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6527         if (ret < 0) {
6528                 fprintf(stderr, "Error looking up ref %d\n", ret);
6529                 return ret;
6530         }
6531
6532         while (1) {
6533                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6534                         ret = btrfs_next_leaf(root, path);
6535                         if (ret) {
6536                                 fprintf(stderr, "Couldn't find our ref, next\n");
6537                                 return -EINVAL;
6538                         }
6539                 }
6540                 leaf = path->nodes[0];
6541                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6542                 if (key.objectid != dback->owner ||
6543                     key.type != BTRFS_EXTENT_DATA_KEY) {
6544                         fprintf(stderr, "Couldn't find our ref, search\n");
6545                         return -EINVAL;
6546                 }
6547                 fi = btrfs_item_ptr(leaf, path->slots[0],
6548                                     struct btrfs_file_extent_item);
6549                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6550                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6551
6552                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6553                         break;
6554                 path->slots[0]++;
6555         }
6556
6557         btrfs_release_path(path);
6558
6559         trans = btrfs_start_transaction(root, 1);
6560         if (IS_ERR(trans))
6561                 return PTR_ERR(trans);
6562
6563         /*
6564          * Ok we have the key of the file extent we want to fix, now we can cow
6565          * down to the thing and fix it.
6566          */
6567         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6568         if (ret < 0) {
6569                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6570                         key.objectid, key.type, key.offset, ret);
6571                 goto out;
6572         }
6573         if (ret > 0) {
6574                 fprintf(stderr, "Well that's odd, we just found this key "
6575                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6576                         key.offset);
6577                 ret = -EINVAL;
6578                 goto out;
6579         }
6580         leaf = path->nodes[0];
6581         fi = btrfs_item_ptr(leaf, path->slots[0],
6582                             struct btrfs_file_extent_item);
6583
6584         if (btrfs_file_extent_compression(leaf, fi) &&
6585             dback->disk_bytenr != entry->bytenr) {
6586                 fprintf(stderr, "Ref doesn't match the record start and is "
6587                         "compressed, please take a btrfs-image of this file "
6588                         "system and send it to a btrfs developer so they can "
6589                         "complete this functionality for bytenr %Lu\n",
6590                         dback->disk_bytenr);
6591                 ret = -EINVAL;
6592                 goto out;
6593         }
6594
6595         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6596                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6597         } else if (dback->disk_bytenr > entry->bytenr) {
6598                 u64 off_diff, offset;
6599
6600                 off_diff = dback->disk_bytenr - entry->bytenr;
6601                 offset = btrfs_file_extent_offset(leaf, fi);
6602                 if (dback->disk_bytenr + offset +
6603                     btrfs_file_extent_num_bytes(leaf, fi) >
6604                     entry->bytenr + entry->bytes) {
6605                         fprintf(stderr, "Ref is past the entry end, please "
6606                                 "take a btrfs-image of this file system and "
6607                                 "send it to a btrfs developer, ref %Lu\n",
6608                                 dback->disk_bytenr);
6609                         ret = -EINVAL;
6610                         goto out;
6611                 }
6612                 offset += off_diff;
6613                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6614                 btrfs_set_file_extent_offset(leaf, fi, offset);
6615         } else if (dback->disk_bytenr < entry->bytenr) {
6616                 u64 offset;
6617
6618                 offset = btrfs_file_extent_offset(leaf, fi);
6619                 if (dback->disk_bytenr + offset < entry->bytenr) {
6620                         fprintf(stderr, "Ref is before the entry start, please"
6621                                 " take a btrfs-image of this file system and "
6622                                 "send it to a btrfs developer, ref %Lu\n",
6623                                 dback->disk_bytenr);
6624                         ret = -EINVAL;
6625                         goto out;
6626                 }
6627
6628                 offset += dback->disk_bytenr;
6629                 offset -= entry->bytenr;
6630                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6631                 btrfs_set_file_extent_offset(leaf, fi, offset);
6632         }
6633
6634         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6635
6636         /*
6637          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6638          * only do this if we aren't using compression, otherwise it's a
6639          * trickier case.
6640          */
6641         if (!btrfs_file_extent_compression(leaf, fi))
6642                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6643         else
6644                 printf("ram bytes may be wrong?\n");
6645         btrfs_mark_buffer_dirty(leaf);
6646 out:
6647         err = btrfs_commit_transaction(trans, root);
6648         btrfs_release_path(path);
6649         return ret ? ret : err;
6650 }
6651
6652 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6653                            struct extent_record *rec)
6654 {
6655         struct extent_backref *back;
6656         struct data_backref *dback;
6657         struct extent_entry *entry, *best = NULL;
6658         LIST_HEAD(entries);
6659         int nr_entries = 0;
6660         int broken_entries = 0;
6661         int ret = 0;
6662         short mismatch = 0;
6663
6664         /*
6665          * Metadata is easy and the backrefs should always agree on bytenr and
6666          * size, if not we've got bigger issues.
6667          */
6668         if (rec->metadata)
6669                 return 0;
6670
6671         list_for_each_entry(back, &rec->backrefs, list) {
6672                 if (back->full_backref || !back->is_data)
6673                         continue;
6674
6675                 dback = (struct data_backref *)back;
6676
6677                 /*
6678                  * We only pay attention to backrefs that we found a real
6679                  * backref for.
6680                  */
6681                 if (dback->found_ref == 0)
6682                         continue;
6683
6684                 /*
6685                  * For now we only catch when the bytes don't match, not the
6686                  * bytenr.  We can easily do this at the same time, but I want
6687                  * to have a fs image to test on before we just add repair
6688                  * functionality willy-nilly so we know we won't screw up the
6689                  * repair.
6690                  */
6691
6692                 entry = find_entry(&entries, dback->disk_bytenr,
6693                                    dback->bytes);
6694                 if (!entry) {
6695                         entry = malloc(sizeof(struct extent_entry));
6696                         if (!entry) {
6697                                 ret = -ENOMEM;
6698                                 goto out;
6699                         }
6700                         memset(entry, 0, sizeof(*entry));
6701                         entry->bytenr = dback->disk_bytenr;
6702                         entry->bytes = dback->bytes;
6703                         list_add_tail(&entry->list, &entries);
6704                         nr_entries++;
6705                 }
6706
6707                 /*
6708                  * If we only have on entry we may think the entries agree when
6709                  * in reality they don't so we have to do some extra checking.
6710                  */
6711                 if (dback->disk_bytenr != rec->start ||
6712                     dback->bytes != rec->nr || back->broken)
6713                         mismatch = 1;
6714
6715                 if (back->broken) {
6716                         entry->broken++;
6717                         broken_entries++;
6718                 }
6719
6720                 entry->count++;
6721         }
6722
6723         /* Yay all the backrefs agree, carry on good sir */
6724         if (nr_entries <= 1 && !mismatch)
6725                 goto out;
6726
6727         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6728                 "%Lu\n", rec->start);
6729
6730         /*
6731          * First we want to see if the backrefs can agree amongst themselves who
6732          * is right, so figure out which one of the entries has the highest
6733          * count.
6734          */
6735         best = find_most_right_entry(&entries);
6736
6737         /*
6738          * Ok so we may have an even split between what the backrefs think, so
6739          * this is where we use the extent ref to see what it thinks.
6740          */
6741         if (!best) {
6742                 entry = find_entry(&entries, rec->start, rec->nr);
6743                 if (!entry && (!broken_entries || !rec->found_rec)) {
6744                         fprintf(stderr, "Backrefs don't agree with each other "
6745                                 "and extent record doesn't agree with anybody,"
6746                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6747                                 rec->start, rec->nr);
6748                         ret = -EINVAL;
6749                         goto out;
6750                 } else if (!entry) {
6751                         /*
6752                          * Ok our backrefs were broken, we'll assume this is the
6753                          * correct value and add an entry for this range.
6754                          */
6755                         entry = malloc(sizeof(struct extent_entry));
6756                         if (!entry) {
6757                                 ret = -ENOMEM;
6758                                 goto out;
6759                         }
6760                         memset(entry, 0, sizeof(*entry));
6761                         entry->bytenr = rec->start;
6762                         entry->bytes = rec->nr;
6763                         list_add_tail(&entry->list, &entries);
6764                         nr_entries++;
6765                 }
6766                 entry->count++;
6767                 best = find_most_right_entry(&entries);
6768                 if (!best) {
6769                         fprintf(stderr, "Backrefs and extent record evenly "
6770                                 "split on who is right, this is going to "
6771                                 "require user input to fix bytenr %Lu bytes "
6772                                 "%Lu\n", rec->start, rec->nr);
6773                         ret = -EINVAL;
6774                         goto out;
6775                 }
6776         }
6777
6778         /*
6779          * I don't think this can happen currently as we'll abort() if we catch
6780          * this case higher up, but in case somebody removes that we still can't
6781          * deal with it properly here yet, so just bail out of that's the case.
6782          */
6783         if (best->bytenr != rec->start) {
6784                 fprintf(stderr, "Extent start and backref starts don't match, "
6785                         "please use btrfs-image on this file system and send "
6786                         "it to a btrfs developer so they can make fsck fix "
6787                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6788                         rec->start, rec->nr);
6789                 ret = -EINVAL;
6790                 goto out;
6791         }
6792
6793         /*
6794          * Ok great we all agreed on an extent record, let's go find the real
6795          * references and fix up the ones that don't match.
6796          */
6797         list_for_each_entry(back, &rec->backrefs, list) {
6798                 if (back->full_backref || !back->is_data)
6799                         continue;
6800
6801                 dback = (struct data_backref *)back;
6802
6803                 /*
6804                  * Still ignoring backrefs that don't have a real ref attached
6805                  * to them.
6806                  */
6807                 if (dback->found_ref == 0)
6808                         continue;
6809
6810                 if (dback->bytes == best->bytes &&
6811                     dback->disk_bytenr == best->bytenr)
6812                         continue;
6813
6814                 ret = repair_ref(info, path, dback, best);
6815                 if (ret)
6816                         goto out;
6817         }
6818
6819         /*
6820          * Ok we messed with the actual refs, which means we need to drop our
6821          * entire cache and go back and rescan.  I know this is a huge pain and
6822          * adds a lot of extra work, but it's the only way to be safe.  Once all
6823          * the backrefs agree we may not need to do anything to the extent
6824          * record itself.
6825          */
6826         ret = -EAGAIN;
6827 out:
6828         while (!list_empty(&entries)) {
6829                 entry = list_entry(entries.next, struct extent_entry, list);
6830                 list_del_init(&entry->list);
6831                 free(entry);
6832         }
6833         return ret;
6834 }
6835
6836 static int process_duplicates(struct btrfs_root *root,
6837                               struct cache_tree *extent_cache,
6838                               struct extent_record *rec)
6839 {
6840         struct extent_record *good, *tmp;
6841         struct cache_extent *cache;
6842         int ret;
6843
6844         /*
6845          * If we found a extent record for this extent then return, or if we
6846          * have more than one duplicate we are likely going to need to delete
6847          * something.
6848          */
6849         if (rec->found_rec || rec->num_duplicates > 1)
6850                 return 0;
6851
6852         /* Shouldn't happen but just in case */
6853         BUG_ON(!rec->num_duplicates);
6854
6855         /*
6856          * So this happens if we end up with a backref that doesn't match the
6857          * actual extent entry.  So either the backref is bad or the extent
6858          * entry is bad.  Either way we want to have the extent_record actually
6859          * reflect what we found in the extent_tree, so we need to take the
6860          * duplicate out and use that as the extent_record since the only way we
6861          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6862          */
6863         remove_cache_extent(extent_cache, &rec->cache);
6864
6865         good = list_entry(rec->dups.next, struct extent_record, list);
6866         list_del_init(&good->list);
6867         INIT_LIST_HEAD(&good->backrefs);
6868         INIT_LIST_HEAD(&good->dups);
6869         good->cache.start = good->start;
6870         good->cache.size = good->nr;
6871         good->content_checked = 0;
6872         good->owner_ref_checked = 0;
6873         good->num_duplicates = 0;
6874         good->refs = rec->refs;
6875         list_splice_init(&rec->backrefs, &good->backrefs);
6876         while (1) {
6877                 cache = lookup_cache_extent(extent_cache, good->start,
6878                                             good->nr);
6879                 if (!cache)
6880                         break;
6881                 tmp = container_of(cache, struct extent_record, cache);
6882
6883                 /*
6884                  * If we find another overlapping extent and it's found_rec is
6885                  * set then it's a duplicate and we need to try and delete
6886                  * something.
6887                  */
6888                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6889                         if (list_empty(&good->list))
6890                                 list_add_tail(&good->list,
6891                                               &duplicate_extents);
6892                         good->num_duplicates += tmp->num_duplicates + 1;
6893                         list_splice_init(&tmp->dups, &good->dups);
6894                         list_del_init(&tmp->list);
6895                         list_add_tail(&tmp->list, &good->dups);
6896                         remove_cache_extent(extent_cache, &tmp->cache);
6897                         continue;
6898                 }
6899
6900                 /*
6901                  * Ok we have another non extent item backed extent rec, so lets
6902                  * just add it to this extent and carry on like we did above.
6903                  */
6904                 good->refs += tmp->refs;
6905                 list_splice_init(&tmp->backrefs, &good->backrefs);
6906                 remove_cache_extent(extent_cache, &tmp->cache);
6907                 free(tmp);
6908         }
6909         ret = insert_cache_extent(extent_cache, &good->cache);
6910         BUG_ON(ret);
6911         free(rec);
6912         return good->num_duplicates ? 0 : 1;
6913 }
6914
6915 static int delete_duplicate_records(struct btrfs_root *root,
6916                                     struct extent_record *rec)
6917 {
6918         struct btrfs_trans_handle *trans;
6919         LIST_HEAD(delete_list);
6920         struct btrfs_path *path;
6921         struct extent_record *tmp, *good, *n;
6922         int nr_del = 0;
6923         int ret = 0, err;
6924         struct btrfs_key key;
6925
6926         path = btrfs_alloc_path();
6927         if (!path) {
6928                 ret = -ENOMEM;
6929                 goto out;
6930         }
6931
6932         good = rec;
6933         /* Find the record that covers all of the duplicates. */
6934         list_for_each_entry(tmp, &rec->dups, list) {
6935                 if (good->start < tmp->start)
6936                         continue;
6937                 if (good->nr > tmp->nr)
6938                         continue;
6939
6940                 if (tmp->start + tmp->nr < good->start + good->nr) {
6941                         fprintf(stderr, "Ok we have overlapping extents that "
6942                                 "aren't completely covered by eachother, this "
6943                                 "is going to require more careful thought.  "
6944                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6945                                 tmp->start, tmp->nr, good->start, good->nr);
6946                         abort();
6947                 }
6948                 good = tmp;
6949         }
6950
6951         if (good != rec)
6952                 list_add_tail(&rec->list, &delete_list);
6953
6954         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6955                 if (tmp == good)
6956                         continue;
6957                 list_move_tail(&tmp->list, &delete_list);
6958         }
6959
6960         root = root->fs_info->extent_root;
6961         trans = btrfs_start_transaction(root, 1);
6962         if (IS_ERR(trans)) {
6963                 ret = PTR_ERR(trans);
6964                 goto out;
6965         }
6966
6967         list_for_each_entry(tmp, &delete_list, list) {
6968                 if (tmp->found_rec == 0)
6969                         continue;
6970                 key.objectid = tmp->start;
6971                 key.type = BTRFS_EXTENT_ITEM_KEY;
6972                 key.offset = tmp->nr;
6973
6974                 /* Shouldn't happen but just in case */
6975                 if (tmp->metadata) {
6976                         fprintf(stderr, "Well this shouldn't happen, extent "
6977                                 "record overlaps but is metadata? "
6978                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6979                         abort();
6980                 }
6981
6982                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6983                 if (ret) {
6984                         if (ret > 0)
6985                                 ret = -EINVAL;
6986                         break;
6987                 }
6988                 ret = btrfs_del_item(trans, root, path);
6989                 if (ret)
6990                         break;
6991                 btrfs_release_path(path);
6992                 nr_del++;
6993         }
6994         err = btrfs_commit_transaction(trans, root);
6995         if (err && !ret)
6996                 ret = err;
6997 out:
6998         while (!list_empty(&delete_list)) {
6999                 tmp = list_entry(delete_list.next, struct extent_record, list);
7000                 list_del_init(&tmp->list);
7001                 if (tmp == rec)
7002                         continue;
7003                 free(tmp);
7004         }
7005
7006         while (!list_empty(&rec->dups)) {
7007                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7008                 list_del_init(&tmp->list);
7009                 free(tmp);
7010         }
7011
7012         btrfs_free_path(path);
7013
7014         if (!ret && !nr_del)
7015                 rec->num_duplicates = 0;
7016
7017         return ret ? ret : nr_del;
7018 }
7019
7020 static int find_possible_backrefs(struct btrfs_fs_info *info,
7021                                   struct btrfs_path *path,
7022                                   struct cache_tree *extent_cache,
7023                                   struct extent_record *rec)
7024 {
7025         struct btrfs_root *root;
7026         struct extent_backref *back;
7027         struct data_backref *dback;
7028         struct cache_extent *cache;
7029         struct btrfs_file_extent_item *fi;
7030         struct btrfs_key key;
7031         u64 bytenr, bytes;
7032         int ret;
7033
7034         list_for_each_entry(back, &rec->backrefs, list) {
7035                 /* Don't care about full backrefs (poor unloved backrefs) */
7036                 if (back->full_backref || !back->is_data)
7037                         continue;
7038
7039                 dback = (struct data_backref *)back;
7040
7041                 /* We found this one, we don't need to do a lookup */
7042                 if (dback->found_ref)
7043                         continue;
7044
7045                 key.objectid = dback->root;
7046                 key.type = BTRFS_ROOT_ITEM_KEY;
7047                 key.offset = (u64)-1;
7048
7049                 root = btrfs_read_fs_root(info, &key);
7050
7051                 /* No root, definitely a bad ref, skip */
7052                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7053                         continue;
7054                 /* Other err, exit */
7055                 if (IS_ERR(root))
7056                         return PTR_ERR(root);
7057
7058                 key.objectid = dback->owner;
7059                 key.type = BTRFS_EXTENT_DATA_KEY;
7060                 key.offset = dback->offset;
7061                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7062                 if (ret) {
7063                         btrfs_release_path(path);
7064                         if (ret < 0)
7065                                 return ret;
7066                         /* Didn't find it, we can carry on */
7067                         ret = 0;
7068                         continue;
7069                 }
7070
7071                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7072                                     struct btrfs_file_extent_item);
7073                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7074                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7075                 btrfs_release_path(path);
7076                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7077                 if (cache) {
7078                         struct extent_record *tmp;
7079                         tmp = container_of(cache, struct extent_record, cache);
7080
7081                         /*
7082                          * If we found an extent record for the bytenr for this
7083                          * particular backref then we can't add it to our
7084                          * current extent record.  We only want to add backrefs
7085                          * that don't have a corresponding extent item in the
7086                          * extent tree since they likely belong to this record
7087                          * and we need to fix it if it doesn't match bytenrs.
7088                          */
7089                         if  (tmp->found_rec)
7090                                 continue;
7091                 }
7092
7093                 dback->found_ref += 1;
7094                 dback->disk_bytenr = bytenr;
7095                 dback->bytes = bytes;
7096
7097                 /*
7098                  * Set this so the verify backref code knows not to trust the
7099                  * values in this backref.
7100                  */
7101                 back->broken = 1;
7102         }
7103
7104         return 0;
7105 }
7106
7107 /*
7108  * Record orphan data ref into corresponding root.
7109  *
7110  * Return 0 if the extent item contains data ref and recorded.
7111  * Return 1 if the extent item contains no useful data ref
7112  *   On that case, it may contains only shared_dataref or metadata backref
7113  *   or the file extent exists(this should be handled by the extent bytenr
7114  *   recovery routine)
7115  * Return <0 if something goes wrong.
7116  */
7117 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7118                                       struct extent_record *rec)
7119 {
7120         struct btrfs_key key;
7121         struct btrfs_root *dest_root;
7122         struct extent_backref *back;
7123         struct data_backref *dback;
7124         struct orphan_data_extent *orphan;
7125         struct btrfs_path *path;
7126         int recorded_data_ref = 0;
7127         int ret = 0;
7128
7129         if (rec->metadata)
7130                 return 1;
7131         path = btrfs_alloc_path();
7132         if (!path)
7133                 return -ENOMEM;
7134         list_for_each_entry(back, &rec->backrefs, list) {
7135                 if (back->full_backref || !back->is_data ||
7136                     !back->found_extent_tree)
7137                         continue;
7138                 dback = (struct data_backref *)back;
7139                 if (dback->found_ref)
7140                         continue;
7141                 key.objectid = dback->root;
7142                 key.type = BTRFS_ROOT_ITEM_KEY;
7143                 key.offset = (u64)-1;
7144
7145                 dest_root = btrfs_read_fs_root(fs_info, &key);
7146
7147                 /* For non-exist root we just skip it */
7148                 if (IS_ERR(dest_root) || !dest_root)
7149                         continue;
7150
7151                 key.objectid = dback->owner;
7152                 key.type = BTRFS_EXTENT_DATA_KEY;
7153                 key.offset = dback->offset;
7154
7155                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7156                 /*
7157                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7158                  * we need to record it for inode/file extent rebuild.
7159                  * For ret > 0, we record it only for file extent rebuild.
7160                  * For ret == 0, the file extent exists but only bytenr
7161                  * mismatch, let the original bytenr fix routine to handle,
7162                  * don't record it.
7163                  */
7164                 if (ret == 0)
7165                         continue;
7166                 ret = 0;
7167                 orphan = malloc(sizeof(*orphan));
7168                 if (!orphan) {
7169                         ret = -ENOMEM;
7170                         goto out;
7171                 }
7172                 INIT_LIST_HEAD(&orphan->list);
7173                 orphan->root = dback->root;
7174                 orphan->objectid = dback->owner;
7175                 orphan->offset = dback->offset;
7176                 orphan->disk_bytenr = rec->cache.start;
7177                 orphan->disk_len = rec->cache.size;
7178                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7179                 recorded_data_ref = 1;
7180         }
7181 out:
7182         btrfs_free_path(path);
7183         if (!ret)
7184                 return !recorded_data_ref;
7185         else
7186                 return ret;
7187 }
7188
7189 /*
7190  * when an incorrect extent item is found, this will delete
7191  * all of the existing entries for it and recreate them
7192  * based on what the tree scan found.
7193  */
7194 static int fixup_extent_refs(struct btrfs_fs_info *info,
7195                              struct cache_tree *extent_cache,
7196                              struct extent_record *rec)
7197 {
7198         struct btrfs_trans_handle *trans = NULL;
7199         int ret;
7200         struct btrfs_path *path;
7201         struct list_head *cur = rec->backrefs.next;
7202         struct cache_extent *cache;
7203         struct extent_backref *back;
7204         int allocated = 0;
7205         u64 flags = 0;
7206
7207         if (rec->flag_block_full_backref)
7208                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7209
7210         path = btrfs_alloc_path();
7211         if (!path)
7212                 return -ENOMEM;
7213
7214         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7215                 /*
7216                  * Sometimes the backrefs themselves are so broken they don't
7217                  * get attached to any meaningful rec, so first go back and
7218                  * check any of our backrefs that we couldn't find and throw
7219                  * them into the list if we find the backref so that
7220                  * verify_backrefs can figure out what to do.
7221                  */
7222                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7223                 if (ret < 0)
7224                         goto out;
7225         }
7226
7227         /* step one, make sure all of the backrefs agree */
7228         ret = verify_backrefs(info, path, rec);
7229         if (ret < 0)
7230                 goto out;
7231
7232         trans = btrfs_start_transaction(info->extent_root, 1);
7233         if (IS_ERR(trans)) {
7234                 ret = PTR_ERR(trans);
7235                 goto out;
7236         }
7237
7238         /* step two, delete all the existing records */
7239         ret = delete_extent_records(trans, info->extent_root, path,
7240                                     rec->start, rec->max_size);
7241
7242         if (ret < 0)
7243                 goto out;
7244
7245         /* was this block corrupt?  If so, don't add references to it */
7246         cache = lookup_cache_extent(info->corrupt_blocks,
7247                                     rec->start, rec->max_size);
7248         if (cache) {
7249                 ret = 0;
7250                 goto out;
7251         }
7252
7253         /* step three, recreate all the refs we did find */
7254         while(cur != &rec->backrefs) {
7255                 back = list_entry(cur, struct extent_backref, list);
7256                 cur = cur->next;
7257
7258                 /*
7259                  * if we didn't find any references, don't create a
7260                  * new extent record
7261                  */
7262                 if (!back->found_ref)
7263                         continue;
7264
7265                 rec->bad_full_backref = 0;
7266                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7267                 allocated = 1;
7268
7269                 if (ret)
7270                         goto out;
7271         }
7272 out:
7273         if (trans) {
7274                 int err = btrfs_commit_transaction(trans, info->extent_root);
7275                 if (!ret)
7276                         ret = err;
7277         }
7278
7279         btrfs_free_path(path);
7280         return ret;
7281 }
7282
7283 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7284                               struct extent_record *rec)
7285 {
7286         struct btrfs_trans_handle *trans;
7287         struct btrfs_root *root = fs_info->extent_root;
7288         struct btrfs_path *path;
7289         struct btrfs_extent_item *ei;
7290         struct btrfs_key key;
7291         u64 flags;
7292         int ret = 0;
7293
7294         key.objectid = rec->start;
7295         if (rec->metadata) {
7296                 key.type = BTRFS_METADATA_ITEM_KEY;
7297                 key.offset = rec->info_level;
7298         } else {
7299                 key.type = BTRFS_EXTENT_ITEM_KEY;
7300                 key.offset = rec->max_size;
7301         }
7302
7303         path = btrfs_alloc_path();
7304         if (!path)
7305                 return -ENOMEM;
7306
7307         trans = btrfs_start_transaction(root, 0);
7308         if (IS_ERR(trans)) {
7309                 btrfs_free_path(path);
7310                 return PTR_ERR(trans);
7311         }
7312
7313         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7314         if (ret < 0) {
7315                 btrfs_free_path(path);
7316                 btrfs_commit_transaction(trans, root);
7317                 return ret;
7318         } else if (ret) {
7319                 fprintf(stderr, "Didn't find extent for %llu\n",
7320                         (unsigned long long)rec->start);
7321                 btrfs_free_path(path);
7322                 btrfs_commit_transaction(trans, root);
7323                 return -ENOENT;
7324         }
7325
7326         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7327                             struct btrfs_extent_item);
7328         flags = btrfs_extent_flags(path->nodes[0], ei);
7329         if (rec->flag_block_full_backref) {
7330                 fprintf(stderr, "setting full backref on %llu\n",
7331                         (unsigned long long)key.objectid);
7332                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7333         } else {
7334                 fprintf(stderr, "clearing full backref on %llu\n",
7335                         (unsigned long long)key.objectid);
7336                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7337         }
7338         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7339         btrfs_mark_buffer_dirty(path->nodes[0]);
7340         btrfs_free_path(path);
7341         return btrfs_commit_transaction(trans, root);
7342 }
7343
7344 /* right now we only prune from the extent allocation tree */
7345 static int prune_one_block(struct btrfs_trans_handle *trans,
7346                            struct btrfs_fs_info *info,
7347                            struct btrfs_corrupt_block *corrupt)
7348 {
7349         int ret;
7350         struct btrfs_path path;
7351         struct extent_buffer *eb;
7352         u64 found;
7353         int slot;
7354         int nritems;
7355         int level = corrupt->level + 1;
7356
7357         btrfs_init_path(&path);
7358 again:
7359         /* we want to stop at the parent to our busted block */
7360         path.lowest_level = level;
7361
7362         ret = btrfs_search_slot(trans, info->extent_root,
7363                                 &corrupt->key, &path, -1, 1);
7364
7365         if (ret < 0)
7366                 goto out;
7367
7368         eb = path.nodes[level];
7369         if (!eb) {
7370                 ret = -ENOENT;
7371                 goto out;
7372         }
7373
7374         /*
7375          * hopefully the search gave us the block we want to prune,
7376          * lets try that first
7377          */
7378         slot = path.slots[level];
7379         found =  btrfs_node_blockptr(eb, slot);
7380         if (found == corrupt->cache.start)
7381                 goto del_ptr;
7382
7383         nritems = btrfs_header_nritems(eb);
7384
7385         /* the search failed, lets scan this node and hope we find it */
7386         for (slot = 0; slot < nritems; slot++) {
7387                 found =  btrfs_node_blockptr(eb, slot);
7388                 if (found == corrupt->cache.start)
7389                         goto del_ptr;
7390         }
7391         /*
7392          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7393          * to this block
7394          */
7395         if (eb == info->extent_root->node) {
7396                 ret = -ENOENT;
7397                 goto out;
7398         } else {
7399                 level++;
7400                 btrfs_release_path(&path);
7401                 goto again;
7402         }
7403
7404 del_ptr:
7405         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7406         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7407
7408 out:
7409         btrfs_release_path(&path);
7410         return ret;
7411 }
7412
7413 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7414 {
7415         struct btrfs_trans_handle *trans = NULL;
7416         struct cache_extent *cache;
7417         struct btrfs_corrupt_block *corrupt;
7418
7419         while (1) {
7420                 cache = search_cache_extent(info->corrupt_blocks, 0);
7421                 if (!cache)
7422                         break;
7423                 if (!trans) {
7424                         trans = btrfs_start_transaction(info->extent_root, 1);
7425                         if (IS_ERR(trans))
7426                                 return PTR_ERR(trans);
7427                 }
7428                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7429                 prune_one_block(trans, info, corrupt);
7430                 remove_cache_extent(info->corrupt_blocks, cache);
7431         }
7432         if (trans)
7433                 return btrfs_commit_transaction(trans, info->extent_root);
7434         return 0;
7435 }
7436
7437 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7438 {
7439         struct btrfs_block_group_cache *cache;
7440         u64 start, end;
7441         int ret;
7442
7443         while (1) {
7444                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7445                                             &start, &end, EXTENT_DIRTY);
7446                 if (ret)
7447                         break;
7448                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7449                                    GFP_NOFS);
7450         }
7451
7452         start = 0;
7453         while (1) {
7454                 cache = btrfs_lookup_first_block_group(fs_info, start);
7455                 if (!cache)
7456                         break;
7457                 if (cache->cached)
7458                         cache->cached = 0;
7459                 start = cache->key.objectid + cache->key.offset;
7460         }
7461 }
7462
7463 static int check_extent_refs(struct btrfs_root *root,
7464                              struct cache_tree *extent_cache)
7465 {
7466         struct extent_record *rec;
7467         struct cache_extent *cache;
7468         int err = 0;
7469         int ret = 0;
7470         int fixed = 0;
7471         int had_dups = 0;
7472         int recorded = 0;
7473
7474         if (repair) {
7475                 /*
7476                  * if we're doing a repair, we have to make sure
7477                  * we don't allocate from the problem extents.
7478                  * In the worst case, this will be all the
7479                  * extents in the FS
7480                  */
7481                 cache = search_cache_extent(extent_cache, 0);
7482                 while(cache) {
7483                         rec = container_of(cache, struct extent_record, cache);
7484                         set_extent_dirty(root->fs_info->excluded_extents,
7485                                          rec->start,
7486                                          rec->start + rec->max_size - 1,
7487                                          GFP_NOFS);
7488                         cache = next_cache_extent(cache);
7489                 }
7490
7491                 /* pin down all the corrupted blocks too */
7492                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7493                 while(cache) {
7494                         set_extent_dirty(root->fs_info->excluded_extents,
7495                                          cache->start,
7496                                          cache->start + cache->size - 1,
7497                                          GFP_NOFS);
7498                         cache = next_cache_extent(cache);
7499                 }
7500                 prune_corrupt_blocks(root->fs_info);
7501                 reset_cached_block_groups(root->fs_info);
7502         }
7503
7504         reset_cached_block_groups(root->fs_info);
7505
7506         /*
7507          * We need to delete any duplicate entries we find first otherwise we
7508          * could mess up the extent tree when we have backrefs that actually
7509          * belong to a different extent item and not the weird duplicate one.
7510          */
7511         while (repair && !list_empty(&duplicate_extents)) {
7512                 rec = list_entry(duplicate_extents.next, struct extent_record,
7513                                  list);
7514                 list_del_init(&rec->list);
7515
7516                 /* Sometimes we can find a backref before we find an actual
7517                  * extent, so we need to process it a little bit to see if there
7518                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7519                  * if this is a backref screwup.  If we need to delete stuff
7520                  * process_duplicates() will return 0, otherwise it will return
7521                  * 1 and we
7522                  */
7523                 if (process_duplicates(root, extent_cache, rec))
7524                         continue;
7525                 ret = delete_duplicate_records(root, rec);
7526                 if (ret < 0)
7527                         return ret;
7528                 /*
7529                  * delete_duplicate_records will return the number of entries
7530                  * deleted, so if it's greater than 0 then we know we actually
7531                  * did something and we need to remove.
7532                  */
7533                 if (ret)
7534                         had_dups = 1;
7535         }
7536
7537         if (had_dups)
7538                 return -EAGAIN;
7539
7540         while(1) {
7541                 int cur_err = 0;
7542
7543                 fixed = 0;
7544                 recorded = 0;
7545                 cache = search_cache_extent(extent_cache, 0);
7546                 if (!cache)
7547                         break;
7548                 rec = container_of(cache, struct extent_record, cache);
7549                 if (rec->num_duplicates) {
7550                         fprintf(stderr, "extent item %llu has multiple extent "
7551                                 "items\n", (unsigned long long)rec->start);
7552                         err = 1;
7553                         cur_err = 1;
7554                 }
7555
7556                 if (rec->refs != rec->extent_item_refs) {
7557                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7558                                 (unsigned long long)rec->start,
7559                                 (unsigned long long)rec->nr);
7560                         fprintf(stderr, "extent item %llu, found %llu\n",
7561                                 (unsigned long long)rec->extent_item_refs,
7562                                 (unsigned long long)rec->refs);
7563                         ret = record_orphan_data_extents(root->fs_info, rec);
7564                         if (ret < 0)
7565                                 goto repair_abort;
7566                         if (ret == 0) {
7567                                 recorded = 1;
7568                         } else {
7569                                 /*
7570                                  * we can't use the extent to repair file
7571                                  * extent, let the fallback method handle it.
7572                                  */
7573                                 if (!fixed && repair) {
7574                                         ret = fixup_extent_refs(
7575                                                         root->fs_info,
7576                                                         extent_cache, rec);
7577                                         if (ret)
7578                                                 goto repair_abort;
7579                                         fixed = 1;
7580                                 }
7581                         }
7582                         err = 1;
7583                         cur_err = 1;
7584                 }
7585                 if (all_backpointers_checked(rec, 1)) {
7586                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7587                                 (unsigned long long)rec->start,
7588                                 (unsigned long long)rec->nr);
7589
7590                         if (!fixed && !recorded && repair) {
7591                                 ret = fixup_extent_refs(root->fs_info,
7592                                                         extent_cache, rec);
7593                                 if (ret)
7594                                         goto repair_abort;
7595                                 fixed = 1;
7596                         }
7597                         cur_err = 1;
7598                         err = 1;
7599                 }
7600                 if (!rec->owner_ref_checked) {
7601                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7602                                 (unsigned long long)rec->start,
7603                                 (unsigned long long)rec->nr);
7604                         if (!fixed && !recorded && repair) {
7605                                 ret = fixup_extent_refs(root->fs_info,
7606                                                         extent_cache, rec);
7607                                 if (ret)
7608                                         goto repair_abort;
7609                                 fixed = 1;
7610                         }
7611                         err = 1;
7612                         cur_err = 1;
7613                 }
7614                 if (rec->bad_full_backref) {
7615                         fprintf(stderr, "bad full backref, on [%llu]\n",
7616                                 (unsigned long long)rec->start);
7617                         if (repair) {
7618                                 ret = fixup_extent_flags(root->fs_info, rec);
7619                                 if (ret)
7620                                         goto repair_abort;
7621                                 fixed = 1;
7622                         }
7623                         err = 1;
7624                         cur_err = 1;
7625                 }
7626                 /*
7627                  * Although it's not a extent ref's problem, we reuse this
7628                  * routine for error reporting.
7629                  * No repair function yet.
7630                  */
7631                 if (rec->crossing_stripes) {
7632                         fprintf(stderr,
7633                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7634                                 rec->start, rec->start + rec->max_size);
7635                         err = 1;
7636                         cur_err = 1;
7637                 }
7638
7639                 if (rec->wrong_chunk_type) {
7640                         fprintf(stderr,
7641                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7642                                 rec->start, rec->start + rec->max_size);
7643                         err = 1;
7644                         cur_err = 1;
7645                 }
7646
7647                 remove_cache_extent(extent_cache, cache);
7648                 free_all_extent_backrefs(rec);
7649                 if (!init_extent_tree && repair && (!cur_err || fixed))
7650                         clear_extent_dirty(root->fs_info->excluded_extents,
7651                                            rec->start,
7652                                            rec->start + rec->max_size - 1,
7653                                            GFP_NOFS);
7654                 free(rec);
7655         }
7656 repair_abort:
7657         if (repair) {
7658                 if (ret && ret != -EAGAIN) {
7659                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7660                         exit(1);
7661                 } else if (!ret) {
7662                         struct btrfs_trans_handle *trans;
7663
7664                         root = root->fs_info->extent_root;
7665                         trans = btrfs_start_transaction(root, 1);
7666                         if (IS_ERR(trans)) {
7667                                 ret = PTR_ERR(trans);
7668                                 goto repair_abort;
7669                         }
7670
7671                         btrfs_fix_block_accounting(trans, root);
7672                         ret = btrfs_commit_transaction(trans, root);
7673                         if (ret)
7674                                 goto repair_abort;
7675                 }
7676                 if (err)
7677                         fprintf(stderr, "repaired damaged extent references\n");
7678                 return ret;
7679         }
7680         return err;
7681 }
7682
7683 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7684 {
7685         u64 stripe_size;
7686
7687         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7688                 stripe_size = length;
7689                 stripe_size /= num_stripes;
7690         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7691                 stripe_size = length * 2;
7692                 stripe_size /= num_stripes;
7693         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7694                 stripe_size = length;
7695                 stripe_size /= (num_stripes - 1);
7696         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7697                 stripe_size = length;
7698                 stripe_size /= (num_stripes - 2);
7699         } else {
7700                 stripe_size = length;
7701         }
7702         return stripe_size;
7703 }
7704
7705 /*
7706  * Check the chunk with its block group/dev list ref:
7707  * Return 0 if all refs seems valid.
7708  * Return 1 if part of refs seems valid, need later check for rebuild ref
7709  * like missing block group and needs to search extent tree to rebuild them.
7710  * Return -1 if essential refs are missing and unable to rebuild.
7711  */
7712 static int check_chunk_refs(struct chunk_record *chunk_rec,
7713                             struct block_group_tree *block_group_cache,
7714                             struct device_extent_tree *dev_extent_cache,
7715                             int silent)
7716 {
7717         struct cache_extent *block_group_item;
7718         struct block_group_record *block_group_rec;
7719         struct cache_extent *dev_extent_item;
7720         struct device_extent_record *dev_extent_rec;
7721         u64 devid;
7722         u64 offset;
7723         u64 length;
7724         int metadump_v2 = 0;
7725         int i;
7726         int ret = 0;
7727
7728         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7729                                                chunk_rec->offset,
7730                                                chunk_rec->length);
7731         if (block_group_item) {
7732                 block_group_rec = container_of(block_group_item,
7733                                                struct block_group_record,
7734                                                cache);
7735                 if (chunk_rec->length != block_group_rec->offset ||
7736                     chunk_rec->offset != block_group_rec->objectid ||
7737                     (!metadump_v2 &&
7738                      chunk_rec->type_flags != block_group_rec->flags)) {
7739                         if (!silent)
7740                                 fprintf(stderr,
7741                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7742                                         chunk_rec->objectid,
7743                                         chunk_rec->type,
7744                                         chunk_rec->offset,
7745                                         chunk_rec->length,
7746                                         chunk_rec->offset,
7747                                         chunk_rec->type_flags,
7748                                         block_group_rec->objectid,
7749                                         block_group_rec->type,
7750                                         block_group_rec->offset,
7751                                         block_group_rec->offset,
7752                                         block_group_rec->objectid,
7753                                         block_group_rec->flags);
7754                         ret = -1;
7755                 } else {
7756                         list_del_init(&block_group_rec->list);
7757                         chunk_rec->bg_rec = block_group_rec;
7758                 }
7759         } else {
7760                 if (!silent)
7761                         fprintf(stderr,
7762                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7763                                 chunk_rec->objectid,
7764                                 chunk_rec->type,
7765                                 chunk_rec->offset,
7766                                 chunk_rec->length,
7767                                 chunk_rec->offset,
7768                                 chunk_rec->type_flags);
7769                 ret = 1;
7770         }
7771
7772         if (metadump_v2)
7773                 return ret;
7774
7775         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7776                                     chunk_rec->num_stripes);
7777         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7778                 devid = chunk_rec->stripes[i].devid;
7779                 offset = chunk_rec->stripes[i].offset;
7780                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7781                                                        devid, offset, length);
7782                 if (dev_extent_item) {
7783                         dev_extent_rec = container_of(dev_extent_item,
7784                                                 struct device_extent_record,
7785                                                 cache);
7786                         if (dev_extent_rec->objectid != devid ||
7787                             dev_extent_rec->offset != offset ||
7788                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7789                             dev_extent_rec->length != length) {
7790                                 if (!silent)
7791                                         fprintf(stderr,
7792                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7793                                                 chunk_rec->objectid,
7794                                                 chunk_rec->type,
7795                                                 chunk_rec->offset,
7796                                                 chunk_rec->stripes[i].devid,
7797                                                 chunk_rec->stripes[i].offset,
7798                                                 dev_extent_rec->objectid,
7799                                                 dev_extent_rec->offset,
7800                                                 dev_extent_rec->length);
7801                                 ret = -1;
7802                         } else {
7803                                 list_move(&dev_extent_rec->chunk_list,
7804                                           &chunk_rec->dextents);
7805                         }
7806                 } else {
7807                         if (!silent)
7808                                 fprintf(stderr,
7809                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7810                                         chunk_rec->objectid,
7811                                         chunk_rec->type,
7812                                         chunk_rec->offset,
7813                                         chunk_rec->stripes[i].devid,
7814                                         chunk_rec->stripes[i].offset);
7815                         ret = -1;
7816                 }
7817         }
7818         return ret;
7819 }
7820
7821 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7822 int check_chunks(struct cache_tree *chunk_cache,
7823                  struct block_group_tree *block_group_cache,
7824                  struct device_extent_tree *dev_extent_cache,
7825                  struct list_head *good, struct list_head *bad,
7826                  struct list_head *rebuild, int silent)
7827 {
7828         struct cache_extent *chunk_item;
7829         struct chunk_record *chunk_rec;
7830         struct block_group_record *bg_rec;
7831         struct device_extent_record *dext_rec;
7832         int err;
7833         int ret = 0;
7834
7835         chunk_item = first_cache_extent(chunk_cache);
7836         while (chunk_item) {
7837                 chunk_rec = container_of(chunk_item, struct chunk_record,
7838                                          cache);
7839                 err = check_chunk_refs(chunk_rec, block_group_cache,
7840                                        dev_extent_cache, silent);
7841                 if (err < 0)
7842                         ret = err;
7843                 if (err == 0 && good)
7844                         list_add_tail(&chunk_rec->list, good);
7845                 if (err > 0 && rebuild)
7846                         list_add_tail(&chunk_rec->list, rebuild);
7847                 if (err < 0 && bad)
7848                         list_add_tail(&chunk_rec->list, bad);
7849                 chunk_item = next_cache_extent(chunk_item);
7850         }
7851
7852         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7853                 if (!silent)
7854                         fprintf(stderr,
7855                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7856                                 bg_rec->objectid,
7857                                 bg_rec->offset,
7858                                 bg_rec->flags);
7859                 if (!ret)
7860                         ret = 1;
7861         }
7862
7863         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7864                             chunk_list) {
7865                 if (!silent)
7866                         fprintf(stderr,
7867                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7868                                 dext_rec->objectid,
7869                                 dext_rec->offset,
7870                                 dext_rec->length);
7871                 if (!ret)
7872                         ret = 1;
7873         }
7874         return ret;
7875 }
7876
7877
7878 static int check_device_used(struct device_record *dev_rec,
7879                              struct device_extent_tree *dext_cache)
7880 {
7881         struct cache_extent *cache;
7882         struct device_extent_record *dev_extent_rec;
7883         u64 total_byte = 0;
7884
7885         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7886         while (cache) {
7887                 dev_extent_rec = container_of(cache,
7888                                               struct device_extent_record,
7889                                               cache);
7890                 if (dev_extent_rec->objectid != dev_rec->devid)
7891                         break;
7892
7893                 list_del_init(&dev_extent_rec->device_list);
7894                 total_byte += dev_extent_rec->length;
7895                 cache = next_cache_extent(cache);
7896         }
7897
7898         if (total_byte != dev_rec->byte_used) {
7899                 fprintf(stderr,
7900                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7901                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7902                         dev_rec->type, dev_rec->offset);
7903                 return -1;
7904         } else {
7905                 return 0;
7906         }
7907 }
7908
7909 /* check btrfs_dev_item -> btrfs_dev_extent */
7910 static int check_devices(struct rb_root *dev_cache,
7911                          struct device_extent_tree *dev_extent_cache)
7912 {
7913         struct rb_node *dev_node;
7914         struct device_record *dev_rec;
7915         struct device_extent_record *dext_rec;
7916         int err;
7917         int ret = 0;
7918
7919         dev_node = rb_first(dev_cache);
7920         while (dev_node) {
7921                 dev_rec = container_of(dev_node, struct device_record, node);
7922                 err = check_device_used(dev_rec, dev_extent_cache);
7923                 if (err)
7924                         ret = err;
7925
7926                 dev_node = rb_next(dev_node);
7927         }
7928         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7929                             device_list) {
7930                 fprintf(stderr,
7931                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7932                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7933                 if (!ret)
7934                         ret = 1;
7935         }
7936         return ret;
7937 }
7938
7939 static int add_root_item_to_list(struct list_head *head,
7940                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7941                                   u8 level, u8 drop_level,
7942                                   int level_size, struct btrfs_key *drop_key)
7943 {
7944
7945         struct root_item_record *ri_rec;
7946         ri_rec = malloc(sizeof(*ri_rec));
7947         if (!ri_rec)
7948                 return -ENOMEM;
7949         ri_rec->bytenr = bytenr;
7950         ri_rec->objectid = objectid;
7951         ri_rec->level = level;
7952         ri_rec->level_size = level_size;
7953         ri_rec->drop_level = drop_level;
7954         ri_rec->last_snapshot = last_snapshot;
7955         if (drop_key)
7956                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7957         list_add_tail(&ri_rec->list, head);
7958
7959         return 0;
7960 }
7961
7962 static void free_root_item_list(struct list_head *list)
7963 {
7964         struct root_item_record *ri_rec;
7965
7966         while (!list_empty(list)) {
7967                 ri_rec = list_first_entry(list, struct root_item_record,
7968                                           list);
7969                 list_del_init(&ri_rec->list);
7970                 free(ri_rec);
7971         }
7972 }
7973
7974 static int deal_root_from_list(struct list_head *list,
7975                                struct btrfs_root *root,
7976                                struct block_info *bits,
7977                                int bits_nr,
7978                                struct cache_tree *pending,
7979                                struct cache_tree *seen,
7980                                struct cache_tree *reada,
7981                                struct cache_tree *nodes,
7982                                struct cache_tree *extent_cache,
7983                                struct cache_tree *chunk_cache,
7984                                struct rb_root *dev_cache,
7985                                struct block_group_tree *block_group_cache,
7986                                struct device_extent_tree *dev_extent_cache)
7987 {
7988         int ret = 0;
7989         u64 last;
7990
7991         while (!list_empty(list)) {
7992                 struct root_item_record *rec;
7993                 struct extent_buffer *buf;
7994                 rec = list_entry(list->next,
7995                                  struct root_item_record, list);
7996                 last = 0;
7997                 buf = read_tree_block(root->fs_info->tree_root,
7998                                       rec->bytenr, rec->level_size, 0);
7999                 if (!extent_buffer_uptodate(buf)) {
8000                         free_extent_buffer(buf);
8001                         ret = -EIO;
8002                         break;
8003                 }
8004                 add_root_to_pending(buf, extent_cache, pending,
8005                                     seen, nodes, rec->objectid);
8006                 /*
8007                  * To rebuild extent tree, we need deal with snapshot
8008                  * one by one, otherwise we deal with node firstly which
8009                  * can maximize readahead.
8010                  */
8011                 while (1) {
8012                         ret = run_next_block(root, bits, bits_nr, &last,
8013                                              pending, seen, reada, nodes,
8014                                              extent_cache, chunk_cache,
8015                                              dev_cache, block_group_cache,
8016                                              dev_extent_cache, rec);
8017                         if (ret != 0)
8018                                 break;
8019                 }
8020                 free_extent_buffer(buf);
8021                 list_del(&rec->list);
8022                 free(rec);
8023                 if (ret < 0)
8024                         break;
8025         }
8026         while (ret >= 0) {
8027                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8028                                      reada, nodes, extent_cache, chunk_cache,
8029                                      dev_cache, block_group_cache,
8030                                      dev_extent_cache, NULL);
8031                 if (ret != 0) {
8032                         if (ret > 0)
8033                                 ret = 0;
8034                         break;
8035                 }
8036         }
8037         return ret;
8038 }
8039
8040 static int check_chunks_and_extents(struct btrfs_root *root)
8041 {
8042         struct rb_root dev_cache;
8043         struct cache_tree chunk_cache;
8044         struct block_group_tree block_group_cache;
8045         struct device_extent_tree dev_extent_cache;
8046         struct cache_tree extent_cache;
8047         struct cache_tree seen;
8048         struct cache_tree pending;
8049         struct cache_tree reada;
8050         struct cache_tree nodes;
8051         struct extent_io_tree excluded_extents;
8052         struct cache_tree corrupt_blocks;
8053         struct btrfs_path path;
8054         struct btrfs_key key;
8055         struct btrfs_key found_key;
8056         int ret, err = 0;
8057         struct block_info *bits;
8058         int bits_nr;
8059         struct extent_buffer *leaf;
8060         int slot;
8061         struct btrfs_root_item ri;
8062         struct list_head dropping_trees;
8063         struct list_head normal_trees;
8064         struct btrfs_root *root1;
8065         u64 objectid;
8066         u32 level_size;
8067         u8 level;
8068
8069         dev_cache = RB_ROOT;
8070         cache_tree_init(&chunk_cache);
8071         block_group_tree_init(&block_group_cache);
8072         device_extent_tree_init(&dev_extent_cache);
8073
8074         cache_tree_init(&extent_cache);
8075         cache_tree_init(&seen);
8076         cache_tree_init(&pending);
8077         cache_tree_init(&nodes);
8078         cache_tree_init(&reada);
8079         cache_tree_init(&corrupt_blocks);
8080         extent_io_tree_init(&excluded_extents);
8081         INIT_LIST_HEAD(&dropping_trees);
8082         INIT_LIST_HEAD(&normal_trees);
8083
8084         if (repair) {
8085                 root->fs_info->excluded_extents = &excluded_extents;
8086                 root->fs_info->fsck_extent_cache = &extent_cache;
8087                 root->fs_info->free_extent_hook = free_extent_hook;
8088                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8089         }
8090
8091         bits_nr = 1024;
8092         bits = malloc(bits_nr * sizeof(struct block_info));
8093         if (!bits) {
8094                 perror("malloc");
8095                 exit(1);
8096         }
8097
8098         if (ctx.progress_enabled) {
8099                 ctx.tp = TASK_EXTENTS;
8100                 task_start(ctx.info);
8101         }
8102
8103 again:
8104         root1 = root->fs_info->tree_root;
8105         level = btrfs_header_level(root1->node);
8106         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8107                                     root1->node->start, 0, level, 0,
8108                                     btrfs_level_size(root1, level), NULL);
8109         if (ret < 0)
8110                 goto out;
8111         root1 = root->fs_info->chunk_root;
8112         level = btrfs_header_level(root1->node);
8113         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8114                                     root1->node->start, 0, level, 0,
8115                                     btrfs_level_size(root1, level), NULL);
8116         if (ret < 0)
8117                 goto out;
8118         btrfs_init_path(&path);
8119         key.offset = 0;
8120         key.objectid = 0;
8121         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8122         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8123                                         &key, &path, 0, 0);
8124         if (ret < 0)
8125                 goto out;
8126         while(1) {
8127                 leaf = path.nodes[0];
8128                 slot = path.slots[0];
8129                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8130                         ret = btrfs_next_leaf(root, &path);
8131                         if (ret != 0)
8132                                 break;
8133                         leaf = path.nodes[0];
8134                         slot = path.slots[0];
8135                 }
8136                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8137                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8138                         unsigned long offset;
8139                         u64 last_snapshot;
8140
8141                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8142                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8143                         last_snapshot = btrfs_root_last_snapshot(&ri);
8144                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8145                                 level = btrfs_root_level(&ri);
8146                                 level_size = btrfs_level_size(root, level);
8147                                 ret = add_root_item_to_list(&normal_trees,
8148                                                 found_key.objectid,
8149                                                 btrfs_root_bytenr(&ri),
8150                                                 last_snapshot, level,
8151                                                 0, level_size, NULL);
8152                                 if (ret < 0)
8153                                         goto out;
8154                         } else {
8155                                 level = btrfs_root_level(&ri);
8156                                 level_size = btrfs_level_size(root, level);
8157                                 objectid = found_key.objectid;
8158                                 btrfs_disk_key_to_cpu(&found_key,
8159                                                       &ri.drop_progress);
8160                                 ret = add_root_item_to_list(&dropping_trees,
8161                                                 objectid,
8162                                                 btrfs_root_bytenr(&ri),
8163                                                 last_snapshot, level,
8164                                                 ri.drop_level,
8165                                                 level_size, &found_key);
8166                                 if (ret < 0)
8167                                         goto out;
8168                         }
8169                 }
8170                 path.slots[0]++;
8171         }
8172         btrfs_release_path(&path);
8173
8174         /*
8175          * check_block can return -EAGAIN if it fixes something, please keep
8176          * this in mind when dealing with return values from these functions, if
8177          * we get -EAGAIN we want to fall through and restart the loop.
8178          */
8179         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8180                                   &seen, &reada, &nodes, &extent_cache,
8181                                   &chunk_cache, &dev_cache, &block_group_cache,
8182                                   &dev_extent_cache);
8183         if (ret < 0) {
8184                 if (ret == -EAGAIN)
8185                         goto loop;
8186                 goto out;
8187         }
8188         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8189                                   &pending, &seen, &reada, &nodes,
8190                                   &extent_cache, &chunk_cache, &dev_cache,
8191                                   &block_group_cache, &dev_extent_cache);
8192         if (ret < 0) {
8193                 if (ret == -EAGAIN)
8194                         goto loop;
8195                 goto out;
8196         }
8197
8198         err = check_chunks(&chunk_cache, &block_group_cache,
8199                            &dev_extent_cache, NULL, NULL, NULL, 0);
8200         if (err) {
8201                 if (err == -EAGAIN)
8202                         goto loop;
8203                 if (!ret)
8204                         ret = err;
8205         }
8206
8207         ret = check_extent_refs(root, &extent_cache);
8208         if (ret < 0) {
8209                 if (ret == -EAGAIN)
8210                         goto loop;
8211                 goto out;
8212         }
8213
8214         err = check_devices(&dev_cache, &dev_extent_cache);
8215         if (err && !ret)
8216                 ret = err;
8217
8218 out:
8219         task_stop(ctx.info);
8220         if (repair) {
8221                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8222                 extent_io_tree_cleanup(&excluded_extents);
8223                 root->fs_info->fsck_extent_cache = NULL;
8224                 root->fs_info->free_extent_hook = NULL;
8225                 root->fs_info->corrupt_blocks = NULL;
8226                 root->fs_info->excluded_extents = NULL;
8227         }
8228         free(bits);
8229         free_chunk_cache_tree(&chunk_cache);
8230         free_device_cache_tree(&dev_cache);
8231         free_block_group_tree(&block_group_cache);
8232         free_device_extent_tree(&dev_extent_cache);
8233         free_extent_cache_tree(&seen);
8234         free_extent_cache_tree(&pending);
8235         free_extent_cache_tree(&reada);
8236         free_extent_cache_tree(&nodes);
8237         return ret;
8238 loop:
8239         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8240         free_extent_cache_tree(&seen);
8241         free_extent_cache_tree(&pending);
8242         free_extent_cache_tree(&reada);
8243         free_extent_cache_tree(&nodes);
8244         free_chunk_cache_tree(&chunk_cache);
8245         free_block_group_tree(&block_group_cache);
8246         free_device_cache_tree(&dev_cache);
8247         free_device_extent_tree(&dev_extent_cache);
8248         free_extent_record_cache(root->fs_info, &extent_cache);
8249         free_root_item_list(&normal_trees);
8250         free_root_item_list(&dropping_trees);
8251         extent_io_tree_cleanup(&excluded_extents);
8252         goto again;
8253 }
8254
8255 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8256                            struct btrfs_root *root, int overwrite)
8257 {
8258         struct extent_buffer *c;
8259         struct extent_buffer *old = root->node;
8260         int level;
8261         int ret;
8262         struct btrfs_disk_key disk_key = {0,0,0};
8263
8264         level = 0;
8265
8266         if (overwrite) {
8267                 c = old;
8268                 extent_buffer_get(c);
8269                 goto init;
8270         }
8271         c = btrfs_alloc_free_block(trans, root,
8272                                    btrfs_level_size(root, 0),
8273                                    root->root_key.objectid,
8274                                    &disk_key, level, 0, 0);
8275         if (IS_ERR(c)) {
8276                 c = old;
8277                 extent_buffer_get(c);
8278                 overwrite = 1;
8279         }
8280 init:
8281         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8282         btrfs_set_header_level(c, level);
8283         btrfs_set_header_bytenr(c, c->start);
8284         btrfs_set_header_generation(c, trans->transid);
8285         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8286         btrfs_set_header_owner(c, root->root_key.objectid);
8287
8288         write_extent_buffer(c, root->fs_info->fsid,
8289                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8290
8291         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8292                             btrfs_header_chunk_tree_uuid(c),
8293                             BTRFS_UUID_SIZE);
8294
8295         btrfs_mark_buffer_dirty(c);
8296         /*
8297          * this case can happen in the following case:
8298          *
8299          * 1.overwrite previous root.
8300          *
8301          * 2.reinit reloc data root, this is because we skip pin
8302          * down reloc data tree before which means we can allocate
8303          * same block bytenr here.
8304          */
8305         if (old->start == c->start) {
8306                 btrfs_set_root_generation(&root->root_item,
8307                                           trans->transid);
8308                 root->root_item.level = btrfs_header_level(root->node);
8309                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8310                                         &root->root_key, &root->root_item);
8311                 if (ret) {
8312                         free_extent_buffer(c);
8313                         return ret;
8314                 }
8315         }
8316         free_extent_buffer(old);
8317         root->node = c;
8318         add_root_to_dirty_list(root);
8319         return 0;
8320 }
8321
8322 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8323                                 struct extent_buffer *eb, int tree_root)
8324 {
8325         struct extent_buffer *tmp;
8326         struct btrfs_root_item *ri;
8327         struct btrfs_key key;
8328         u64 bytenr;
8329         u32 leafsize;
8330         int level = btrfs_header_level(eb);
8331         int nritems;
8332         int ret;
8333         int i;
8334
8335         /*
8336          * If we have pinned this block before, don't pin it again.
8337          * This can not only avoid forever loop with broken filesystem
8338          * but also give us some speedups.
8339          */
8340         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8341                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8342                 return 0;
8343
8344         btrfs_pin_extent(fs_info, eb->start, eb->len);
8345
8346         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8347         nritems = btrfs_header_nritems(eb);
8348         for (i = 0; i < nritems; i++) {
8349                 if (level == 0) {
8350                         btrfs_item_key_to_cpu(eb, &key, i);
8351                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8352                                 continue;
8353                         /* Skip the extent root and reloc roots */
8354                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8355                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8356                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8357                                 continue;
8358                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8359                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8360
8361                         /*
8362                          * If at any point we start needing the real root we
8363                          * will have to build a stump root for the root we are
8364                          * in, but for now this doesn't actually use the root so
8365                          * just pass in extent_root.
8366                          */
8367                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8368                                               leafsize, 0);
8369                         if (!extent_buffer_uptodate(tmp)) {
8370                                 fprintf(stderr, "Error reading root block\n");
8371                                 return -EIO;
8372                         }
8373                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8374                         free_extent_buffer(tmp);
8375                         if (ret)
8376                                 return ret;
8377                 } else {
8378                         bytenr = btrfs_node_blockptr(eb, i);
8379
8380                         /* If we aren't the tree root don't read the block */
8381                         if (level == 1 && !tree_root) {
8382                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8383                                 continue;
8384                         }
8385
8386                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8387                                               leafsize, 0);
8388                         if (!extent_buffer_uptodate(tmp)) {
8389                                 fprintf(stderr, "Error reading tree block\n");
8390                                 return -EIO;
8391                         }
8392                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8393                         free_extent_buffer(tmp);
8394                         if (ret)
8395                                 return ret;
8396                 }
8397         }
8398
8399         return 0;
8400 }
8401
8402 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8403 {
8404         int ret;
8405
8406         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8407         if (ret)
8408                 return ret;
8409
8410         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8411 }
8412
8413 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8414 {
8415         struct btrfs_block_group_cache *cache;
8416         struct btrfs_path *path;
8417         struct extent_buffer *leaf;
8418         struct btrfs_chunk *chunk;
8419         struct btrfs_key key;
8420         int ret;
8421         u64 start;
8422
8423         path = btrfs_alloc_path();
8424         if (!path)
8425                 return -ENOMEM;
8426
8427         key.objectid = 0;
8428         key.type = BTRFS_CHUNK_ITEM_KEY;
8429         key.offset = 0;
8430
8431         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8432         if (ret < 0) {
8433                 btrfs_free_path(path);
8434                 return ret;
8435         }
8436
8437         /*
8438          * We do this in case the block groups were screwed up and had alloc
8439          * bits that aren't actually set on the chunks.  This happens with
8440          * restored images every time and could happen in real life I guess.
8441          */
8442         fs_info->avail_data_alloc_bits = 0;
8443         fs_info->avail_metadata_alloc_bits = 0;
8444         fs_info->avail_system_alloc_bits = 0;
8445
8446         /* First we need to create the in-memory block groups */
8447         while (1) {
8448                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8449                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8450                         if (ret < 0) {
8451                                 btrfs_free_path(path);
8452                                 return ret;
8453                         }
8454                         if (ret) {
8455                                 ret = 0;
8456                                 break;
8457                         }
8458                 }
8459                 leaf = path->nodes[0];
8460                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8461                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8462                         path->slots[0]++;
8463                         continue;
8464                 }
8465
8466                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8467                                        struct btrfs_chunk);
8468                 btrfs_add_block_group(fs_info, 0,
8469                                       btrfs_chunk_type(leaf, chunk),
8470                                       key.objectid, key.offset,
8471                                       btrfs_chunk_length(leaf, chunk));
8472                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8473                                  key.offset + btrfs_chunk_length(leaf, chunk),
8474                                  GFP_NOFS);
8475                 path->slots[0]++;
8476         }
8477         start = 0;
8478         while (1) {
8479                 cache = btrfs_lookup_first_block_group(fs_info, start);
8480                 if (!cache)
8481                         break;
8482                 cache->cached = 1;
8483                 start = cache->key.objectid + cache->key.offset;
8484         }
8485
8486         btrfs_free_path(path);
8487         return 0;
8488 }
8489
8490 static int reset_balance(struct btrfs_trans_handle *trans,
8491                          struct btrfs_fs_info *fs_info)
8492 {
8493         struct btrfs_root *root = fs_info->tree_root;
8494         struct btrfs_path *path;
8495         struct extent_buffer *leaf;
8496         struct btrfs_key key;
8497         int del_slot, del_nr = 0;
8498         int ret;
8499         int found = 0;
8500
8501         path = btrfs_alloc_path();
8502         if (!path)
8503                 return -ENOMEM;
8504
8505         key.objectid = BTRFS_BALANCE_OBJECTID;
8506         key.type = BTRFS_BALANCE_ITEM_KEY;
8507         key.offset = 0;
8508
8509         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8510         if (ret) {
8511                 if (ret > 0)
8512                         ret = 0;
8513                 if (!ret)
8514                         goto reinit_data_reloc;
8515                 else
8516                         goto out;
8517         }
8518
8519         ret = btrfs_del_item(trans, root, path);
8520         if (ret)
8521                 goto out;
8522         btrfs_release_path(path);
8523
8524         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8525         key.type = BTRFS_ROOT_ITEM_KEY;
8526         key.offset = 0;
8527
8528         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8529         if (ret < 0)
8530                 goto out;
8531         while (1) {
8532                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8533                         if (!found)
8534                                 break;
8535
8536                         if (del_nr) {
8537                                 ret = btrfs_del_items(trans, root, path,
8538                                                       del_slot, del_nr);
8539                                 del_nr = 0;
8540                                 if (ret)
8541                                         goto out;
8542                         }
8543                         key.offset++;
8544                         btrfs_release_path(path);
8545
8546                         found = 0;
8547                         ret = btrfs_search_slot(trans, root, &key, path,
8548                                                 -1, 1);
8549                         if (ret < 0)
8550                                 goto out;
8551                         continue;
8552                 }
8553                 found = 1;
8554                 leaf = path->nodes[0];
8555                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8556                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8557                         break;
8558                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8559                         path->slots[0]++;
8560                         continue;
8561                 }
8562                 if (!del_nr) {
8563                         del_slot = path->slots[0];
8564                         del_nr = 1;
8565                 } else {
8566                         del_nr++;
8567                 }
8568                 path->slots[0]++;
8569         }
8570
8571         if (del_nr) {
8572                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8573                 if (ret)
8574                         goto out;
8575         }
8576         btrfs_release_path(path);
8577
8578 reinit_data_reloc:
8579         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8580         key.type = BTRFS_ROOT_ITEM_KEY;
8581         key.offset = (u64)-1;
8582         root = btrfs_read_fs_root(fs_info, &key);
8583         if (IS_ERR(root)) {
8584                 fprintf(stderr, "Error reading data reloc tree\n");
8585                 ret = PTR_ERR(root);
8586                 goto out;
8587         }
8588         record_root_in_trans(trans, root);
8589         ret = btrfs_fsck_reinit_root(trans, root, 0);
8590         if (ret)
8591                 goto out;
8592         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8593 out:
8594         btrfs_free_path(path);
8595         return ret;
8596 }
8597
8598 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8599                               struct btrfs_fs_info *fs_info)
8600 {
8601         u64 start = 0;
8602         int ret;
8603
8604         /*
8605          * The only reason we don't do this is because right now we're just
8606          * walking the trees we find and pinning down their bytes, we don't look
8607          * at any of the leaves.  In order to do mixed groups we'd have to check
8608          * the leaves of any fs roots and pin down the bytes for any file
8609          * extents we find.  Not hard but why do it if we don't have to?
8610          */
8611         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8612                 fprintf(stderr, "We don't support re-initing the extent tree "
8613                         "for mixed block groups yet, please notify a btrfs "
8614                         "developer you want to do this so they can add this "
8615                         "functionality.\n");
8616                 return -EINVAL;
8617         }
8618
8619         /*
8620          * first we need to walk all of the trees except the extent tree and pin
8621          * down the bytes that are in use so we don't overwrite any existing
8622          * metadata.
8623          */
8624         ret = pin_metadata_blocks(fs_info);
8625         if (ret) {
8626                 fprintf(stderr, "error pinning down used bytes\n");
8627                 return ret;
8628         }
8629
8630         /*
8631          * Need to drop all the block groups since we're going to recreate all
8632          * of them again.
8633          */
8634         btrfs_free_block_groups(fs_info);
8635         ret = reset_block_groups(fs_info);
8636         if (ret) {
8637                 fprintf(stderr, "error resetting the block groups\n");
8638                 return ret;
8639         }
8640
8641         /* Ok we can allocate now, reinit the extent root */
8642         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8643         if (ret) {
8644                 fprintf(stderr, "extent root initialization failed\n");
8645                 /*
8646                  * When the transaction code is updated we should end the
8647                  * transaction, but for now progs only knows about commit so
8648                  * just return an error.
8649                  */
8650                 return ret;
8651         }
8652
8653         /*
8654          * Now we have all the in-memory block groups setup so we can make
8655          * allocations properly, and the metadata we care about is safe since we
8656          * pinned all of it above.
8657          */
8658         while (1) {
8659                 struct btrfs_block_group_cache *cache;
8660
8661                 cache = btrfs_lookup_first_block_group(fs_info, start);
8662                 if (!cache)
8663                         break;
8664                 start = cache->key.objectid + cache->key.offset;
8665                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8666                                         &cache->key, &cache->item,
8667                                         sizeof(cache->item));
8668                 if (ret) {
8669                         fprintf(stderr, "Error adding block group\n");
8670                         return ret;
8671                 }
8672                 btrfs_extent_post_op(trans, fs_info->extent_root);
8673         }
8674
8675         ret = reset_balance(trans, fs_info);
8676         if (ret)
8677                 fprintf(stderr, "error reseting the pending balance\n");
8678
8679         return ret;
8680 }
8681
8682 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8683 {
8684         struct btrfs_path *path;
8685         struct btrfs_trans_handle *trans;
8686         struct btrfs_key key;
8687         int ret;
8688
8689         printf("Recowing metadata block %llu\n", eb->start);
8690         key.objectid = btrfs_header_owner(eb);
8691         key.type = BTRFS_ROOT_ITEM_KEY;
8692         key.offset = (u64)-1;
8693
8694         root = btrfs_read_fs_root(root->fs_info, &key);
8695         if (IS_ERR(root)) {
8696                 fprintf(stderr, "Couldn't find owner root %llu\n",
8697                         key.objectid);
8698                 return PTR_ERR(root);
8699         }
8700
8701         path = btrfs_alloc_path();
8702         if (!path)
8703                 return -ENOMEM;
8704
8705         trans = btrfs_start_transaction(root, 1);
8706         if (IS_ERR(trans)) {
8707                 btrfs_free_path(path);
8708                 return PTR_ERR(trans);
8709         }
8710
8711         path->lowest_level = btrfs_header_level(eb);
8712         if (path->lowest_level)
8713                 btrfs_node_key_to_cpu(eb, &key, 0);
8714         else
8715                 btrfs_item_key_to_cpu(eb, &key, 0);
8716
8717         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8718         btrfs_commit_transaction(trans, root);
8719         btrfs_free_path(path);
8720         return ret;
8721 }
8722
8723 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8724 {
8725         struct btrfs_path *path;
8726         struct btrfs_trans_handle *trans;
8727         struct btrfs_key key;
8728         int ret;
8729
8730         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8731                bad->key.type, bad->key.offset);
8732         key.objectid = bad->root_id;
8733         key.type = BTRFS_ROOT_ITEM_KEY;
8734         key.offset = (u64)-1;
8735
8736         root = btrfs_read_fs_root(root->fs_info, &key);
8737         if (IS_ERR(root)) {
8738                 fprintf(stderr, "Couldn't find owner root %llu\n",
8739                         key.objectid);
8740                 return PTR_ERR(root);
8741         }
8742
8743         path = btrfs_alloc_path();
8744         if (!path)
8745                 return -ENOMEM;
8746
8747         trans = btrfs_start_transaction(root, 1);
8748         if (IS_ERR(trans)) {
8749                 btrfs_free_path(path);
8750                 return PTR_ERR(trans);
8751         }
8752
8753         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8754         if (ret) {
8755                 if (ret > 0)
8756                         ret = 0;
8757                 goto out;
8758         }
8759         ret = btrfs_del_item(trans, root, path);
8760 out:
8761         btrfs_commit_transaction(trans, root);
8762         btrfs_free_path(path);
8763         return ret;
8764 }
8765
8766 static int zero_log_tree(struct btrfs_root *root)
8767 {
8768         struct btrfs_trans_handle *trans;
8769         int ret;
8770
8771         trans = btrfs_start_transaction(root, 1);
8772         if (IS_ERR(trans)) {
8773                 ret = PTR_ERR(trans);
8774                 return ret;
8775         }
8776         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8777         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8778         ret = btrfs_commit_transaction(trans, root);
8779         return ret;
8780 }
8781
8782 static int populate_csum(struct btrfs_trans_handle *trans,
8783                          struct btrfs_root *csum_root, char *buf, u64 start,
8784                          u64 len)
8785 {
8786         u64 offset = 0;
8787         u64 sectorsize;
8788         int ret = 0;
8789
8790         while (offset < len) {
8791                 sectorsize = csum_root->sectorsize;
8792                 ret = read_extent_data(csum_root, buf, start + offset,
8793                                        &sectorsize, 0);
8794                 if (ret)
8795                         break;
8796                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8797                                             start + offset, buf, sectorsize);
8798                 if (ret)
8799                         break;
8800                 offset += sectorsize;
8801         }
8802         return ret;
8803 }
8804
8805 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8806                                       struct btrfs_root *csum_root,
8807                                       struct btrfs_root *cur_root)
8808 {
8809         struct btrfs_path *path;
8810         struct btrfs_key key;
8811         struct extent_buffer *node;
8812         struct btrfs_file_extent_item *fi;
8813         char *buf = NULL;
8814         u64 start = 0;
8815         u64 len = 0;
8816         int slot = 0;
8817         int ret = 0;
8818
8819         path = btrfs_alloc_path();
8820         if (!path)
8821                 return -ENOMEM;
8822         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8823         if (!buf) {
8824                 ret = -ENOMEM;
8825                 goto out;
8826         }
8827
8828         key.objectid = 0;
8829         key.offset = 0;
8830         key.type = 0;
8831
8832         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8833         if (ret < 0)
8834                 goto out;
8835         /* Iterate all regular file extents and fill its csum */
8836         while (1) {
8837                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8838
8839                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8840                         goto next;
8841                 node = path->nodes[0];
8842                 slot = path->slots[0];
8843                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8844                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8845                         goto next;
8846                 start = btrfs_file_extent_disk_bytenr(node, fi);
8847                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8848
8849                 ret = populate_csum(trans, csum_root, buf, start, len);
8850                 if (ret == -EEXIST)
8851                         ret = 0;
8852                 if (ret < 0)
8853                         goto out;
8854 next:
8855                 /*
8856                  * TODO: if next leaf is corrupted, jump to nearest next valid
8857                  * leaf.
8858                  */
8859                 ret = btrfs_next_item(cur_root, path);
8860                 if (ret < 0)
8861                         goto out;
8862                 if (ret > 0) {
8863                         ret = 0;
8864                         goto out;
8865                 }
8866         }
8867
8868 out:
8869         btrfs_free_path(path);
8870         free(buf);
8871         return ret;
8872 }
8873
8874 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8875                                   struct btrfs_root *csum_root)
8876 {
8877         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8878         struct btrfs_path *path;
8879         struct btrfs_root *tree_root = fs_info->tree_root;
8880         struct btrfs_root *cur_root;
8881         struct extent_buffer *node;
8882         struct btrfs_key key;
8883         int slot = 0;
8884         int ret = 0;
8885
8886         path = btrfs_alloc_path();
8887         if (!path)
8888                 return -ENOMEM;
8889
8890         key.objectid = BTRFS_FS_TREE_OBJECTID;
8891         key.offset = 0;
8892         key.type = BTRFS_ROOT_ITEM_KEY;
8893
8894         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8895         if (ret < 0)
8896                 goto out;
8897         if (ret > 0) {
8898                 ret = -ENOENT;
8899                 goto out;
8900         }
8901
8902         while (1) {
8903                 node = path->nodes[0];
8904                 slot = path->slots[0];
8905                 btrfs_item_key_to_cpu(node, &key, slot);
8906                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8907                         goto out;
8908                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8909                         goto next;
8910                 if (!is_fstree(key.objectid))
8911                         goto next;
8912                 key.offset = (u64)-1;
8913
8914                 cur_root = btrfs_read_fs_root(fs_info, &key);
8915                 if (IS_ERR(cur_root) || !cur_root) {
8916                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8917                                 key.objectid);
8918                         goto out;
8919                 }
8920                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8921                                 cur_root);
8922                 if (ret < 0)
8923                         goto out;
8924 next:
8925                 ret = btrfs_next_item(tree_root, path);
8926                 if (ret > 0) {
8927                         ret = 0;
8928                         goto out;
8929                 }
8930                 if (ret < 0)
8931                         goto out;
8932         }
8933
8934 out:
8935         btrfs_free_path(path);
8936         return ret;
8937 }
8938
8939 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8940                                       struct btrfs_root *csum_root)
8941 {
8942         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8943         struct btrfs_path *path;
8944         struct btrfs_extent_item *ei;
8945         struct extent_buffer *leaf;
8946         char *buf;
8947         struct btrfs_key key;
8948         int ret;
8949
8950         path = btrfs_alloc_path();
8951         if (!path)
8952                 return -ENOMEM;
8953
8954         key.objectid = 0;
8955         key.type = BTRFS_EXTENT_ITEM_KEY;
8956         key.offset = 0;
8957
8958         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8959         if (ret < 0) {
8960                 btrfs_free_path(path);
8961                 return ret;
8962         }
8963
8964         buf = malloc(csum_root->sectorsize);
8965         if (!buf) {
8966                 btrfs_free_path(path);
8967                 return -ENOMEM;
8968         }
8969
8970         while (1) {
8971                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8972                         ret = btrfs_next_leaf(extent_root, path);
8973                         if (ret < 0)
8974                                 break;
8975                         if (ret) {
8976                                 ret = 0;
8977                                 break;
8978                         }
8979                 }
8980                 leaf = path->nodes[0];
8981
8982                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8983                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8984                         path->slots[0]++;
8985                         continue;
8986                 }
8987
8988                 ei = btrfs_item_ptr(leaf, path->slots[0],
8989                                     struct btrfs_extent_item);
8990                 if (!(btrfs_extent_flags(leaf, ei) &
8991                       BTRFS_EXTENT_FLAG_DATA)) {
8992                         path->slots[0]++;
8993                         continue;
8994                 }
8995
8996                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8997                                     key.offset);
8998                 if (ret)
8999                         break;
9000                 path->slots[0]++;
9001         }
9002
9003         btrfs_free_path(path);
9004         free(buf);
9005         return ret;
9006 }
9007
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * An extent tree re-initialization wipes all extent information, so in that
 * case the extent tree cannot be used as the source and the fs/subvolume trees
 * are scanned instead.  A non-zero search_fs_tree selects the fs-tree scan;
 * zero selects the extent-tree scan.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	int ret;

	if (!search_fs_tree)
		ret = fill_csum_tree_from_extent(trans, csum_root);
	else
		ret = fill_csum_tree_from_fs(trans, csum_root);

	return ret;
}
9024
9025 struct root_item_info {
             /*
              * One entry per root id, filled by build_roots_info_cache() from
              * the tree block extent items whose first inline ref is a
              * TREE_BLOCK_REF for that root.
              */
9026         /* highest tree block level seen for the root; (u8)-1 until first set */
9027         u8 level;
9028         /* number of nodes at this level, must be 1 for a root */
9029         int node_count;
             /* key objectid of the extent item that established 'level' */
9030         u64 bytenr;
             /* generation read from that same extent item */
9031         u64 gen;
             /* node in roots_info_cache; start is the root id, size is 1 */
9032         struct cache_extent cache_extent;
9033 };
9034
9035 static struct cache_tree *roots_info_cache = NULL;
9036
9037 static void free_roots_info_cache(void)
9038 {
9039         if (!roots_info_cache)
9040                 return;
9041
9042         while (!cache_tree_empty(roots_info_cache)) {
9043                 struct cache_extent *entry;
9044                 struct root_item_info *rii;
9045
9046                 entry = first_cache_extent(roots_info_cache);
9047                 if (!entry)
9048                         break;
9049                 remove_cache_extent(roots_info_cache, entry);
9050                 rii = container_of(entry, struct root_item_info, cache_extent);
9051                 free(rii);
9052         }
9053
9054         free(roots_info_cache);
9055         roots_info_cache = NULL;
9056 }
9057
9058 static int build_roots_info_cache(struct btrfs_fs_info *info)
9059 {
9060         int ret = 0;
9061         struct btrfs_key key;
9062         struct extent_buffer *leaf;
9063         struct btrfs_path *path;
9064
9065         if (!roots_info_cache) {
9066                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9067                 if (!roots_info_cache)
9068                         return -ENOMEM;
9069                 cache_tree_init(roots_info_cache);
9070         }
9071
9072         path = btrfs_alloc_path();
9073         if (!path)
9074                 return -ENOMEM;
9075
9076         key.objectid = 0;
9077         key.type = BTRFS_EXTENT_ITEM_KEY;
9078         key.offset = 0;
9079
9080         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9081         if (ret < 0)
9082                 goto out;
9083         leaf = path->nodes[0];
9084
9085         while (1) {
9086                 struct btrfs_key found_key;
9087                 struct btrfs_extent_item *ei;
9088                 struct btrfs_extent_inline_ref *iref;
9089                 int slot = path->slots[0];
9090                 int type;
9091                 u64 flags;
9092                 u64 root_id;
9093                 u8 level;
9094                 struct cache_extent *entry;
9095                 struct root_item_info *rii;
9096
9097                 if (slot >= btrfs_header_nritems(leaf)) {
9098                         ret = btrfs_next_leaf(info->extent_root, path);
9099                         if (ret < 0) {
9100                                 break;
9101                         } else if (ret) {
9102                                 ret = 0;
9103                                 break;
9104                         }
9105                         leaf = path->nodes[0];
9106                         slot = path->slots[0];
9107                 }
9108
9109                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9110
9111                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9112                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9113                         goto next;
9114
9115                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9116                 flags = btrfs_extent_flags(leaf, ei);
9117
9118                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9119                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9120                         goto next;
9121
9122                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9123                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9124                         level = found_key.offset;
9125                 } else {
9126                         struct btrfs_tree_block_info *info;
9127
9128                         info = (struct btrfs_tree_block_info *)(ei + 1);
9129                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9130                         level = btrfs_tree_block_level(leaf, info);
9131                 }
9132
9133                 /*
9134                  * For a root extent, it must be of the following type and the
9135                  * first (and only one) iref in the item.
9136                  */
9137                 type = btrfs_extent_inline_ref_type(leaf, iref);
9138                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9139                         goto next;
9140
9141                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9142                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9143                 if (!entry) {
9144                         rii = malloc(sizeof(struct root_item_info));
9145                         if (!rii) {
9146                                 ret = -ENOMEM;
9147                                 goto out;
9148                         }
9149                         rii->cache_extent.start = root_id;
9150                         rii->cache_extent.size = 1;
9151                         rii->level = (u8)-1;
9152                         entry = &rii->cache_extent;
9153                         ret = insert_cache_extent(roots_info_cache, entry);
9154                         ASSERT(ret == 0);
9155                 } else {
9156                         rii = container_of(entry, struct root_item_info,
9157                                            cache_extent);
9158                 }
9159
9160                 ASSERT(rii->cache_extent.start == root_id);
9161                 ASSERT(rii->cache_extent.size == 1);
9162
9163                 if (level > rii->level || rii->level == (u8)-1) {
9164                         rii->level = level;
9165                         rii->bytenr = found_key.objectid;
9166                         rii->gen = btrfs_extent_generation(leaf, ei);
9167                         rii->node_count = 1;
9168                 } else if (level == rii->level) {
9169                         rii->node_count++;
9170                 }
9171 next:
9172                 path->slots[0]++;
9173         }
9174
9175 out:
9176         btrfs_free_path(path);
9177
9178         return ret;
9179 }
9180
9181 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9182                                   struct btrfs_path *path,
9183                                   const struct btrfs_key *root_key,
9184                                   const int read_only_mode)
9185 {
9186         const u64 root_id = root_key->objectid;
9187         struct cache_extent *entry;
9188         struct root_item_info *rii;
9189         struct btrfs_root_item ri;
9190         unsigned long offset;
9191
9192         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9193         if (!entry) {
9194                 fprintf(stderr,
9195                         "Error: could not find extent items for root %llu\n",
9196                         root_key->objectid);
9197                 return -ENOENT;
9198         }
9199
9200         rii = container_of(entry, struct root_item_info, cache_extent);
9201         ASSERT(rii->cache_extent.start == root_id);
9202         ASSERT(rii->cache_extent.size == 1);
9203
9204         if (rii->node_count != 1) {
9205                 fprintf(stderr,
9206                         "Error: could not find btree root extent for root %llu\n",
9207                         root_id);
9208                 return -ENOENT;
9209         }
9210
9211         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9212         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9213
9214         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9215             btrfs_root_level(&ri) != rii->level ||
9216             btrfs_root_generation(&ri) != rii->gen) {
9217
9218                 /*
9219                  * If we're in repair mode but our caller told us to not update
9220                  * the root item, i.e. just check if it needs to be updated, don't
9221                  * print this message, since the caller will call us again shortly
9222                  * for the same root item without read only mode (the caller will
9223                  * open a transaction first).
9224                  */
9225                 if (!(read_only_mode && repair))
9226                         fprintf(stderr,
9227                                 "%sroot item for root %llu,"
9228                                 " current bytenr %llu, current gen %llu, current level %u,"
9229                                 " new bytenr %llu, new gen %llu, new level %u\n",
9230                                 (read_only_mode ? "" : "fixing "),
9231                                 root_id,
9232                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9233                                 btrfs_root_level(&ri),
9234                                 rii->bytenr, rii->gen, rii->level);
9235
9236                 if (btrfs_root_generation(&ri) > rii->gen) {
9237                         fprintf(stderr,
9238                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9239                                 root_id, btrfs_root_generation(&ri), rii->gen);
9240                         return -EINVAL;
9241                 }
9242
9243                 if (!read_only_mode) {
9244                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9245                         btrfs_set_root_level(&ri, rii->level);
9246                         btrfs_set_root_generation(&ri, rii->gen);
9247                         write_extent_buffer(path->nodes[0], &ri,
9248                                             offset, sizeof(ri));
9249                 }
9250
9251                 return 1;
9252         }
9253
9254         return 0;
9255 }
9256
9257 /*
9258  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9259  * caused read-only snapshots to be corrupted if they were created at a moment
9260  * when the source subvolume/snapshot had orphan items. The issue was that the
9261  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9262  * node instead of the post orphan cleanup root node.
9263  * So this function, and its callees, just detects and fixes those cases. Even
9264  * though the regression was for read-only snapshots, this function applies to
9265  * any snapshot/subvolume root.
9266  * This must be run before any other repair code - not doing it so, makes other
9267  * repair code delete or modify backrefs in the extent tree for example, which
9268  * will result in an inconsistent fs after repairing the root items.
9269  */
9270 static int repair_root_items(struct btrfs_fs_info *info)
9271 {
9272         struct btrfs_path *path = NULL;
9273         struct btrfs_key key;
9274         struct extent_buffer *leaf;
9275         struct btrfs_trans_handle *trans = NULL;
9276         int ret = 0;
9277         int bad_roots = 0;
9278         int need_trans = 0;
9279
9280         ret = build_roots_info_cache(info);
9281         if (ret)
9282                 goto out;
9283
9284         path = btrfs_alloc_path();
9285         if (!path) {
9286                 ret = -ENOMEM;
9287                 goto out;
9288         }
9289
9290         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9291         key.type = BTRFS_ROOT_ITEM_KEY;
9292         key.offset = 0;
9293
9294 again:
9295         /*
9296          * Avoid opening and committing transactions if a leaf doesn't have
9297          * any root items that need to be fixed, so that we avoid rotating
9298          * backup roots unnecessarily.
9299          */
9300         if (need_trans) {
9301                 trans = btrfs_start_transaction(info->tree_root, 1);
9302                 if (IS_ERR(trans)) {
9303                         ret = PTR_ERR(trans);
9304                         goto out;
9305                 }
9306         }
9307
9308         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9309                                 0, trans ? 1 : 0);
9310         if (ret < 0)
9311                 goto out;
9312         leaf = path->nodes[0];
9313
9314         while (1) {
9315                 struct btrfs_key found_key;
9316
9317                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9318                         int no_more_keys = find_next_key(path, &key);
9319
9320                         btrfs_release_path(path);
9321                         if (trans) {
9322                                 ret = btrfs_commit_transaction(trans,
9323                                                                info->tree_root);
9324                                 trans = NULL;
9325                                 if (ret < 0)
9326                                         goto out;
9327                         }
9328                         need_trans = 0;
9329                         if (no_more_keys)
9330                                 break;
9331                         goto again;
9332                 }
9333
9334                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9335
9336                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9337                         goto next;
9338                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9339                         goto next;
9340
9341                 ret = maybe_repair_root_item(info, path, &found_key,
9342                                              trans ? 0 : 1);
9343                 if (ret < 0)
9344                         goto out;
9345                 if (ret) {
9346                         if (!trans && repair) {
9347                                 need_trans = 1;
9348                                 key = found_key;
9349                                 btrfs_release_path(path);
9350                                 goto again;
9351                         }
9352                         bad_roots++;
9353                 }
9354 next:
9355                 path->slots[0]++;
9356         }
9357         ret = 0;
9358 out:
9359         free_roots_info_cache();
9360         btrfs_free_path(path);
9361         if (trans)
9362                 btrfs_commit_transaction(trans, info->tree_root);
9363         if (ret < 0)
9364                 return ret;
9365
9366         return bad_roots;
9367 }
9368
/*
 * Usage text for "btrfs check": synopsis, one-line summary, long description,
 * a blank separator and then one entry per option.  NULL-terminated.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"-p|--progress               indicate progress",
	NULL
};
9391
9392 int cmd_check(int argc, char **argv)
9393 {
9394         struct cache_tree root_cache;
9395         struct btrfs_root *root;
9396         struct btrfs_fs_info *info;
9397         u64 bytenr = 0;
9398         u64 subvolid = 0;
9399         u64 tree_root_bytenr = 0;
9400         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9401         int ret;
9402         u64 num;
9403         int init_csum_tree = 0;
9404         int readonly = 0;
9405         int qgroup_report = 0;
9406         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9407
9408         while(1) {
9409                 int c;
9410                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9411                         OPT_CHECK_CSUM, OPT_READONLY };
9412                 static const struct option long_options[] = {
9413                         { "super", required_argument, NULL, 's' },
9414                         { "repair", no_argument, NULL, OPT_REPAIR },
9415                         { "readonly", no_argument, NULL, OPT_READONLY },
9416                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9417                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9418                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9419                         { "backup", no_argument, NULL, 'b' },
9420                         { "subvol-extents", required_argument, NULL, 'E' },
9421                         { "qgroup-report", no_argument, NULL, 'Q' },
9422                         { "tree-root", required_argument, NULL, 'r' },
9423                         { "progress", no_argument, NULL, 'p' },
9424                         { NULL, 0, NULL, 0}
9425                 };
9426
9427                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9428                 if (c < 0)
9429                         break;
9430                 switch(c) {
9431                         case 'a': /* ignored */ break;
9432                         case 'b':
9433                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9434                                 break;
9435                         case 's':
9436                                 num = arg_strtou64(optarg);
9437                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9438                                         fprintf(stderr,
9439                                                 "ERROR: super mirror should be less than: %d\n",
9440                                                 BTRFS_SUPER_MIRROR_MAX);
9441                                         exit(1);
9442                                 }
9443                                 bytenr = btrfs_sb_offset(((int)num));
9444                                 printf("using SB copy %llu, bytenr %llu\n", num,
9445                                        (unsigned long long)bytenr);
9446                                 break;
9447                         case 'Q':
9448                                 qgroup_report = 1;
9449                                 break;
9450                         case 'E':
9451                                 subvolid = arg_strtou64(optarg);
9452                                 break;
9453                         case 'r':
9454                                 tree_root_bytenr = arg_strtou64(optarg);
9455                                 break;
9456                         case 'p':
9457                                 ctx.progress_enabled = true;
9458                                 break;
9459                         case '?':
9460                         case 'h':
9461                                 usage(cmd_check_usage);
9462                         case OPT_REPAIR:
9463                                 printf("enabling repair mode\n");
9464                                 repair = 1;
9465                                 ctree_flags |= OPEN_CTREE_WRITES;
9466                                 break;
9467                         case OPT_READONLY:
9468                                 readonly = 1;
9469                                 break;
9470                         case OPT_INIT_CSUM:
9471                                 printf("Creating a new CRC tree\n");
9472                                 init_csum_tree = 1;
9473                                 repair = 1;
9474                                 ctree_flags |= OPEN_CTREE_WRITES;
9475                                 break;
9476                         case OPT_INIT_EXTENT:
9477                                 init_extent_tree = 1;
9478                                 ctree_flags |= (OPEN_CTREE_WRITES |
9479                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9480                                 repair = 1;
9481                                 break;
9482                         case OPT_CHECK_CSUM:
9483                                 check_data_csum = 1;
9484                                 break;
9485                 }
9486         }
9487         argc = argc - optind;
9488
9489         if (check_argc_exact(argc, 1))
9490                 usage(cmd_check_usage);
9491
9492         if (ctx.progress_enabled) {
9493                 ctx.tp = TASK_NOTHING;
9494                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9495         }
9496
9497         /* This check is the only reason for --readonly to exist */
9498         if (readonly && repair) {
9499                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9500                 exit(1);
9501         }
9502
9503         radix_tree_init();
9504         cache_tree_init(&root_cache);
9505
9506         if((ret = check_mounted(argv[optind])) < 0) {
9507                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9508                 goto err_out;
9509         } else if(ret) {
9510                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9511                 ret = -EBUSY;
9512                 goto err_out;
9513         }
9514
9515         /* only allow partial opening under repair mode */
9516         if (repair)
9517                 ctree_flags |= OPEN_CTREE_PARTIAL;
9518
9519         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9520                                   ctree_flags);
9521         if (!info) {
9522                 fprintf(stderr, "Couldn't open file system\n");
9523                 ret = -EIO;
9524                 goto err_out;
9525         }
9526
9527         global_info = info;
9528         root = info->fs_root;
9529
9530         /*
9531          * repair mode will force us to commit transaction which
9532          * will make us fail to load log tree when mounting.
9533          */
9534         if (repair && btrfs_super_log_root(info->super_copy)) {
9535                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9536                 if (!ret) {
9537                         ret = 1;
9538                         goto close_out;
9539                 }
9540                 ret = zero_log_tree(root);
9541                 if (ret) {
9542                         fprintf(stderr, "fail to zero log tree\n");
9543                         goto close_out;
9544                 }
9545         }
9546
9547         uuid_unparse(info->super_copy->fsid, uuidbuf);
9548         if (qgroup_report) {
9549                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9550                        uuidbuf);
9551                 ret = qgroup_verify_all(info);
9552                 if (ret == 0)
9553                         print_qgroup_report(1);
9554                 goto close_out;
9555         }
9556         if (subvolid) {
9557                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9558                        subvolid, argv[optind], uuidbuf);
9559                 ret = print_extent_state(info, subvolid);
9560                 goto close_out;
9561         }
9562         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9563
9564         if (!extent_buffer_uptodate(info->tree_root->node) ||
9565             !extent_buffer_uptodate(info->dev_root->node) ||
9566             !extent_buffer_uptodate(info->chunk_root->node)) {
9567                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9568                 ret = -EIO;
9569                 goto close_out;
9570         }
9571
9572         if (init_extent_tree || init_csum_tree) {
9573                 struct btrfs_trans_handle *trans;
9574
9575                 trans = btrfs_start_transaction(info->extent_root, 0);
9576                 if (IS_ERR(trans)) {
9577                         fprintf(stderr, "Error starting transaction\n");
9578                         ret = PTR_ERR(trans);
9579                         goto close_out;
9580                 }
9581
9582                 if (init_extent_tree) {
9583                         printf("Creating a new extent tree\n");
9584                         ret = reinit_extent_tree(trans, info);
9585                         if (ret)
9586                                 goto close_out;
9587                 }
9588
9589                 if (init_csum_tree) {
9590                         fprintf(stderr, "Reinit crc root\n");
9591                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9592                         if (ret) {
9593                                 fprintf(stderr, "crc root initialization failed\n");
9594                                 ret = -EIO;
9595                                 goto close_out;
9596                         }
9597
9598                         ret = fill_csum_tree(trans, info->csum_root,
9599                                              init_extent_tree);
9600                         if (ret) {
9601                                 fprintf(stderr, "crc refilling failed\n");
9602                                 return -EIO;
9603                         }
9604                 }
9605                 /*
9606                  * Ok now we commit and run the normal fsck, which will add
9607                  * extent entries for all of the items it finds.
9608                  */
9609                 ret = btrfs_commit_transaction(trans, info->extent_root);
9610                 if (ret)
9611                         goto close_out;
9612         }
9613         if (!extent_buffer_uptodate(info->extent_root->node)) {
9614                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9615                 ret = -EIO;
9616                 goto close_out;
9617         }
9618         if (!extent_buffer_uptodate(info->csum_root->node)) {
9619                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9620                 ret = -EIO;
9621                 goto close_out;
9622         }
9623
9624         if (!ctx.progress_enabled)
9625                 fprintf(stderr, "checking extents\n");
9626         ret = check_chunks_and_extents(root);
9627         if (ret)
9628                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9629
9630         ret = repair_root_items(info);
9631         if (ret < 0)
9632                 goto close_out;
9633         if (repair) {
9634                 fprintf(stderr, "Fixed %d roots.\n", ret);
9635                 ret = 0;
9636         } else if (ret > 0) {
9637                 fprintf(stderr,
9638                        "Found %d roots with an outdated root item.\n",
9639                        ret);
9640                 fprintf(stderr,
9641                         "Please run a filesystem check with the option --repair to fix them.\n");
9642                 ret = 1;
9643                 goto close_out;
9644         }
9645
9646         if (!ctx.progress_enabled)
9647                 fprintf(stderr, "checking free space cache\n");
9648         ret = check_space_cache(root);
9649         if (ret)
9650                 goto out;
9651
9652         /*
9653          * We used to have to have these hole extents in between our real
9654          * extents so if we don't have this flag set we need to make sure there
9655          * are no gaps in the file extents for inodes, otherwise we can just
9656          * ignore it when this happens.
9657          */
9658         no_holes = btrfs_fs_incompat(root->fs_info,
9659                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9660         if (!ctx.progress_enabled)
9661                 fprintf(stderr, "checking fs roots\n");
9662         ret = check_fs_roots(root, &root_cache);
9663         if (ret)
9664                 goto out;
9665
9666         fprintf(stderr, "checking csums\n");
9667         ret = check_csums(root);
9668         if (ret)
9669                 goto out;
9670
9671         fprintf(stderr, "checking root refs\n");
9672         ret = check_root_refs(root, &root_cache);
9673         if (ret)
9674                 goto out;
9675
9676         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9677                 struct extent_buffer *eb;
9678
9679                 eb = list_first_entry(&root->fs_info->recow_ebs,
9680                                       struct extent_buffer, recow);
9681                 list_del_init(&eb->recow);
9682                 ret = recow_extent_buffer(root, eb);
9683                 if (ret)
9684                         break;
9685         }
9686
9687         while (!list_empty(&delete_items)) {
9688                 struct bad_item *bad;
9689
9690                 bad = list_first_entry(&delete_items, struct bad_item, list);
9691                 list_del_init(&bad->list);
9692                 if (repair)
9693                         ret = delete_bad_item(root, bad);
9694                 free(bad);
9695         }
9696
9697         if (info->quota_enabled) {
9698                 int err;
9699                 fprintf(stderr, "checking quota groups\n");
9700                 err = qgroup_verify_all(info);
9701                 if (err)
9702                         goto out;
9703         }
9704
9705         if (!list_empty(&root->fs_info->recow_ebs)) {
9706                 fprintf(stderr, "Transid errors in file system\n");
9707                 ret = 1;
9708         }
9709 out:
9710         print_qgroup_report(0);
9711         if (found_old_backref) { /*
9712                  * there was a disk format change when mixed
9713                  * backref was in testing tree. The old format
9714                  * existed about one week.
9715                  */
9716                 printf("\n * Found old mixed backref format. "
9717                        "The old format is not supported! *"
9718                        "\n * Please mount the FS in readonly mode, "
9719                        "backup data and re-format the FS. *\n\n");
9720                 ret = 1;
9721         }
9722         printf("found %llu bytes used err is %d\n",
9723                (unsigned long long)bytes_used, ret);
9724         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9725         printf("total tree bytes: %llu\n",
9726                (unsigned long long)total_btree_bytes);
9727         printf("total fs tree bytes: %llu\n",
9728                (unsigned long long)total_fs_tree_bytes);
9729         printf("total extent tree bytes: %llu\n",
9730                (unsigned long long)total_extent_tree_bytes);
9731         printf("btree space waste bytes: %llu\n",
9732                (unsigned long long)btree_space_waste);
9733         printf("file data blocks allocated: %llu\n referenced %llu\n",
9734                 (unsigned long long)data_bytes_allocated,
9735                 (unsigned long long)data_bytes_referenced);
9736         printf("%s\n", PACKAGE_STRING);
9737
9738         free_root_recs_tree(&root_cache);
9739 close_out:
9740         close_ctree(root);
9741         btrfs_close_all_devices();
9742 err_out:
9743         if (ctx.progress_enabled)
9744                 task_deinit(ctx.info);
9745
9746         return ret;
9747 }