btrfs-progs: check: change find_dir_index/item
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases reported by the progress indicator.  Every value except
 * TASK_NOTHING is used as an index into task_position_string[] in
 * print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* No phase to report; has to remain the last enumerator. */
	TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognised name. */
	CHECK_MODE_UNKNOWN,
	/* Alias, not a new value: the original mode is the default. */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/*
 * Error bits, one per kind of problem.  Bit 0 is not assigned in this
 * group.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM without reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* tree traversal is complete */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* DIR_INDEX found but does not match */
135
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 {
138         return container_of(back, struct data_backref, node);
139 }
140
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
142 {
143         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145         struct data_backref *back1 = to_data_backref(ext1);
146         struct data_backref *back2 = to_data_backref(ext2);
147
148         WARN_ON(!ext1->is_data);
149         WARN_ON(!ext2->is_data);
150
151         /* parent and root are a union, so this covers both */
152         if (back1->parent > back2->parent)
153                 return 1;
154         if (back1->parent < back2->parent)
155                 return -1;
156
157         /* This is a full backref and the parents match. */
158         if (back1->node.full_backref)
159                 return 0;
160
161         if (back1->owner > back2->owner)
162                 return 1;
163         if (back1->owner < back2->owner)
164                 return -1;
165
166         if (back1->offset > back2->offset)
167                 return 1;
168         if (back1->offset < back2->offset)
169                 return -1;
170
171         if (back1->found_ref && back2->found_ref) {
172                 if (back1->disk_bytenr > back2->disk_bytenr)
173                         return 1;
174                 if (back1->disk_bytenr < back2->disk_bytenr)
175                         return -1;
176
177                 if (back1->bytes > back2->bytes)
178                         return 1;
179                 if (back1->bytes < back2->bytes)
180                         return -1;
181         }
182
183         return 0;
184 }
185
186 /*
187  * Much like data_backref, just removed the undetermined members
188  * and change it to use list_head.
189  * During extent scan, it is stored in root->orphan_data_extent.
190  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
191  */
192 struct orphan_data_extent {
193         struct list_head list;
194         u64 root;
195         u64 objectid;
196         u64 offset;
197         u64 disk_bytenr;
198         u64 disk_len;
199 };
200
201 struct tree_backref {
202         struct extent_backref node;
203         union {
204                 u64 parent;
205                 u64 root;
206         };
207 };
208
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
210 {
211         return container_of(back, struct tree_backref, node);
212 }
213
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
215 {
216         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218         struct tree_backref *back1 = to_tree_backref(ext1);
219         struct tree_backref *back2 = to_tree_backref(ext2);
220
221         WARN_ON(ext1->is_data);
222         WARN_ON(ext2->is_data);
223
224         /* parent and root are a union, so this covers both */
225         if (back1->parent > back2->parent)
226                 return 1;
227         if (back1->parent < back2->parent)
228                 return -1;
229
230         return 0;
231 }
232
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
234 {
235         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
237
238         if (ext1->is_data > ext2->is_data)
239                 return 1;
240
241         if (ext1->is_data < ext2->is_data)
242                 return -1;
243
244         if (ext1->full_backref > ext2->full_backref)
245                 return 1;
246         if (ext1->full_backref < ext2->full_backref)
247                 return -1;
248
249         if (ext1->is_data)
250                 return compare_data_backref(node1, node2);
251         else
252                 return compare_tree_backref(node1, node2);
253 }
254
/*
 * Explicit initialization value for
 * extent_record::flag_block_full_backref, which is two bits wide.
 */
enum {
	FLAG_UNSET = 2,
};
257
258 struct extent_record {
259         struct list_head backrefs;
260         struct list_head dups;
261         struct rb_root backref_tree;
262         struct list_head list;
263         struct cache_extent cache;
264         struct btrfs_disk_key parent_key;
265         u64 start;
266         u64 max_size;
267         u64 nr;
268         u64 refs;
269         u64 extent_item_refs;
270         u64 generation;
271         u64 parent_generation;
272         u64 info_objectid;
273         u32 num_duplicates;
274         u8 info_level;
275         unsigned int flag_block_full_backref:2;
276         unsigned int found_rec:1;
277         unsigned int content_checked:1;
278         unsigned int owner_ref_checked:1;
279         unsigned int is_root:1;
280         unsigned int metadata:1;
281         unsigned int bad_full_backref:1;
282         unsigned int crossing_stripes:1;
283         unsigned int wrong_chunk_type:1;
284 };
285
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
287 {
288         return container_of(entry, struct extent_record, list);
289 }
290
291 struct inode_backref {
292         struct list_head list;
293         unsigned int found_dir_item:1;
294         unsigned int found_dir_index:1;
295         unsigned int found_inode_ref:1;
296         u8 filetype;
297         u8 ref_type;
298         int errors;
299         u64 dir;
300         u64 index;
301         u16 namelen;
302         char name[0];
303 };
304
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
306 {
307         return list_entry(entry, struct inode_backref, list);
308 }
309
310 struct root_item_record {
311         struct list_head list;
312         u64 objectid;
313         u64 bytenr;
314         u64 last_snapshot;
315         u8 level;
316         u8 drop_level;
317         struct btrfs_key drop_key;
318 };
319
/* Error bits for a back reference, decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
333
334 struct file_extent_hole {
335         struct rb_node node;
336         u64 start;
337         u64 len;
338 };
339
340 struct inode_record {
341         struct list_head backrefs;
342         unsigned int checked:1;
343         unsigned int merging:1;
344         unsigned int found_inode_item:1;
345         unsigned int found_dir_item:1;
346         unsigned int found_file_extent:1;
347         unsigned int found_csum_item:1;
348         unsigned int some_csum_missing:1;
349         unsigned int nodatasum:1;
350         int errors;
351
352         u64 ino;
353         u32 nlink;
354         u32 imode;
355         u64 isize;
356         u64 nbytes;
357
358         u32 found_link;
359         u64 found_size;
360         u64 extent_start;
361         u64 extent_end;
362         struct rb_root holes;
363         struct list_head orphan_extents;
364
365         u32 refs;
366 };
367
/* Bits of inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
383
384 struct root_backref {
385         struct list_head list;
386         unsigned int found_dir_item:1;
387         unsigned int found_dir_index:1;
388         unsigned int found_back_ref:1;
389         unsigned int found_forward_ref:1;
390         unsigned int reachable:1;
391         int errors;
392         u64 ref_root;
393         u64 dir;
394         u64 index;
395         u16 namelen;
396         char name[0];
397 };
398
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
400 {
401         return list_entry(entry, struct root_backref, list);
402 }
403
404 struct root_record {
405         struct list_head backrefs;
406         struct cache_extent cache;
407         unsigned int found_root_item:1;
408         u64 objectid;
409         u32 found_ref;
410 };
411
412 struct ptr_node {
413         struct cache_extent cache;
414         void *data;
415 };
416
417 struct shared_node {
418         struct cache_extent cache;
419         struct cache_tree root_cache;
420         struct cache_tree inode_cache;
421         struct inode_record *current;
422         u32 refs;
423 };
424
425 struct block_info {
426         u64 start;
427         u32 size;
428 };
429
430 struct walk_control {
431         struct cache_tree shared;
432         struct shared_node *nodes[BTRFS_MAX_LEVEL];
433         int active_node;
434         int root_level;
435 };
436
437 struct bad_item {
438         struct btrfs_key key;
439         u64 root_id;
440         struct list_head list;
441 };
442
443 struct extent_entry {
444         u64 bytenr;
445         u64 bytes;
446         int count;
447         int broken;
448         struct list_head list;
449 };
450
451 struct root_item_info {
452         /* level of the root */
453         u8 level;
454         /* number of nodes at this level, must be 1 for a root */
455         int node_count;
456         u64 bytenr;
457         u64 gen;
458         struct cache_extent cache_extent;
459 };
460
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values yet; they are only
 * used internally to classify errors.  Every flag has a bit of its own so
 * that a combined result can be decoded unambiguously.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Used to share bit 4 with REFERENCER_MISMATCH, which made the two errors
 * impossible to tell apart; moved to the first free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
477
478 static void *print_status_check(void *p)
479 {
480         struct task_ctx *priv = p;
481         const char work_indicator[] = { '.', 'o', 'O', 'o' };
482         uint32_t count = 0;
483         static char *task_position_string[] = {
484                 "checking extents",
485                 "checking free space cache",
486                 "checking fs roots",
487         };
488
489         task_period_start(priv->info, 1000 /* 1s */);
490
491         if (priv->tp == TASK_NOTHING)
492                 return NULL;
493
494         while (1) {
495                 printf("%s [%c]\r", task_position_string[priv->tp],
496                                 work_indicator[count % 4]);
497                 count++;
498                 fflush(stdout);
499                 task_period_wait(priv->info);
500         }
501         return NULL;
502 }
503
/*
 * Terminate the progress line: write a newline to stdout and flush it.
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
511
512 static enum btrfs_check_mode parse_check_mode(const char *str)
513 {
514         if (strcmp(str, "lowmem") == 0)
515                 return CHECK_MODE_LOWMEM;
516         if (strcmp(str, "orig") == 0)
517                 return CHECK_MODE_ORIGINAL;
518         if (strcmp(str, "original") == 0)
519                 return CHECK_MODE_ORIGINAL;
520
521         return CHECK_MODE_UNKNOWN;
522 }
523
524 /* Compatible function to allow reuse of old codes */
525 static u64 first_extent_gap(struct rb_root *holes)
526 {
527         struct file_extent_hole *hole;
528
529         if (RB_EMPTY_ROOT(holes))
530                 return (u64)-1;
531
532         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
533         return hole->start;
534 }
535
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
537 {
538         struct file_extent_hole *hole1;
539         struct file_extent_hole *hole2;
540
541         hole1 = rb_entry(node1, struct file_extent_hole, node);
542         hole2 = rb_entry(node2, struct file_extent_hole, node);
543
544         if (hole1->start > hole2->start)
545                 return -1;
546         if (hole1->start < hole2->start)
547                 return 1;
548         /* Now hole1->start == hole2->start */
549         if (hole1->len >= hole2->len)
550                 /*
551                  * Hole 1 will be merge center
552                  * Same hole will be merged later
553                  */
554                 return -1;
555         /* Hole 2 will be merge center */
556         return 1;
557 }
558
559 /*
560  * Add a hole to the record
561  *
562  * This will do hole merge for copy_file_extent_holes(),
563  * which will ensure there won't be continuous holes.
564  */
565 static int add_file_extent_hole(struct rb_root *holes,
566                                 u64 start, u64 len)
567 {
568         struct file_extent_hole *hole;
569         struct file_extent_hole *prev = NULL;
570         struct file_extent_hole *next = NULL;
571
572         hole = malloc(sizeof(*hole));
573         if (!hole)
574                 return -ENOMEM;
575         hole->start = start;
576         hole->len = len;
577         /* Since compare will not return 0, no -EEXIST will happen */
578         rb_insert(holes, &hole->node, compare_hole);
579
580         /* simple merge with previous hole */
581         if (rb_prev(&hole->node))
582                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
583                                 node);
584         if (prev && prev->start + prev->len >= hole->start) {
585                 hole->len = hole->start + hole->len - prev->start;
586                 hole->start = prev->start;
587                 rb_erase(&prev->node, holes);
588                 free(prev);
589                 prev = NULL;
590         }
591
592         /* iterate merge with next holes */
593         while (1) {
594                 if (!rb_next(&hole->node))
595                         break;
596                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
597                                         node);
598                 if (hole->start + hole->len >= next->start) {
599                         if (hole->start + hole->len <= next->start + next->len)
600                                 hole->len = next->start + next->len -
601                                             hole->start;
602                         rb_erase(&next->node, holes);
603                         free(next);
604                         next = NULL;
605                 } else
606                         break;
607         }
608         return 0;
609 }
610
611 static int compare_hole_range(struct rb_node *node, void *data)
612 {
613         struct file_extent_hole *hole;
614         u64 start;
615
616         hole = (struct file_extent_hole *)data;
617         start = hole->start;
618
619         hole = rb_entry(node, struct file_extent_hole, node);
620         if (start < hole->start)
621                 return -1;
622         if (start >= hole->start && start < hole->start + hole->len)
623                 return 0;
624         return 1;
625 }
626
627 /*
628  * Delete a hole in the record
629  *
630  * This will do the hole split and is much restrict than add.
631  */
632 static int del_file_extent_hole(struct rb_root *holes,
633                                 u64 start, u64 len)
634 {
635         struct file_extent_hole *hole;
636         struct file_extent_hole tmp;
637         u64 prev_start = 0;
638         u64 prev_len = 0;
639         u64 next_start = 0;
640         u64 next_len = 0;
641         struct rb_node *node;
642         int have_prev = 0;
643         int have_next = 0;
644         int ret = 0;
645
646         tmp.start = start;
647         tmp.len = len;
648         node = rb_search(holes, &tmp, compare_hole_range, NULL);
649         if (!node)
650                 return -EEXIST;
651         hole = rb_entry(node, struct file_extent_hole, node);
652         if (start + len > hole->start + hole->len)
653                 return -EEXIST;
654
655         /*
656          * Now there will be no overlap, delete the hole and re-add the
657          * split(s) if they exists.
658          */
659         if (start > hole->start) {
660                 prev_start = hole->start;
661                 prev_len = start - hole->start;
662                 have_prev = 1;
663         }
664         if (hole->start + hole->len > start + len) {
665                 next_start = start + len;
666                 next_len = hole->start + hole->len - start - len;
667                 have_next = 1;
668         }
669         rb_erase(node, holes);
670         free(hole);
671         if (have_prev) {
672                 ret = add_file_extent_hole(holes, prev_start, prev_len);
673                 if (ret < 0)
674                         return ret;
675         }
676         if (have_next) {
677                 ret = add_file_extent_hole(holes, next_start, next_len);
678                 if (ret < 0)
679                         return ret;
680         }
681         return 0;
682 }
683
684 static int copy_file_extent_holes(struct rb_root *dst,
685                                   struct rb_root *src)
686 {
687         struct file_extent_hole *hole;
688         struct rb_node *node;
689         int ret = 0;
690
691         node = rb_first(src);
692         while (node) {
693                 hole = rb_entry(node, struct file_extent_hole, node);
694                 ret = add_file_extent_hole(dst, hole->start, hole->len);
695                 if (ret)
696                         break;
697                 node = rb_next(node);
698         }
699         return ret;
700 }
701
702 static void free_file_extent_holes(struct rb_root *holes)
703 {
704         struct rb_node *node;
705         struct file_extent_hole *hole;
706
707         node = rb_first(holes);
708         while (node) {
709                 hole = rb_entry(node, struct file_extent_hole, node);
710                 rb_erase(node, holes);
711                 free(hole);
712                 node = rb_first(holes);
713         }
714 }
715
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
717
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719                                  struct btrfs_root *root)
720 {
721         if (root->last_trans != trans->transid) {
722                 root->track_dirty = 1;
723                 root->last_trans = trans->transid;
724                 root->commit_root = root->node;
725                 extent_buffer_get(root->node);
726         }
727 }
728
729 static u8 imode_to_type(u32 imode)
730 {
731 #define S_SHIFT 12
732         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
734                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
735                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
736                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
737                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
738                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
739                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
740         };
741
742         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
743 #undef S_SHIFT
744 }
745
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
747 {
748         struct device_record *rec1;
749         struct device_record *rec2;
750
751         rec1 = rb_entry(node1, struct device_record, node);
752         rec2 = rb_entry(node2, struct device_record, node);
753         if (rec1->devid > rec2->devid)
754                 return -1;
755         else if (rec1->devid < rec2->devid)
756                 return 1;
757         else
758                 return 0;
759 }
760
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
762 {
763         struct inode_record *rec;
764         struct inode_backref *backref;
765         struct inode_backref *orig;
766         struct inode_backref *tmp;
767         struct orphan_data_extent *src_orphan;
768         struct orphan_data_extent *dst_orphan;
769         struct rb_node *rb;
770         size_t size;
771         int ret;
772
773         rec = malloc(sizeof(*rec));
774         if (!rec)
775                 return ERR_PTR(-ENOMEM);
776         memcpy(rec, orig_rec, sizeof(*rec));
777         rec->refs = 1;
778         INIT_LIST_HEAD(&rec->backrefs);
779         INIT_LIST_HEAD(&rec->orphan_extents);
780         rec->holes = RB_ROOT;
781
782         list_for_each_entry(orig, &orig_rec->backrefs, list) {
783                 size = sizeof(*orig) + orig->namelen + 1;
784                 backref = malloc(size);
785                 if (!backref) {
786                         ret = -ENOMEM;
787                         goto cleanup;
788                 }
789                 memcpy(backref, orig, size);
790                 list_add_tail(&backref->list, &rec->backrefs);
791         }
792         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793                 dst_orphan = malloc(sizeof(*dst_orphan));
794                 if (!dst_orphan) {
795                         ret = -ENOMEM;
796                         goto cleanup;
797                 }
798                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
800         }
801         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
802         if (ret < 0)
803                 goto cleanup_rb;
804
805         return rec;
806
807 cleanup_rb:
808         rb = rb_first(&rec->holes);
809         while (rb) {
810                 struct file_extent_hole *hole;
811
812                 hole = rb_entry(rb, struct file_extent_hole, node);
813                 rb = rb_next(rb);
814                 free(hole);
815         }
816
817 cleanup:
818         if (!list_empty(&rec->backrefs))
819                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820                         list_del(&orig->list);
821                         free(orig);
822                 }
823
824         if (!list_empty(&rec->orphan_extents))
825                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826                         list_del(&orig->list);
827                         free(orig);
828                 }
829
830         free(rec);
831
832         return ERR_PTR(ret);
833 }
834
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
836                                       u64 objectid)
837 {
838         struct orphan_data_extent *orphan;
839
840         if (list_empty(orphan_extents))
841                 return;
842         printf("The following data extent is lost in tree %llu:\n",
843                objectid);
844         list_for_each_entry(orphan, orphan_extents, list) {
845                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
847                        orphan->disk_len);
848         }
849 }
850
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
852 {
853         u64 root_objectid = root->root_key.objectid;
854         int errors = rec->errors;
855
856         if (!errors)
857                 return;
858         /* reloc root errors, we print its corresponding fs root objectid*/
859         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860                 root_objectid = root->root_key.offset;
861                 fprintf(stderr, "reloc");
862         }
863         fprintf(stderr, "root %llu inode %llu errors %x",
864                 (unsigned long long) root_objectid,
865                 (unsigned long long) rec->ino, rec->errors);
866
867         if (errors & I_ERR_NO_INODE_ITEM)
868                 fprintf(stderr, ", no inode item");
869         if (errors & I_ERR_NO_ORPHAN_ITEM)
870                 fprintf(stderr, ", no orphan item");
871         if (errors & I_ERR_DUP_INODE_ITEM)
872                 fprintf(stderr, ", dup inode item");
873         if (errors & I_ERR_DUP_DIR_INDEX)
874                 fprintf(stderr, ", dup dir index");
875         if (errors & I_ERR_ODD_DIR_ITEM)
876                 fprintf(stderr, ", odd dir item");
877         if (errors & I_ERR_ODD_FILE_EXTENT)
878                 fprintf(stderr, ", odd file extent");
879         if (errors & I_ERR_BAD_FILE_EXTENT)
880                 fprintf(stderr, ", bad file extent");
881         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882                 fprintf(stderr, ", file extent overlap");
883         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884                 fprintf(stderr, ", file extent discount");
885         if (errors & I_ERR_DIR_ISIZE_WRONG)
886                 fprintf(stderr, ", dir isize wrong");
887         if (errors & I_ERR_FILE_NBYTES_WRONG)
888                 fprintf(stderr, ", nbytes wrong");
889         if (errors & I_ERR_ODD_CSUM_ITEM)
890                 fprintf(stderr, ", odd csum item");
891         if (errors & I_ERR_SOME_CSUM_MISSING)
892                 fprintf(stderr, ", some csum missing");
893         if (errors & I_ERR_LINK_COUNT_WRONG)
894                 fprintf(stderr, ", link count wrong");
895         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896                 fprintf(stderr, ", orphan file extent");
897         fprintf(stderr, "\n");
898         /* Print the orphan extents if needed */
899         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
901
902         /* Print the holes if needed */
903         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904                 struct file_extent_hole *hole;
905                 struct rb_node *node;
906                 int found = 0;
907
908                 node = rb_first(&rec->holes);
909                 fprintf(stderr, "Found file extent holes:\n");
910                 while (node) {
911                         found = 1;
912                         hole = rb_entry(node, struct file_extent_hole, node);
913                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
914                                 hole->start, hole->len);
915                         node = rb_next(node);
916                 }
917                 if (!found)
918                         fprintf(stderr, "\tstart: 0, len: %llu\n",
919                                 round_up(rec->isize,
920                                          root->fs_info->sectorsize));
921         }
922 }
923
924 static void print_ref_error(int errors)
925 {
926         if (errors & REF_ERR_NO_DIR_ITEM)
927                 fprintf(stderr, ", no dir item");
928         if (errors & REF_ERR_NO_DIR_INDEX)
929                 fprintf(stderr, ", no dir index");
930         if (errors & REF_ERR_NO_INODE_REF)
931                 fprintf(stderr, ", no inode ref");
932         if (errors & REF_ERR_DUP_DIR_ITEM)
933                 fprintf(stderr, ", dup dir item");
934         if (errors & REF_ERR_DUP_DIR_INDEX)
935                 fprintf(stderr, ", dup dir index");
936         if (errors & REF_ERR_DUP_INODE_REF)
937                 fprintf(stderr, ", dup inode ref");
938         if (errors & REF_ERR_INDEX_UNMATCH)
939                 fprintf(stderr, ", index mismatch");
940         if (errors & REF_ERR_FILETYPE_UNMATCH)
941                 fprintf(stderr, ", filetype mismatch");
942         if (errors & REF_ERR_NAME_TOO_LONG)
943                 fprintf(stderr, ", name too long");
944         if (errors & REF_ERR_NO_ROOT_REF)
945                 fprintf(stderr, ", no root ref");
946         if (errors & REF_ERR_NO_ROOT_BACKREF)
947                 fprintf(stderr, ", no root backref");
948         if (errors & REF_ERR_DUP_ROOT_REF)
949                 fprintf(stderr, ", dup root ref");
950         if (errors & REF_ERR_DUP_ROOT_BACKREF)
951                 fprintf(stderr, ", dup root backref");
952         fprintf(stderr, "\n");
953 }
954
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956                                           u64 ino, int mod)
957 {
958         struct ptr_node *node;
959         struct cache_extent *cache;
960         struct inode_record *rec = NULL;
961         int ret;
962
963         cache = lookup_cache_extent(inode_cache, ino, 1);
964         if (cache) {
965                 node = container_of(cache, struct ptr_node, cache);
966                 rec = node->data;
967                 if (mod && rec->refs > 1) {
968                         node->data = clone_inode_rec(rec);
969                         if (IS_ERR(node->data))
970                                 return node->data;
971                         rec->refs--;
972                         rec = node->data;
973                 }
974         } else if (mod) {
975                 rec = calloc(1, sizeof(*rec));
976                 if (!rec)
977                         return ERR_PTR(-ENOMEM);
978                 rec->ino = ino;
979                 rec->extent_start = (u64)-1;
980                 rec->refs = 1;
981                 INIT_LIST_HEAD(&rec->backrefs);
982                 INIT_LIST_HEAD(&rec->orphan_extents);
983                 rec->holes = RB_ROOT;
984
985                 node = malloc(sizeof(*node));
986                 if (!node) {
987                         free(rec);
988                         return ERR_PTR(-ENOMEM);
989                 }
990                 node->cache.start = ino;
991                 node->cache.size = 1;
992                 node->data = rec;
993
994                 if (ino == BTRFS_FREE_INO_OBJECTID)
995                         rec->found_link = 1;
996
997                 ret = insert_cache_extent(inode_cache, &node->cache);
998                 if (ret)
999                         return ERR_PTR(-EEXIST);
1000         }
1001         return rec;
1002 }
1003
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1005 {
1006         struct orphan_data_extent *orphan;
1007
1008         while (!list_empty(orphan_extents)) {
1009                 orphan = list_entry(orphan_extents->next,
1010                                     struct orphan_data_extent, list);
1011                 list_del(&orphan->list);
1012                 free(orphan);
1013         }
1014 }
1015
1016 static void free_inode_rec(struct inode_record *rec)
1017 {
1018         struct inode_backref *backref;
1019
1020         if (--rec->refs > 0)
1021                 return;
1022
1023         while (!list_empty(&rec->backrefs)) {
1024                 backref = to_inode_backref(rec->backrefs.next);
1025                 list_del(&backref->list);
1026                 free(backref);
1027         }
1028         free_orphan_data_extents(&rec->orphan_extents);
1029         free_file_extent_holes(&rec->holes);
1030         free(rec);
1031 }
1032
1033 static int can_free_inode_rec(struct inode_record *rec)
1034 {
1035         if (!rec->errors && rec->checked && rec->found_inode_item &&
1036             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1037                 return 1;
1038         return 0;
1039 }
1040
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042                                  struct inode_record *rec)
1043 {
1044         struct cache_extent *cache;
1045         struct inode_backref *tmp, *backref;
1046         struct ptr_node *node;
1047         u8 filetype;
1048
1049         if (!rec->found_inode_item)
1050                 return;
1051
1052         filetype = imode_to_type(rec->imode);
1053         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054                 if (backref->found_dir_item && backref->found_dir_index) {
1055                         if (backref->filetype != filetype)
1056                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057                         if (!backref->errors && backref->found_inode_ref &&
1058                             rec->nlink == rec->found_link) {
1059                                 list_del(&backref->list);
1060                                 free(backref);
1061                         }
1062                 }
1063         }
1064
1065         if (!rec->checked || rec->merging)
1066                 return;
1067
1068         if (S_ISDIR(rec->imode)) {
1069                 if (rec->found_size != rec->isize)
1070                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071                 if (rec->found_file_extent)
1072                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074                 if (rec->found_dir_item)
1075                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1076                 if (rec->found_size != rec->nbytes)
1077                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1078                 if (rec->nlink > 0 && !no_holes &&
1079                     (rec->extent_end < rec->isize ||
1080                      first_extent_gap(&rec->holes) < rec->isize))
1081                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082         }
1083
1084         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085                 if (rec->found_csum_item && rec->nodatasum)
1086                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087                 if (rec->some_csum_missing && !rec->nodatasum)
1088                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089         }
1090
1091         BUG_ON(rec->refs != 1);
1092         if (can_free_inode_rec(rec)) {
1093                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094                 node = container_of(cache, struct ptr_node, cache);
1095                 BUG_ON(node->data != rec);
1096                 remove_cache_extent(inode_cache, &node->cache);
1097                 free(node);
1098                 free_inode_rec(rec);
1099         }
1100 }
1101
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1103 {
1104         struct btrfs_path path;
1105         struct btrfs_key key;
1106         int ret;
1107
1108         key.objectid = BTRFS_ORPHAN_OBJECTID;
1109         key.type = BTRFS_ORPHAN_ITEM_KEY;
1110         key.offset = ino;
1111
1112         btrfs_init_path(&path);
1113         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114         btrfs_release_path(&path);
1115         if (ret > 0)
1116                 ret = -ENOENT;
1117         return ret;
1118 }
1119
1120 static int process_inode_item(struct extent_buffer *eb,
1121                               int slot, struct btrfs_key *key,
1122                               struct shared_node *active_node)
1123 {
1124         struct inode_record *rec;
1125         struct btrfs_inode_item *item;
1126
1127         rec = active_node->current;
1128         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129         if (rec->found_inode_item) {
1130                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131                 return 1;
1132         }
1133         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134         rec->nlink = btrfs_inode_nlink(eb, item);
1135         rec->isize = btrfs_inode_size(eb, item);
1136         rec->nbytes = btrfs_inode_nbytes(eb, item);
1137         rec->imode = btrfs_inode_mode(eb, item);
1138         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1139                 rec->nodatasum = 1;
1140         rec->found_inode_item = 1;
1141         if (rec->nlink == 0)
1142                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143         maybe_free_inode_rec(&active_node->inode_cache, rec);
1144         return 0;
1145 }
1146
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1148                                                 const char *name,
1149                                                 int namelen, u64 dir)
1150 {
1151         struct inode_backref *backref;
1152
1153         list_for_each_entry(backref, &rec->backrefs, list) {
1154                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1155                         break;
1156                 if (backref->dir != dir || backref->namelen != namelen)
1157                         continue;
1158                 if (memcmp(name, backref->name, namelen))
1159                         continue;
1160                 return backref;
1161         }
1162
1163         backref = malloc(sizeof(*backref) + namelen + 1);
1164         if (!backref)
1165                 return NULL;
1166         memset(backref, 0, sizeof(*backref));
1167         backref->dir = dir;
1168         backref->namelen = namelen;
1169         memcpy(backref->name, name, namelen);
1170         backref->name[namelen] = '\0';
1171         list_add_tail(&backref->list, &rec->backrefs);
1172         return backref;
1173 }
1174
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176                              u64 ino, u64 dir, u64 index,
1177                              const char *name, int namelen,
1178                              u8 filetype, u8 itemtype, int errors)
1179 {
1180         struct inode_record *rec;
1181         struct inode_backref *backref;
1182
1183         rec = get_inode_rec(inode_cache, ino, 1);
1184         BUG_ON(IS_ERR(rec));
1185         backref = get_inode_backref(rec, name, namelen, dir);
1186         BUG_ON(!backref);
1187         if (errors)
1188                 backref->errors |= errors;
1189         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190                 if (backref->found_dir_index)
1191                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192                 if (backref->found_inode_ref && backref->index != index)
1193                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1194                 if (backref->found_dir_item && backref->filetype != filetype)
1195                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1196
1197                 backref->index = index;
1198                 backref->filetype = filetype;
1199                 backref->found_dir_index = 1;
1200         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1201                 rec->found_link++;
1202                 if (backref->found_dir_item)
1203                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204                 if (backref->found_dir_index && backref->filetype != filetype)
1205                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1206
1207                 backref->filetype = filetype;
1208                 backref->found_dir_item = 1;
1209         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211                 if (backref->found_inode_ref)
1212                         backref->errors |= REF_ERR_DUP_INODE_REF;
1213                 if (backref->found_dir_index && backref->index != index)
1214                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1215                 else
1216                         backref->index = index;
1217
1218                 backref->ref_type = itemtype;
1219                 backref->found_inode_ref = 1;
1220         } else {
1221                 BUG_ON(1);
1222         }
1223
1224         maybe_free_inode_rec(inode_cache, rec);
1225         return 0;
1226 }
1227
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229                             struct cache_tree *dst_cache)
1230 {
1231         struct inode_backref *backref;
1232         u32 dir_count = 0;
1233         int ret = 0;
1234
1235         dst->merging = 1;
1236         list_for_each_entry(backref, &src->backrefs, list) {
1237                 if (backref->found_dir_index) {
1238                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1239                                         backref->index, backref->name,
1240                                         backref->namelen, backref->filetype,
1241                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1242                 }
1243                 if (backref->found_dir_item) {
1244                         dir_count++;
1245                         add_inode_backref(dst_cache, dst->ino,
1246                                         backref->dir, 0, backref->name,
1247                                         backref->namelen, backref->filetype,
1248                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1249                 }
1250                 if (backref->found_inode_ref) {
1251                         add_inode_backref(dst_cache, dst->ino,
1252                                         backref->dir, backref->index,
1253                                         backref->name, backref->namelen, 0,
1254                                         backref->ref_type, backref->errors);
1255                 }
1256         }
1257
1258         if (src->found_dir_item)
1259                 dst->found_dir_item = 1;
1260         if (src->found_file_extent)
1261                 dst->found_file_extent = 1;
1262         if (src->found_csum_item)
1263                 dst->found_csum_item = 1;
1264         if (src->some_csum_missing)
1265                 dst->some_csum_missing = 1;
1266         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1268                 if (ret < 0)
1269                         return ret;
1270         }
1271
1272         BUG_ON(src->found_link < dir_count);
1273         dst->found_link += src->found_link - dir_count;
1274         dst->found_size += src->found_size;
1275         if (src->extent_start != (u64)-1) {
1276                 if (dst->extent_start == (u64)-1) {
1277                         dst->extent_start = src->extent_start;
1278                         dst->extent_end = src->extent_end;
1279                 } else {
1280                         if (dst->extent_end > src->extent_start)
1281                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282                         else if (dst->extent_end < src->extent_start) {
1283                                 ret = add_file_extent_hole(&dst->holes,
1284                                         dst->extent_end,
1285                                         src->extent_start - dst->extent_end);
1286                         }
1287                         if (dst->extent_end < src->extent_end)
1288                                 dst->extent_end = src->extent_end;
1289                 }
1290         }
1291
1292         dst->errors |= src->errors;
1293         if (src->found_inode_item) {
1294                 if (!dst->found_inode_item) {
1295                         dst->nlink = src->nlink;
1296                         dst->isize = src->isize;
1297                         dst->nbytes = src->nbytes;
1298                         dst->imode = src->imode;
1299                         dst->nodatasum = src->nodatasum;
1300                         dst->found_inode_item = 1;
1301                 } else {
1302                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1303                 }
1304         }
1305         dst->merging = 0;
1306
1307         return 0;
1308 }
1309
1310 static int splice_shared_node(struct shared_node *src_node,
1311                               struct shared_node *dst_node)
1312 {
1313         struct cache_extent *cache;
1314         struct ptr_node *node, *ins;
1315         struct cache_tree *src, *dst;
1316         struct inode_record *rec, *conflict;
1317         u64 current_ino = 0;
1318         int splice = 0;
1319         int ret;
1320
1321         if (--src_node->refs == 0)
1322                 splice = 1;
1323         if (src_node->current)
1324                 current_ino = src_node->current->ino;
1325
1326         src = &src_node->root_cache;
1327         dst = &dst_node->root_cache;
1328 again:
1329         cache = search_cache_extent(src, 0);
1330         while (cache) {
1331                 node = container_of(cache, struct ptr_node, cache);
1332                 rec = node->data;
1333                 cache = next_cache_extent(cache);
1334
1335                 if (splice) {
1336                         remove_cache_extent(src, &node->cache);
1337                         ins = node;
1338                 } else {
1339                         ins = malloc(sizeof(*ins));
1340                         BUG_ON(!ins);
1341                         ins->cache.start = node->cache.start;
1342                         ins->cache.size = node->cache.size;
1343                         ins->data = rec;
1344                         rec->refs++;
1345                 }
1346                 ret = insert_cache_extent(dst, &ins->cache);
1347                 if (ret == -EEXIST) {
1348                         conflict = get_inode_rec(dst, rec->ino, 1);
1349                         BUG_ON(IS_ERR(conflict));
1350                         merge_inode_recs(rec, conflict, dst);
1351                         if (rec->checked) {
1352                                 conflict->checked = 1;
1353                                 if (dst_node->current == conflict)
1354                                         dst_node->current = NULL;
1355                         }
1356                         maybe_free_inode_rec(dst, conflict);
1357                         free_inode_rec(rec);
1358                         free(ins);
1359                 } else {
1360                         BUG_ON(ret);
1361                 }
1362         }
1363
1364         if (src == &src_node->root_cache) {
1365                 src = &src_node->inode_cache;
1366                 dst = &dst_node->inode_cache;
1367                 goto again;
1368         }
1369
1370         if (current_ino > 0 && (!dst_node->current ||
1371             current_ino > dst_node->current->ino)) {
1372                 if (dst_node->current) {
1373                         dst_node->current->checked = 1;
1374                         maybe_free_inode_rec(dst, dst_node->current);
1375                 }
1376                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377                 BUG_ON(IS_ERR(dst_node->current));
1378         }
1379         return 0;
1380 }
1381
1382 static void free_inode_ptr(struct cache_extent *cache)
1383 {
1384         struct ptr_node *node;
1385         struct inode_record *rec;
1386
1387         node = container_of(cache, struct ptr_node, cache);
1388         rec = node->data;
1389         free_inode_rec(rec);
1390         free(node);
1391 }
1392
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1394
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396                                             u64 bytenr)
1397 {
1398         struct cache_extent *cache;
1399         struct shared_node *node;
1400
1401         cache = lookup_cache_extent(shared, bytenr, 1);
1402         if (cache) {
1403                 node = container_of(cache, struct shared_node, cache);
1404                 return node;
1405         }
1406         return NULL;
1407 }
1408
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 {
1411         int ret;
1412         struct shared_node *node;
1413
1414         node = calloc(1, sizeof(*node));
1415         if (!node)
1416                 return -ENOMEM;
1417         node->cache.start = bytenr;
1418         node->cache.size = 1;
1419         cache_tree_init(&node->root_cache);
1420         cache_tree_init(&node->inode_cache);
1421         node->refs = refs;
1422
1423         ret = insert_cache_extent(shared, &node->cache);
1424
1425         return ret;
1426 }
1427
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429                              struct walk_control *wc, int level)
1430 {
1431         struct shared_node *node;
1432         struct shared_node *dest;
1433         int ret;
1434
1435         if (level == wc->active_node)
1436                 return 0;
1437
1438         BUG_ON(wc->active_node <= level);
1439         node = find_shared_node(&wc->shared, bytenr);
1440         if (!node) {
1441                 ret = add_shared_node(&wc->shared, bytenr, refs);
1442                 BUG_ON(ret);
1443                 node = find_shared_node(&wc->shared, bytenr);
1444                 wc->nodes[level] = node;
1445                 wc->active_node = level;
1446                 return 0;
1447         }
1448
1449         if (wc->root_level == wc->active_node &&
1450             btrfs_root_refs(&root->root_item) == 0) {
1451                 if (--node->refs == 0) {
1452                         free_inode_recs_tree(&node->root_cache);
1453                         free_inode_recs_tree(&node->inode_cache);
1454                         remove_cache_extent(&wc->shared, &node->cache);
1455                         free(node);
1456                 }
1457                 return 1;
1458         }
1459
1460         dest = wc->nodes[wc->active_node];
1461         splice_shared_node(node, dest);
1462         if (node->refs == 0) {
1463                 remove_cache_extent(&wc->shared, &node->cache);
1464                 free(node);
1465         }
1466         return 1;
1467 }
1468
1469 static int leave_shared_node(struct btrfs_root *root,
1470                              struct walk_control *wc, int level)
1471 {
1472         struct shared_node *node;
1473         struct shared_node *dest;
1474         int i;
1475
1476         if (level == wc->root_level)
1477                 return 0;
1478
1479         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1480                 if (wc->nodes[i])
1481                         break;
1482         }
1483         BUG_ON(i >= BTRFS_MAX_LEVEL);
1484
1485         node = wc->nodes[wc->active_node];
1486         wc->nodes[wc->active_node] = NULL;
1487         wc->active_node = i;
1488
1489         dest = wc->nodes[wc->active_node];
1490         if (wc->active_node < wc->root_level ||
1491             btrfs_root_refs(&root->root_item) > 0) {
1492                 BUG_ON(node->refs <= 1);
1493                 splice_shared_node(node, dest);
1494         } else {
1495                 BUG_ON(node->refs < 2);
1496                 node->refs--;
1497         }
1498         return 0;
1499 }
1500
1501 /*
1502  * Returns:
1503  * < 0 - on error
1504  * 1   - if the root with id child_root_id is a child of root parent_root_id
1505  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1506  *       has other root(s) as parent(s)
1507  * 2   - if the root child_root_id doesn't have any parent roots
1508  */
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510                          u64 child_root_id)
1511 {
1512         struct btrfs_path path;
1513         struct btrfs_key key;
1514         struct extent_buffer *leaf;
1515         int has_parent = 0;
1516         int ret;
1517
1518         btrfs_init_path(&path);
1519
1520         key.objectid = parent_root_id;
1521         key.type = BTRFS_ROOT_REF_KEY;
1522         key.offset = child_root_id;
1523         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1524                                 0, 0);
1525         if (ret < 0)
1526                 return ret;
1527         btrfs_release_path(&path);
1528         if (!ret)
1529                 return 1;
1530
1531         key.objectid = child_root_id;
1532         key.type = BTRFS_ROOT_BACKREF_KEY;
1533         key.offset = 0;
1534         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1535                                 0, 0);
1536         if (ret < 0)
1537                 goto out;
1538
1539         while (1) {
1540                 leaf = path.nodes[0];
1541                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543                         if (ret)
1544                                 break;
1545                         leaf = path.nodes[0];
1546                 }
1547
1548                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1549                 if (key.objectid != child_root_id ||
1550                     key.type != BTRFS_ROOT_BACKREF_KEY)
1551                         break;
1552
1553                 has_parent = 1;
1554
1555                 if (key.offset == parent_root_id) {
1556                         btrfs_release_path(&path);
1557                         return 1;
1558                 }
1559
1560                 path.slots[0]++;
1561         }
1562 out:
1563         btrfs_release_path(&path);
1564         if (ret < 0)
1565                 return ret;
1566         return has_parent ? 0 : 2;
1567 }
1568
1569 static int process_dir_item(struct extent_buffer *eb,
1570                             int slot, struct btrfs_key *key,
1571                             struct shared_node *active_node)
1572 {
1573         u32 total;
1574         u32 cur = 0;
1575         u32 len;
1576         u32 name_len;
1577         u32 data_len;
1578         int error;
1579         int nritems = 0;
1580         u8 filetype;
1581         struct btrfs_dir_item *di;
1582         struct inode_record *rec;
1583         struct cache_tree *root_cache;
1584         struct cache_tree *inode_cache;
1585         struct btrfs_key location;
1586         char namebuf[BTRFS_NAME_LEN];
1587
1588         root_cache = &active_node->root_cache;
1589         inode_cache = &active_node->inode_cache;
1590         rec = active_node->current;
1591         rec->found_dir_item = 1;
1592
1593         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594         total = btrfs_item_size_nr(eb, slot);
1595         while (cur < total) {
1596                 nritems++;
1597                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598                 name_len = btrfs_dir_name_len(eb, di);
1599                 data_len = btrfs_dir_data_len(eb, di);
1600                 filetype = btrfs_dir_type(eb, di);
1601
1602                 rec->found_size += name_len;
1603                 if (cur + sizeof(*di) + name_len > total ||
1604                     name_len > BTRFS_NAME_LEN) {
1605                         error = REF_ERR_NAME_TOO_LONG;
1606
1607                         if (cur + sizeof(*di) > total)
1608                                 break;
1609                         len = min_t(u32, total - cur - sizeof(*di),
1610                                     BTRFS_NAME_LEN);
1611                 } else {
1612                         len = name_len;
1613                         error = 0;
1614                 }
1615
1616                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1617
1618                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619                     key->offset != btrfs_name_hash(namebuf, len)) {
1620                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1621                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622                         key->objectid, key->offset, namebuf, len, filetype,
1623                         key->offset, btrfs_name_hash(namebuf, len));
1624                 }
1625
1626                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627                         add_inode_backref(inode_cache, location.objectid,
1628                                           key->objectid, key->offset, namebuf,
1629                                           len, filetype, key->type, error);
1630                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631                         add_inode_backref(root_cache, location.objectid,
1632                                           key->objectid, key->offset,
1633                                           namebuf, len, filetype,
1634                                           key->type, error);
1635                 } else {
1636                         fprintf(stderr, "invalid location in dir item %u\n",
1637                                 location.type);
1638                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639                                           key->objectid, key->offset, namebuf,
1640                                           len, filetype, key->type, error);
1641                 }
1642
1643                 len = sizeof(*di) + name_len + data_len;
1644                 di = (struct btrfs_dir_item *)((char *)di + len);
1645                 cur += len;
1646         }
1647         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1649
1650         return 0;
1651 }
1652
1653 static int process_inode_ref(struct extent_buffer *eb,
1654                              int slot, struct btrfs_key *key,
1655                              struct shared_node *active_node)
1656 {
1657         u32 total;
1658         u32 cur = 0;
1659         u32 len;
1660         u32 name_len;
1661         u64 index;
1662         int error;
1663         struct cache_tree *inode_cache;
1664         struct btrfs_inode_ref *ref;
1665         char namebuf[BTRFS_NAME_LEN];
1666
1667         inode_cache = &active_node->inode_cache;
1668
1669         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670         total = btrfs_item_size_nr(eb, slot);
1671         while (cur < total) {
1672                 name_len = btrfs_inode_ref_name_len(eb, ref);
1673                 index = btrfs_inode_ref_index(eb, ref);
1674
1675                 /* inode_ref + namelen should not cross item boundary */
1676                 if (cur + sizeof(*ref) + name_len > total ||
1677                     name_len > BTRFS_NAME_LEN) {
1678                         if (total < cur + sizeof(*ref))
1679                                 break;
1680
1681                         /* Still try to read out the remaining part */
1682                         len = min_t(u32, total - cur - sizeof(*ref),
1683                                     BTRFS_NAME_LEN);
1684                         error = REF_ERR_NAME_TOO_LONG;
1685                 } else {
1686                         len = name_len;
1687                         error = 0;
1688                 }
1689
1690                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691                 add_inode_backref(inode_cache, key->objectid, key->offset,
1692                                   index, namebuf, len, 0, key->type, error);
1693
1694                 len = sizeof(*ref) + name_len;
1695                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1696                 cur += len;
1697         }
1698         return 0;
1699 }
1700
1701 static int process_inode_extref(struct extent_buffer *eb,
1702                                 int slot, struct btrfs_key *key,
1703                                 struct shared_node *active_node)
1704 {
1705         u32 total;
1706         u32 cur = 0;
1707         u32 len;
1708         u32 name_len;
1709         u64 index;
1710         u64 parent;
1711         int error;
1712         struct cache_tree *inode_cache;
1713         struct btrfs_inode_extref *extref;
1714         char namebuf[BTRFS_NAME_LEN];
1715
1716         inode_cache = &active_node->inode_cache;
1717
1718         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719         total = btrfs_item_size_nr(eb, slot);
1720         while (cur < total) {
1721                 name_len = btrfs_inode_extref_name_len(eb, extref);
1722                 index = btrfs_inode_extref_index(eb, extref);
1723                 parent = btrfs_inode_extref_parent(eb, extref);
1724                 if (name_len <= BTRFS_NAME_LEN) {
1725                         len = name_len;
1726                         error = 0;
1727                 } else {
1728                         len = BTRFS_NAME_LEN;
1729                         error = REF_ERR_NAME_TOO_LONG;
1730                 }
1731                 read_extent_buffer(eb, namebuf,
1732                                    (unsigned long)(extref + 1), len);
1733                 add_inode_backref(inode_cache, key->objectid, parent,
1734                                   index, namebuf, len, 0, key->type, error);
1735
1736                 len = sizeof(*extref) + name_len;
1737                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1738                 cur += len;
1739         }
1740         return 0;
1741
1742 }
1743
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745                             u64 len, u64 *found)
1746 {
1747         struct btrfs_key key;
1748         struct btrfs_path path;
1749         struct extent_buffer *leaf;
1750         int ret;
1751         size_t size;
1752         *found = 0;
1753         u64 csum_end;
1754         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1755
1756         btrfs_init_path(&path);
1757
1758         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1759         key.offset = start;
1760         key.type = BTRFS_EXTENT_CSUM_KEY;
1761
1762         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1763                                 &key, &path, 0, 0);
1764         if (ret < 0)
1765                 goto out;
1766         if (ret > 0 && path.slots[0] > 0) {
1767                 leaf = path.nodes[0];
1768                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770                     key.type == BTRFS_EXTENT_CSUM_KEY)
1771                         path.slots[0]--;
1772         }
1773
1774         while (len > 0) {
1775                 leaf = path.nodes[0];
1776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1778                         if (ret > 0)
1779                                 break;
1780                         else if (ret < 0)
1781                                 goto out;
1782                         leaf = path.nodes[0];
1783                 }
1784
1785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1786                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787                     key.type != BTRFS_EXTENT_CSUM_KEY)
1788                         break;
1789
1790                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1791                 if (key.offset >= start + len)
1792                         break;
1793
1794                 if (key.offset > start)
1795                         start = key.offset;
1796
1797                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798                 csum_end = key.offset + (size / csum_size) *
1799                            root->fs_info->sectorsize;
1800                 if (csum_end > start) {
1801                         size = min(csum_end - start, len);
1802                         len -= size;
1803                         start += size;
1804                         *found += size;
1805                 }
1806
1807                 path.slots[0]++;
1808         }
1809 out:
1810         btrfs_release_path(&path);
1811         if (ret < 0)
1812                 return ret;
1813         return 0;
1814 }
1815
1816 static int process_file_extent(struct btrfs_root *root,
1817                                 struct extent_buffer *eb,
1818                                 int slot, struct btrfs_key *key,
1819                                 struct shared_node *active_node)
1820 {
1821         struct inode_record *rec;
1822         struct btrfs_file_extent_item *fi;
1823         u64 num_bytes = 0;
1824         u64 disk_bytenr = 0;
1825         u64 extent_offset = 0;
1826         u64 mask = root->fs_info->sectorsize - 1;
1827         int extent_type;
1828         int ret;
1829
1830         rec = active_node->current;
1831         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832         rec->found_file_extent = 1;
1833
1834         if (rec->extent_start == (u64)-1) {
1835                 rec->extent_start = key->offset;
1836                 rec->extent_end = key->offset;
1837         }
1838
1839         if (rec->extent_end > key->offset)
1840                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1841         else if (rec->extent_end < key->offset) {
1842                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843                                            key->offset - rec->extent_end);
1844                 if (ret < 0)
1845                         return ret;
1846         }
1847
1848         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849         extent_type = btrfs_file_extent_type(eb, fi);
1850
1851         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1853                 if (num_bytes == 0)
1854                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855                 rec->found_size += num_bytes;
1856                 num_bytes = (num_bytes + mask) & ~mask;
1857         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861                 extent_offset = btrfs_file_extent_offset(eb, fi);
1862                 if (num_bytes == 0 || (num_bytes & mask))
1863                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1864                 if (num_bytes + extent_offset >
1865                     btrfs_file_extent_ram_bytes(eb, fi))
1866                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1867                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868                     (btrfs_file_extent_compression(eb, fi) ||
1869                      btrfs_file_extent_encryption(eb, fi) ||
1870                      btrfs_file_extent_other_encoding(eb, fi)))
1871                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1872                 if (disk_bytenr > 0)
1873                         rec->found_size += num_bytes;
1874         } else {
1875                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1876         }
1877         rec->extent_end = key->offset + num_bytes;
1878
1879         /*
1880          * The data reloc tree will copy full extents into its inode and then
1881          * copy the corresponding csums.  Because the extent it copied could be
1882          * a preallocated extent that hasn't been written to yet there may be no
1883          * csums to copy, ergo we won't have csums for our file extent.  This is
1884          * ok so just don't bother checking csums if the inode belongs to the
1885          * data reloc tree.
1886          */
1887         if (disk_bytenr > 0 &&
1888             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1889                 u64 found;
1890                 if (btrfs_file_extent_compression(eb, fi))
1891                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1892                 else
1893                         disk_bytenr += extent_offset;
1894
1895                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896                 if (ret < 0)
1897                         return ret;
1898                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1899                         if (found > 0)
1900                                 rec->found_csum_item = 1;
1901                         if (found < num_bytes)
1902                                 rec->some_csum_missing = 1;
1903                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1904                         if (found > 0)
1905                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1906                 }
1907         }
1908         return 0;
1909 }
1910
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912                             struct walk_control *wc)
1913 {
1914         struct btrfs_key key;
1915         u32 nritems;
1916         int i;
1917         int ret = 0;
1918         struct cache_tree *inode_cache;
1919         struct shared_node *active_node;
1920
1921         if (wc->root_level == wc->active_node &&
1922             btrfs_root_refs(&root->root_item) == 0)
1923                 return 0;
1924
1925         active_node = wc->nodes[wc->active_node];
1926         inode_cache = &active_node->inode_cache;
1927         nritems = btrfs_header_nritems(eb);
1928         for (i = 0; i < nritems; i++) {
1929                 btrfs_item_key_to_cpu(eb, &key, i);
1930
1931                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1932                         continue;
1933                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1934                         continue;
1935
1936                 if (active_node->current == NULL ||
1937                     active_node->current->ino < key.objectid) {
1938                         if (active_node->current) {
1939                                 active_node->current->checked = 1;
1940                                 maybe_free_inode_rec(inode_cache,
1941                                                      active_node->current);
1942                         }
1943                         active_node->current = get_inode_rec(inode_cache,
1944                                                              key.objectid, 1);
1945                         BUG_ON(IS_ERR(active_node->current));
1946                 }
1947                 switch (key.type) {
1948                 case BTRFS_DIR_ITEM_KEY:
1949                 case BTRFS_DIR_INDEX_KEY:
1950                         ret = process_dir_item(eb, i, &key, active_node);
1951                         break;
1952                 case BTRFS_INODE_REF_KEY:
1953                         ret = process_inode_ref(eb, i, &key, active_node);
1954                         break;
1955                 case BTRFS_INODE_EXTREF_KEY:
1956                         ret = process_inode_extref(eb, i, &key, active_node);
1957                         break;
1958                 case BTRFS_INODE_ITEM_KEY:
1959                         ret = process_inode_item(eb, i, &key, active_node);
1960                         break;
1961                 case BTRFS_EXTENT_DATA_KEY:
1962                         ret = process_file_extent(root, eb, i, &key,
1963                                                   active_node);
1964                         break;
1965                 default:
1966                         break;
1967                 };
1968         }
1969         return ret;
1970 }
1971
1972 struct node_refs {
1973         u64 bytenr[BTRFS_MAX_LEVEL];
1974         u64 refs[BTRFS_MAX_LEVEL];
1975         int need_check[BTRFS_MAX_LEVEL];
1976 };
1977
1978 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1979                              struct node_refs *nrefs, u64 level);
1980 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1981                             unsigned int ext_ref);
1982
1983 /*
1984  * Returns >0  Found error, not fatal, should continue
1985  * Returns <0  Fatal error, must exit the whole check
1986  * Returns 0   No errors found
1987  */
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989                                struct node_refs *nrefs, int *level, int ext_ref)
1990 {
1991         struct extent_buffer *cur = path->nodes[0];
1992         struct btrfs_key key;
1993         u64 cur_bytenr;
1994         u32 nritems;
1995         u64 first_ino = 0;
1996         int root_level = btrfs_header_level(root->node);
1997         int i;
1998         int ret = 0; /* Final return value */
1999         int err = 0; /* Positive error bitmap */
2000
2001         cur_bytenr = cur->start;
2002
2003         /* skip to first inode item or the first inode number change */
2004         nritems = btrfs_header_nritems(cur);
2005         for (i = 0; i < nritems; i++) {
2006                 btrfs_item_key_to_cpu(cur, &key, i);
2007                 if (i == 0)
2008                         first_ino = key.objectid;
2009                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010                     (first_ino && first_ino != key.objectid))
2011                         break;
2012         }
2013         if (i == nritems) {
2014                 path->slots[0] = nritems;
2015                 return 0;
2016         }
2017         path->slots[0] = i;
2018
2019 again:
2020         err |= check_inode_item(root, path, ext_ref);
2021
2022         /* modify cur since check_inode_item may change path */
2023         cur = path->nodes[0];
2024
2025         if (err & LAST_ITEM)
2026                 goto out;
2027
2028         /* still have inode items in thie leaf */
2029         if (cur->start == cur_bytenr)
2030                 goto again;
2031
2032         /*
2033          * we have switched to another leaf, above nodes may
2034          * have changed, here walk down the path, if a node
2035          * or leaf is shared, check whether we can skip this
2036          * node or leaf.
2037          */
2038         for (i = root_level; i >= 0; i--) {
2039                 if (path->nodes[i]->start == nrefs->bytenr[i])
2040                         continue;
2041
2042                 ret = update_nodes_refs(root,
2043                                 path->nodes[i]->start,
2044                                 nrefs, i);
2045                 if (ret)
2046                         goto out;
2047
2048                 if (!nrefs->need_check[i]) {
2049                         *level += 1;
2050                         break;
2051                 }
2052         }
2053
2054         for (i = 0; i < *level; i++) {
2055                 free_extent_buffer(path->nodes[i]);
2056                 path->nodes[i] = NULL;
2057         }
2058 out:
2059         err &= ~LAST_ITEM;
2060         if (err && !ret)
2061                 ret = err;
2062         return ret;
2063 }
2064
2065 static void reada_walk_down(struct btrfs_root *root,
2066                             struct extent_buffer *node, int slot)
2067 {
2068         struct btrfs_fs_info *fs_info = root->fs_info;
2069         u64 bytenr;
2070         u64 ptr_gen;
2071         u32 nritems;
2072         int i;
2073         int level;
2074
2075         level = btrfs_header_level(node);
2076         if (level != 1)
2077                 return;
2078
2079         nritems = btrfs_header_nritems(node);
2080         for (i = slot; i < nritems; i++) {
2081                 bytenr = btrfs_node_blockptr(node, i);
2082                 ptr_gen = btrfs_node_ptr_generation(node, i);
2083                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2084         }
2085 }
2086
2087 /*
2088  * Check the child node/leaf by the following condition:
2089  * 1. the first item key of the node/leaf should be the same with the one
2090  *    in parent.
2091  * 2. block in parent node should match the child node/leaf.
2092  * 3. generation of parent node and child's header should be consistent.
2093  *
2094  * Or the child node/leaf pointed by the key in parent is not valid.
2095  *
2096  * We hope to check leaf owner too, but since subvol may share leaves,
2097  * which makes leaf owner check not so strong, key check should be
2098  * sufficient enough for that case.
2099  */
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101                             struct extent_buffer *child)
2102 {
2103         struct btrfs_key parent_key;
2104         struct btrfs_key child_key;
2105         int ret = 0;
2106
2107         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2108         if (btrfs_header_level(child) == 0)
2109                 btrfs_item_key_to_cpu(child, &child_key, 0);
2110         else
2111                 btrfs_node_key_to_cpu(child, &child_key, 0);
2112
2113         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114                 ret = -EINVAL;
2115                 fprintf(stderr,
2116                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117                         parent_key.objectid, parent_key.type, parent_key.offset,
2118                         child_key.objectid, child_key.type, child_key.offset);
2119         }
2120         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2121                 ret = -EINVAL;
2122                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123                         btrfs_node_blockptr(parent, slot),
2124                         btrfs_header_bytenr(child));
2125         }
2126         if (btrfs_node_ptr_generation(parent, slot) !=
2127             btrfs_header_generation(child)) {
2128                 ret = -EINVAL;
2129                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130                         btrfs_header_generation(child),
2131                         btrfs_node_ptr_generation(parent, slot));
2132         }
2133         return ret;
2134 }
2135
2136 /*
2137  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138  * in every fs or file tree check. Here we find its all root ids, and only check
2139  * it in the fs or file tree which has the smallest root id.
2140  */
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2142 {
2143         struct rb_node *node;
2144         struct ulist_node *u;
2145
2146         if (roots->nnodes == 1)
2147                 return 1;
2148
2149         node = rb_first(&roots->root);
2150         u = rb_entry(node, struct ulist_node, rb_node);
2151         /*
2152          * current root id is not smallest, we skip it and let it be checked
2153          * in the fs or file tree who hash the smallest root id.
2154          */
2155         if (root->objectid != u->val)
2156                 return 0;
2157
2158         return 1;
2159 }
2160
2161 /*
2162  * for a tree node or leaf, we record its reference count, so later if we still
2163  * process this node or leaf, don't need to compute its reference count again.
2164  */
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166                              struct node_refs *nrefs, u64 level)
2167 {
2168         int check, ret;
2169         u64 refs;
2170         struct ulist *roots;
2171
2172         if (nrefs->bytenr[level] != bytenr) {
2173                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174                                        level, 1, &refs, NULL);
2175                 if (ret < 0)
2176                         return ret;
2177
2178                 nrefs->bytenr[level] = bytenr;
2179                 nrefs->refs[level] = refs;
2180                 if (refs > 1) {
2181                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2182                                                    0, &roots);
2183                         if (ret)
2184                                 return -EIO;
2185
2186                         check = need_check(root, roots);
2187                         ulist_free(roots);
2188                         nrefs->need_check[level] = check;
2189                 } else {
2190                         nrefs->need_check[level] = 1;
2191                 }
2192         }
2193
2194         return 0;
2195 }
2196
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198                           struct walk_control *wc, int *level,
2199                           struct node_refs *nrefs)
2200 {
2201         enum btrfs_tree_block_status status;
2202         u64 bytenr;
2203         u64 ptr_gen;
2204         struct btrfs_fs_info *fs_info = root->fs_info;
2205         struct extent_buffer *next;
2206         struct extent_buffer *cur;
2207         int ret, err = 0;
2208         u64 refs;
2209
2210         WARN_ON(*level < 0);
2211         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2212
2213         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214                 refs = nrefs->refs[*level];
2215                 ret = 0;
2216         } else {
2217                 ret = btrfs_lookup_extent_info(NULL, root,
2218                                        path->nodes[*level]->start,
2219                                        *level, 1, &refs, NULL);
2220                 if (ret < 0) {
2221                         err = ret;
2222                         goto out;
2223                 }
2224                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225                 nrefs->refs[*level] = refs;
2226         }
2227
2228         if (refs > 1) {
2229                 ret = enter_shared_node(root, path->nodes[*level]->start,
2230                                         refs, wc, *level);
2231                 if (ret > 0) {
2232                         err = ret;
2233                         goto out;
2234                 }
2235         }
2236
2237         while (*level >= 0) {
2238                 WARN_ON(*level < 0);
2239                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240                 cur = path->nodes[*level];
2241
2242                 if (btrfs_header_level(cur) != *level)
2243                         WARN_ON(1);
2244
2245                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246                         break;
2247                 if (*level == 0) {
2248                         ret = process_one_leaf(root, cur, wc);
2249                         if (ret < 0)
2250                                 err = ret;
2251                         break;
2252                 }
2253                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2255
2256                 if (bytenr == nrefs->bytenr[*level - 1]) {
2257                         refs = nrefs->refs[*level - 1];
2258                 } else {
2259                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260                                         *level - 1, 1, &refs, NULL);
2261                         if (ret < 0) {
2262                                 refs = 0;
2263                         } else {
2264                                 nrefs->bytenr[*level - 1] = bytenr;
2265                                 nrefs->refs[*level - 1] = refs;
2266                         }
2267                 }
2268
2269                 if (refs > 1) {
2270                         ret = enter_shared_node(root, bytenr, refs,
2271                                                 wc, *level - 1);
2272                         if (ret > 0) {
2273                                 path->slots[*level]++;
2274                                 continue;
2275                         }
2276                 }
2277
2278                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280                         free_extent_buffer(next);
2281                         reada_walk_down(root, cur, path->slots[*level]);
2282                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2283                         if (!extent_buffer_uptodate(next)) {
2284                                 struct btrfs_key node_key;
2285
2286                                 btrfs_node_key_to_cpu(path->nodes[*level],
2287                                                       &node_key,
2288                                                       path->slots[*level]);
2289                                 btrfs_add_corrupt_extent_record(root->fs_info,
2290                                                 &node_key,
2291                                                 path->nodes[*level]->start,
2292                                                 root->fs_info->nodesize,
2293                                                 *level);
2294                                 err = -EIO;
2295                                 goto out;
2296                         }
2297                 }
2298
2299                 ret = check_child_node(cur, path->slots[*level], next);
2300                 if (ret) {
2301                         free_extent_buffer(next);
2302                         err = ret;
2303                         goto out;
2304                 }
2305
2306                 if (btrfs_is_leaf(next))
2307                         status = btrfs_check_leaf(root, NULL, next);
2308                 else
2309                         status = btrfs_check_node(root, NULL, next);
2310                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311                         free_extent_buffer(next);
2312                         err = -EIO;
2313                         goto out;
2314                 }
2315
2316                 *level = *level - 1;
2317                 free_extent_buffer(path->nodes[*level]);
2318                 path->nodes[*level] = next;
2319                 path->slots[*level] = 0;
2320         }
2321 out:
2322         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2323         return err;
2324 }
2325
2326 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2327                             unsigned int ext_ref);
2328
2329 /*
2330  * Returns >0  Found error, should continue
2331  * Returns <0  Fatal error, must exit the whole check
2332  * Returns 0   No errors found
2333  */
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335                              int *level, struct node_refs *nrefs, int ext_ref)
2336 {
2337         enum btrfs_tree_block_status status;
2338         u64 bytenr;
2339         u64 ptr_gen;
2340         struct btrfs_fs_info *fs_info = root->fs_info;
2341         struct extent_buffer *next;
2342         struct extent_buffer *cur;
2343         int ret;
2344
2345         WARN_ON(*level < 0);
2346         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2347
2348         ret = update_nodes_refs(root, path->nodes[*level]->start,
2349                                 nrefs, *level);
2350         if (ret < 0)
2351                 return ret;
2352
2353         while (*level >= 0) {
2354                 WARN_ON(*level < 0);
2355                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356                 cur = path->nodes[*level];
2357
2358                 if (btrfs_header_level(cur) != *level)
2359                         WARN_ON(1);
2360
2361                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2362                         break;
2363                 /* Don't forgot to check leaf/node validation */
2364                 if (*level == 0) {
2365                         ret = btrfs_check_leaf(root, NULL, cur);
2366                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2367                                 ret = -EIO;
2368                                 break;
2369                         }
2370                         ret = process_one_leaf_v2(root, path, nrefs,
2371                                                   level, ext_ref);
2372                         cur = path->nodes[*level];
2373                         break;
2374                 } else {
2375                         ret = btrfs_check_node(root, NULL, cur);
2376                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2377                                 ret = -EIO;
2378                                 break;
2379                         }
2380                 }
2381                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2383
2384                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2385                 if (ret)
2386                         break;
2387                 if (!nrefs->need_check[*level - 1]) {
2388                         path->slots[*level]++;
2389                         continue;
2390                 }
2391
2392                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394                         free_extent_buffer(next);
2395                         reada_walk_down(root, cur, path->slots[*level]);
2396                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2397                         if (!extent_buffer_uptodate(next)) {
2398                                 struct btrfs_key node_key;
2399
2400                                 btrfs_node_key_to_cpu(path->nodes[*level],
2401                                                       &node_key,
2402                                                       path->slots[*level]);
2403                                 btrfs_add_corrupt_extent_record(fs_info,
2404                                                 &node_key,
2405                                                 path->nodes[*level]->start,
2406                                                 fs_info->nodesize,
2407                                                 *level);
2408                                 ret = -EIO;
2409                                 break;
2410                         }
2411                 }
2412
2413                 ret = check_child_node(cur, path->slots[*level], next);
2414                 if (ret < 0) 
2415                         break;
2416
2417                 if (btrfs_is_leaf(next))
2418                         status = btrfs_check_leaf(root, NULL, next);
2419                 else
2420                         status = btrfs_check_node(root, NULL, next);
2421                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422                         free_extent_buffer(next);
2423                         ret = -EIO;
2424                         break;
2425                 }
2426
2427                 *level = *level - 1;
2428                 free_extent_buffer(path->nodes[*level]);
2429                 path->nodes[*level] = next;
2430                 path->slots[*level] = 0;
2431         }
2432         return ret;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int create_inode_item(struct btrfs_root *root,
2695                              struct inode_record *rec,
2696                              int root_dir)
2697 {
2698         struct btrfs_trans_handle *trans;
2699         struct btrfs_inode_item inode_item;
2700         time_t now = time(NULL);
2701         int ret;
2702
2703         trans = btrfs_start_transaction(root, 1);
2704         if (IS_ERR(trans)) {
2705                 ret = PTR_ERR(trans);
2706                 return ret;
2707         }
2708
2709         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2710                 "be incomplete, please check permissions and content after "
2711                 "the fsck completes.\n", (unsigned long long)root->objectid,
2712                 (unsigned long long)rec->ino);
2713
2714         memset(&inode_item, 0, sizeof(inode_item));
2715         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2716         if (root_dir)
2717                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2718         else
2719                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2720         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2721         if (rec->found_dir_item) {
2722                 if (rec->found_file_extent)
2723                         fprintf(stderr, "root %llu inode %llu has both a dir "
2724                                 "item and extents, unsure if it is a dir or a "
2725                                 "regular file so setting it as a directory\n",
2726                                 (unsigned long long)root->objectid,
2727                                 (unsigned long long)rec->ino);
2728                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2729                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2730         } else if (!rec->found_dir_item) {
2731                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2732                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2733         }
2734         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2735         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2736         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2737         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2738         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2739         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2740         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2741         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2742
2743         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2744         BUG_ON(ret);
2745         btrfs_commit_transaction(trans, root);
2746         return 0;
2747 }
2748
2749 static int repair_inode_backrefs(struct btrfs_root *root,
2750                                  struct inode_record *rec,
2751                                  struct cache_tree *inode_cache,
2752                                  int delete)
2753 {
2754         struct inode_backref *tmp, *backref;
2755         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2756         int ret = 0;
2757         int repaired = 0;
2758
2759         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2760                 if (!delete && rec->ino == root_dirid) {
2761                         if (!rec->found_inode_item) {
2762                                 ret = create_inode_item(root, rec, 1);
2763                                 if (ret)
2764                                         break;
2765                                 repaired++;
2766                         }
2767                 }
2768
2769                 /* Index 0 for root dir's are special, don't mess with it */
2770                 if (rec->ino == root_dirid && backref->index == 0)
2771                         continue;
2772
2773                 if (delete &&
2774                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2775                      (backref->found_dir_index && backref->found_inode_ref &&
2776                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2777                         ret = delete_dir_index(root, backref);
2778                         if (ret)
2779                                 break;
2780                         repaired++;
2781                         list_del(&backref->list);
2782                         free(backref);
2783                         continue;
2784                 }
2785
2786                 if (!delete && !backref->found_dir_index &&
2787                     backref->found_dir_item && backref->found_inode_ref) {
2788                         ret = add_missing_dir_index(root, inode_cache, rec,
2789                                                     backref);
2790                         if (ret)
2791                                 break;
2792                         repaired++;
2793                         if (backref->found_dir_item &&
2794                             backref->found_dir_index) {
2795                                 if (!backref->errors &&
2796                                     backref->found_inode_ref) {
2797                                         list_del(&backref->list);
2798                                         free(backref);
2799                                         continue;
2800                                 }
2801                         }
2802                 }
2803
2804                 if (!delete && (!backref->found_dir_index &&
2805                                 !backref->found_dir_item &&
2806                                 backref->found_inode_ref)) {
2807                         struct btrfs_trans_handle *trans;
2808                         struct btrfs_key location;
2809
2810                         ret = check_dir_conflict(root, backref->name,
2811                                                  backref->namelen,
2812                                                  backref->dir,
2813                                                  backref->index);
2814                         if (ret) {
2815                                 /*
2816                                  * let nlink fixing routine to handle it,
2817                                  * which can do it better.
2818                                  */
2819                                 ret = 0;
2820                                 break;
2821                         }
2822                         location.objectid = rec->ino;
2823                         location.type = BTRFS_INODE_ITEM_KEY;
2824                         location.offset = 0;
2825
2826                         trans = btrfs_start_transaction(root, 1);
2827                         if (IS_ERR(trans)) {
2828                                 ret = PTR_ERR(trans);
2829                                 break;
2830                         }
2831                         fprintf(stderr, "adding missing dir index/item pair "
2832                                 "for inode %llu\n",
2833                                 (unsigned long long)rec->ino);
2834                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2835                                                     backref->namelen,
2836                                                     backref->dir, &location,
2837                                                     imode_to_type(rec->imode),
2838                                                     backref->index);
2839                         BUG_ON(ret);
2840                         btrfs_commit_transaction(trans, root);
2841                         repaired++;
2842                 }
2843
2844                 if (!delete && (backref->found_inode_ref &&
2845                                 backref->found_dir_index &&
2846                                 backref->found_dir_item &&
2847                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2848                                 !rec->found_inode_item)) {
2849                         ret = create_inode_item(root, rec, 0);
2850                         if (ret)
2851                                 break;
2852                         repaired++;
2853                 }
2854
2855         }
2856         return ret ? ret : repaired;
2857 }
2858
2859 /*
2860  * To determine the file type for nlink/inode_item repair
2861  *
2862  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2863  * Return -ENOENT if file type is not found.
2864  */
2865 static int find_file_type(struct inode_record *rec, u8 *type)
2866 {
2867         struct inode_backref *backref;
2868
2869         /* For inode item recovered case */
2870         if (rec->found_inode_item) {
2871                 *type = imode_to_type(rec->imode);
2872                 return 0;
2873         }
2874
2875         list_for_each_entry(backref, &rec->backrefs, list) {
2876                 if (backref->found_dir_index || backref->found_dir_item) {
2877                         *type = backref->filetype;
2878                         return 0;
2879                 }
2880         }
2881         return -ENOENT;
2882 }
2883
2884 /*
2885  * To determine the file name for nlink repair
2886  *
2887  * Return 0 if file name is found, set name and namelen.
2888  * Return -ENOENT if file name is not found.
2889  */
2890 static int find_file_name(struct inode_record *rec,
2891                           char *name, int *namelen)
2892 {
2893         struct inode_backref *backref;
2894
2895         list_for_each_entry(backref, &rec->backrefs, list) {
2896                 if (backref->found_dir_index || backref->found_dir_item ||
2897                     backref->found_inode_ref) {
2898                         memcpy(name, backref->name, backref->namelen);
2899                         *namelen = backref->namelen;
2900                         return 0;
2901                 }
2902         }
2903         return -ENOENT;
2904 }
2905
2906 /* Reset the nlink of the inode to the correct one */
2907 static int reset_nlink(struct btrfs_trans_handle *trans,
2908                        struct btrfs_root *root,
2909                        struct btrfs_path *path,
2910                        struct inode_record *rec)
2911 {
2912         struct inode_backref *backref;
2913         struct inode_backref *tmp;
2914         struct btrfs_key key;
2915         struct btrfs_inode_item *inode_item;
2916         int ret = 0;
2917
2918         /* We don't believe this either, reset it and iterate backref */
2919         rec->found_link = 0;
2920
2921         /* Remove all backref including the valid ones */
2922         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2923                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2924                                    backref->index, backref->name,
2925                                    backref->namelen, 0);
2926                 if (ret < 0)
2927                         goto out;
2928
2929                 /* remove invalid backref, so it won't be added back */
2930                 if (!(backref->found_dir_index &&
2931                       backref->found_dir_item &&
2932                       backref->found_inode_ref)) {
2933                         list_del(&backref->list);
2934                         free(backref);
2935                 } else {
2936                         rec->found_link++;
2937                 }
2938         }
2939
2940         /* Set nlink to 0 */
2941         key.objectid = rec->ino;
2942         key.type = BTRFS_INODE_ITEM_KEY;
2943         key.offset = 0;
2944         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2945         if (ret < 0)
2946                 goto out;
2947         if (ret > 0) {
2948                 ret = -ENOENT;
2949                 goto out;
2950         }
2951         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2952                                     struct btrfs_inode_item);
2953         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2954         btrfs_mark_buffer_dirty(path->nodes[0]);
2955         btrfs_release_path(path);
2956
2957         /*
2958          * Add back valid inode_ref/dir_item/dir_index,
2959          * add_link() will handle the nlink inc, so new nlink must be correct
2960          */
2961         list_for_each_entry(backref, &rec->backrefs, list) {
2962                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2963                                      backref->name, backref->namelen,
2964                                      backref->filetype, &backref->index, 1);
2965                 if (ret < 0)
2966                         goto out;
2967         }
2968 out:
2969         btrfs_release_path(path);
2970         return ret;
2971 }
2972
2973 static int get_highest_inode(struct btrfs_trans_handle *trans,
2974                                 struct btrfs_root *root,
2975                                 struct btrfs_path *path,
2976                                 u64 *highest_ino)
2977 {
2978         struct btrfs_key key, found_key;
2979         int ret;
2980
2981         btrfs_init_path(path);
2982         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2983         key.offset = -1;
2984         key.type = BTRFS_INODE_ITEM_KEY;
2985         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2986         if (ret == 1) {
2987                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2988                                 path->slots[0] - 1);
2989                 *highest_ino = found_key.objectid;
2990                 ret = 0;
2991         }
2992         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2993                 ret = -EOVERFLOW;
2994         btrfs_release_path(path);
2995         return ret;
2996 }
2997
2998 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2999                                struct btrfs_root *root,
3000                                struct btrfs_path *path,
3001                                struct inode_record *rec)
3002 {
3003         char *dir_name = "lost+found";
3004         char namebuf[BTRFS_NAME_LEN] = {0};
3005         u64 lost_found_ino;
3006         u32 mode = 0700;
3007         u8 type = 0;
3008         int namelen = 0;
3009         int name_recovered = 0;
3010         int type_recovered = 0;
3011         int ret = 0;
3012
3013         /*
3014          * Get file name and type first before these invalid inode ref
3015          * are deleted by remove_all_invalid_backref()
3016          */
3017         name_recovered = !find_file_name(rec, namebuf, &namelen);
3018         type_recovered = !find_file_type(rec, &type);
3019
3020         if (!name_recovered) {
3021                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3022                        rec->ino, rec->ino);
3023                 namelen = count_digits(rec->ino);
3024                 sprintf(namebuf, "%llu", rec->ino);
3025                 name_recovered = 1;
3026         }
3027         if (!type_recovered) {
3028                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3029                        rec->ino);
3030                 type = BTRFS_FT_REG_FILE;
3031                 type_recovered = 1;
3032         }
3033
3034         ret = reset_nlink(trans, root, path, rec);
3035         if (ret < 0) {
3036                 fprintf(stderr,
3037                         "Failed to reset nlink for inode %llu: %s\n",
3038                         rec->ino, strerror(-ret));
3039                 goto out;
3040         }
3041
3042         if (rec->found_link == 0) {
3043                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3044                 if (ret < 0)
3045                         goto out;
3046                 lost_found_ino++;
3047                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3048                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3049                                   mode);
3050                 if (ret < 0) {
3051                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3052                                 dir_name, strerror(-ret));
3053                         goto out;
3054                 }
3055                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3056                                      namebuf, namelen, type, NULL, 1);
3057                 /*
3058                  * Add ".INO" suffix several times to handle case where
3059                  * "FILENAME.INO" is already taken by another file.
3060                  */
3061                 while (ret == -EEXIST) {
3062                         /*
3063                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3064                          */
3065                         if (namelen + count_digits(rec->ino) + 1 >
3066                             BTRFS_NAME_LEN) {
3067                                 ret = -EFBIG;
3068                                 goto out;
3069                         }
3070                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3071                                  ".%llu", rec->ino);
3072                         namelen += count_digits(rec->ino) + 1;
3073                         ret = btrfs_add_link(trans, root, rec->ino,
3074                                              lost_found_ino, namebuf,
3075                                              namelen, type, NULL, 1);
3076                 }
3077                 if (ret < 0) {
3078                         fprintf(stderr,
3079                                 "Failed to link the inode %llu to %s dir: %s\n",
3080                                 rec->ino, dir_name, strerror(-ret));
3081                         goto out;
3082                 }
3083                 /*
3084                  * Just increase the found_link, don't actually add the
3085                  * backref. This will make things easier and this inode
3086                  * record will be freed after the repair is done.
3087                  * So fsck will not report problem about this inode.
3088                  */
3089                 rec->found_link++;
3090                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3091                        namelen, namebuf, dir_name);
3092         }
3093         printf("Fixed the nlink of inode %llu\n", rec->ino);
3094 out:
3095         /*
3096          * Clear the flag anyway, or we will loop forever for the same inode
3097          * as it will not be removed from the bad inode list and the dead loop
3098          * happens.
3099          */
3100         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3101         btrfs_release_path(path);
3102         return ret;
3103 }
3104
3105 /*
3106  * Check if there is any normal(reg or prealloc) file extent for given
3107  * ino.
3108  * This is used to determine the file type when neither its dir_index/item or
3109  * inode_item exists.
3110  *
3111  * This will *NOT* report error, if any error happens, just consider it does
3112  * not have any normal file extent.
3113  */
3114 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3115 {
3116         struct btrfs_path path;
3117         struct btrfs_key key;
3118         struct btrfs_key found_key;
3119         struct btrfs_file_extent_item *fi;
3120         u8 type;
3121         int ret = 0;
3122
3123         btrfs_init_path(&path);
3124         key.objectid = ino;
3125         key.type = BTRFS_EXTENT_DATA_KEY;
3126         key.offset = 0;
3127
3128         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3129         if (ret < 0) {
3130                 ret = 0;
3131                 goto out;
3132         }
3133         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3134                 ret = btrfs_next_leaf(root, &path);
3135                 if (ret) {
3136                         ret = 0;
3137                         goto out;
3138                 }
3139         }
3140         while (1) {
3141                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3142                                       path.slots[0]);
3143                 if (found_key.objectid != ino ||
3144                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3145                         break;
3146                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3147                                     struct btrfs_file_extent_item);
3148                 type = btrfs_file_extent_type(path.nodes[0], fi);
3149                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3150                         ret = 1;
3151                         goto out;
3152                 }
3153         }
3154 out:
3155         btrfs_release_path(&path);
3156         return ret;
3157 }
3158
3159 static u32 btrfs_type_to_imode(u8 type)
3160 {
3161         static u32 imode_by_btrfs_type[] = {
3162                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3163                 [BTRFS_FT_DIR]          = S_IFDIR,
3164                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3165                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3166                 [BTRFS_FT_FIFO]         = S_IFIFO,
3167                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3168                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3169         };
3170
3171         return imode_by_btrfs_type[(type)];
3172 }
3173
3174 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3175                                 struct btrfs_root *root,
3176                                 struct btrfs_path *path,
3177                                 struct inode_record *rec)
3178 {
3179         u8 filetype;
3180         u32 mode = 0700;
3181         int type_recovered = 0;
3182         int ret = 0;
3183
3184         printf("Trying to rebuild inode:%llu\n", rec->ino);
3185
3186         type_recovered = !find_file_type(rec, &filetype);
3187
3188         /*
3189          * Try to determine inode type if type not found.
3190          *
3191          * For found regular file extent, it must be FILE.
3192          * For found dir_item/index, it must be DIR.
3193          *
3194          * For undetermined one, use FILE as fallback.
3195          *
3196          * TODO:
3197          * 1. If found backref(inode_index/item is already handled) to it,
3198          *    it must be DIR.
3199          *    Need new inode-inode ref structure to allow search for that.
3200          */
3201         if (!type_recovered) {
3202                 if (rec->found_file_extent &&
3203                     find_normal_file_extent(root, rec->ino)) {
3204                         type_recovered = 1;
3205                         filetype = BTRFS_FT_REG_FILE;
3206                 } else if (rec->found_dir_item) {
3207                         type_recovered = 1;
3208                         filetype = BTRFS_FT_DIR;
3209                 } else if (!list_empty(&rec->orphan_extents)) {
3210                         type_recovered = 1;
3211                         filetype = BTRFS_FT_REG_FILE;
3212                 } else{
3213                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3214                                rec->ino);
3215                         type_recovered = 1;
3216                         filetype = BTRFS_FT_REG_FILE;
3217                 }
3218         }
3219
3220         ret = btrfs_new_inode(trans, root, rec->ino,
3221                               mode | btrfs_type_to_imode(filetype));
3222         if (ret < 0)
3223                 goto out;
3224
3225         /*
3226          * Here inode rebuild is done, we only rebuild the inode item,
3227          * don't repair the nlink(like move to lost+found).
3228          * That is the job of nlink repair.
3229          *
3230          * We just fill the record and return
3231          */
3232         rec->found_dir_item = 1;
3233         rec->imode = mode | btrfs_type_to_imode(filetype);
3234         rec->nlink = 0;
3235         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3236         /* Ensure the inode_nlinks repair function will be called */
3237         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3238 out:
3239         return ret;
3240 }
3241
3242 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3243                                       struct btrfs_root *root,
3244                                       struct btrfs_path *path,
3245                                       struct inode_record *rec)
3246 {
3247         struct orphan_data_extent *orphan;
3248         struct orphan_data_extent *tmp;
3249         int ret = 0;
3250
3251         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3252                 /*
3253                  * Check for conflicting file extents
3254                  *
3255                  * Here we don't know whether the extents is compressed or not,
3256                  * so we can only assume it not compressed nor data offset,
3257                  * and use its disk_len as extent length.
3258                  */
3259                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3260                                        orphan->offset, orphan->disk_len, 0);
3261                 btrfs_release_path(path);
3262                 if (ret < 0)
3263                         goto out;
3264                 if (!ret) {
3265                         fprintf(stderr,
3266                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3267                                 orphan->disk_bytenr, orphan->disk_len);
3268                         ret = btrfs_free_extent(trans,
3269                                         root->fs_info->extent_root,
3270                                         orphan->disk_bytenr, orphan->disk_len,
3271                                         0, root->objectid, orphan->objectid,
3272                                         orphan->offset);
3273                         if (ret < 0)
3274                                 goto out;
3275                 }
3276                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3277                                 orphan->offset, orphan->disk_bytenr,
3278                                 orphan->disk_len, orphan->disk_len);
3279                 if (ret < 0)
3280                         goto out;
3281
3282                 /* Update file size info */
3283                 rec->found_size += orphan->disk_len;
3284                 if (rec->found_size == rec->nbytes)
3285                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3286
3287                 /* Update the file extent hole info too */
3288                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3289                                            orphan->disk_len);
3290                 if (ret < 0)
3291                         goto out;
3292                 if (RB_EMPTY_ROOT(&rec->holes))
3293                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3294
3295                 list_del(&orphan->list);
3296                 free(orphan);
3297         }
3298         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3299 out:
3300         return ret;
3301 }
3302
3303 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3304                                         struct btrfs_root *root,
3305                                         struct btrfs_path *path,
3306                                         struct inode_record *rec)
3307 {
3308         struct rb_node *node;
3309         struct file_extent_hole *hole;
3310         int found = 0;
3311         int ret = 0;
3312
3313         node = rb_first(&rec->holes);
3314
3315         while (node) {
3316                 found = 1;
3317                 hole = rb_entry(node, struct file_extent_hole, node);
3318                 ret = btrfs_punch_hole(trans, root, rec->ino,
3319                                        hole->start, hole->len);
3320                 if (ret < 0)
3321                         goto out;
3322                 ret = del_file_extent_hole(&rec->holes, hole->start,
3323                                            hole->len);
3324                 if (ret < 0)
3325                         goto out;
3326                 if (RB_EMPTY_ROOT(&rec->holes))
3327                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3328                 node = rb_first(&rec->holes);
3329         }
3330         /* special case for a file losing all its file extent */
3331         if (!found) {
3332                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3333                                        round_up(rec->isize,
3334                                                 root->fs_info->sectorsize));
3335                 if (ret < 0)
3336                         goto out;
3337         }
3338         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3339                rec->ino, root->objectid);
3340 out:
3341         return ret;
3342 }
3343
3344 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3345 {
3346         struct btrfs_trans_handle *trans;
3347         struct btrfs_path path;
3348         int ret = 0;
3349
3350         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3351                              I_ERR_NO_ORPHAN_ITEM |
3352                              I_ERR_LINK_COUNT_WRONG |
3353                              I_ERR_NO_INODE_ITEM |
3354                              I_ERR_FILE_EXTENT_ORPHAN |
3355                              I_ERR_FILE_EXTENT_DISCOUNT|
3356                              I_ERR_FILE_NBYTES_WRONG)))
3357                 return rec->errors;
3358
3359         /*
3360          * For nlink repair, it may create a dir and add link, so
3361          * 2 for parent(256)'s dir_index and dir_item
3362          * 2 for lost+found dir's inode_item and inode_ref
3363          * 1 for the new inode_ref of the file
3364          * 2 for lost+found dir's dir_index and dir_item for the file
3365          */
3366         trans = btrfs_start_transaction(root, 7);
3367         if (IS_ERR(trans))
3368                 return PTR_ERR(trans);
3369
3370         btrfs_init_path(&path);
3371         if (rec->errors & I_ERR_NO_INODE_ITEM)
3372                 ret = repair_inode_no_item(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3374                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3376                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3378                 ret = repair_inode_isize(trans, root, &path, rec);
3379         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3380                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3381         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3382                 ret = repair_inode_nlinks(trans, root, &path, rec);
3383         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3384                 ret = repair_inode_nbytes(trans, root, &path, rec);
3385         btrfs_commit_transaction(trans, root);
3386         btrfs_release_path(&path);
3387         return ret;
3388 }
3389
3390 static int check_inode_recs(struct btrfs_root *root,
3391                             struct cache_tree *inode_cache)
3392 {
3393         struct cache_extent *cache;
3394         struct ptr_node *node;
3395         struct inode_record *rec;
3396         struct inode_backref *backref;
3397         int stage = 0;
3398         int ret = 0;
3399         int err = 0;
3400         u64 error = 0;
3401         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3402
3403         if (btrfs_root_refs(&root->root_item) == 0) {
3404                 if (!cache_tree_empty(inode_cache))
3405                         fprintf(stderr, "warning line %d\n", __LINE__);
3406                 return 0;
3407         }
3408
3409         /*
3410          * We need to repair backrefs first because we could change some of the
3411          * errors in the inode recs.
3412          *
3413          * We also need to go through and delete invalid backrefs first and then
3414          * add the correct ones second.  We do this because we may get EEXIST
3415          * when adding back the correct index because we hadn't yet deleted the
3416          * invalid index.
3417          *
3418          * For example, if we were missing a dir index then the directories
3419          * isize would be wrong, so if we fixed the isize to what we thought it
3420          * would be and then fixed the backref we'd still have a invalid fs, so
3421          * we need to add back the dir index and then check to see if the isize
3422          * is still wrong.
3423          */
3424         while (stage < 3) {
3425                 stage++;
3426                 if (stage == 3 && !err)
3427                         break;
3428
3429                 cache = search_cache_extent(inode_cache, 0);
3430                 while (repair && cache) {
3431                         node = container_of(cache, struct ptr_node, cache);
3432                         rec = node->data;
3433                         cache = next_cache_extent(cache);
3434
3435                         /* Need to free everything up and rescan */
3436                         if (stage == 3) {
3437                                 remove_cache_extent(inode_cache, &node->cache);
3438                                 free(node);
3439                                 free_inode_rec(rec);
3440                                 continue;
3441                         }
3442
3443                         if (list_empty(&rec->backrefs))
3444                                 continue;
3445
3446                         ret = repair_inode_backrefs(root, rec, inode_cache,
3447                                                     stage == 1);
3448                         if (ret < 0) {
3449                                 err = ret;
3450                                 stage = 2;
3451                                 break;
3452                         } if (ret > 0) {
3453                                 err = -EAGAIN;
3454                         }
3455                 }
3456         }
3457         if (err)
3458                 return err;
3459
3460         rec = get_inode_rec(inode_cache, root_dirid, 0);
3461         BUG_ON(IS_ERR(rec));
3462         if (rec) {
3463                 ret = check_root_dir(rec);
3464                 if (ret) {
3465                         fprintf(stderr, "root %llu root dir %llu error\n",
3466                                 (unsigned long long)root->root_key.objectid,
3467                                 (unsigned long long)root_dirid);
3468                         print_inode_error(root, rec);
3469                         error++;
3470                 }
3471         } else {
3472                 if (repair) {
3473                         struct btrfs_trans_handle *trans;
3474
3475                         trans = btrfs_start_transaction(root, 1);
3476                         if (IS_ERR(trans)) {
3477                                 err = PTR_ERR(trans);
3478                                 return err;
3479                         }
3480
3481                         fprintf(stderr,
3482                                 "root %llu missing its root dir, recreating\n",
3483                                 (unsigned long long)root->objectid);
3484
3485                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3486                         BUG_ON(ret);
3487
3488                         btrfs_commit_transaction(trans, root);
3489                         return -EAGAIN;
3490                 }
3491
3492                 fprintf(stderr, "root %llu root dir %llu not found\n",
3493                         (unsigned long long)root->root_key.objectid,
3494                         (unsigned long long)root_dirid);
3495         }
3496
3497         while (1) {
3498                 cache = search_cache_extent(inode_cache, 0);
3499                 if (!cache)
3500                         break;
3501                 node = container_of(cache, struct ptr_node, cache);
3502                 rec = node->data;
3503                 remove_cache_extent(inode_cache, &node->cache);
3504                 free(node);
3505                 if (rec->ino == root_dirid ||
3506                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3507                         free_inode_rec(rec);
3508                         continue;
3509                 }
3510
3511                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3512                         ret = check_orphan_item(root, rec->ino);
3513                         if (ret == 0)
3514                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3515                         if (can_free_inode_rec(rec)) {
3516                                 free_inode_rec(rec);
3517                                 continue;
3518                         }
3519                 }
3520
3521                 if (!rec->found_inode_item)
3522                         rec->errors |= I_ERR_NO_INODE_ITEM;
3523                 if (rec->found_link != rec->nlink)
3524                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3525                 if (repair) {
3526                         ret = try_repair_inode(root, rec);
3527                         if (ret == 0 && can_free_inode_rec(rec)) {
3528                                 free_inode_rec(rec);
3529                                 continue;
3530                         }
3531                         ret = 0;
3532                 }
3533
3534                 if (!(repair && ret == 0))
3535                         error++;
3536                 print_inode_error(root, rec);
3537                 list_for_each_entry(backref, &rec->backrefs, list) {
3538                         if (!backref->found_dir_item)
3539                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3540                         if (!backref->found_dir_index)
3541                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3542                         if (!backref->found_inode_ref)
3543                                 backref->errors |= REF_ERR_NO_INODE_REF;
3544                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3545                                 " namelen %u name %s filetype %d errors %x",
3546                                 (unsigned long long)backref->dir,
3547                                 (unsigned long long)backref->index,
3548                                 backref->namelen, backref->name,
3549                                 backref->filetype, backref->errors);
3550                         print_ref_error(backref->errors);
3551                 }
3552                 free_inode_rec(rec);
3553         }
3554         return (error > 0) ? -1 : 0;
3555 }
3556
3557 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3558                                         u64 objectid)
3559 {
3560         struct cache_extent *cache;
3561         struct root_record *rec = NULL;
3562         int ret;
3563
3564         cache = lookup_cache_extent(root_cache, objectid, 1);
3565         if (cache) {
3566                 rec = container_of(cache, struct root_record, cache);
3567         } else {
3568                 rec = calloc(1, sizeof(*rec));
3569                 if (!rec)
3570                         return ERR_PTR(-ENOMEM);
3571                 rec->objectid = objectid;
3572                 INIT_LIST_HEAD(&rec->backrefs);
3573                 rec->cache.start = objectid;
3574                 rec->cache.size = 1;
3575
3576                 ret = insert_cache_extent(root_cache, &rec->cache);
3577                 if (ret)
3578                         return ERR_PTR(-EEXIST);
3579         }
3580         return rec;
3581 }
3582
3583 static struct root_backref *get_root_backref(struct root_record *rec,
3584                                              u64 ref_root, u64 dir, u64 index,
3585                                              const char *name, int namelen)
3586 {
3587         struct root_backref *backref;
3588
3589         list_for_each_entry(backref, &rec->backrefs, list) {
3590                 if (backref->ref_root != ref_root || backref->dir != dir ||
3591                     backref->namelen != namelen)
3592                         continue;
3593                 if (memcmp(name, backref->name, namelen))
3594                         continue;
3595                 return backref;
3596         }
3597
3598         backref = calloc(1, sizeof(*backref) + namelen + 1);
3599         if (!backref)
3600                 return NULL;
3601         backref->ref_root = ref_root;
3602         backref->dir = dir;
3603         backref->index = index;
3604         backref->namelen = namelen;
3605         memcpy(backref->name, name, namelen);
3606         backref->name[namelen] = '\0';
3607         list_add_tail(&backref->list, &rec->backrefs);
3608         return backref;
3609 }
3610
3611 static void free_root_record(struct cache_extent *cache)
3612 {
3613         struct root_record *rec;
3614         struct root_backref *backref;
3615
3616         rec = container_of(cache, struct root_record, cache);
3617         while (!list_empty(&rec->backrefs)) {
3618                 backref = to_root_backref(rec->backrefs.next);
3619                 list_del(&backref->list);
3620                 free(backref);
3621         }
3622
3623         free(rec);
3624 }
3625
3626 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3627
3628 static int add_root_backref(struct cache_tree *root_cache,
3629                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3630                             const char *name, int namelen,
3631                             int item_type, int errors)
3632 {
3633         struct root_record *rec;
3634         struct root_backref *backref;
3635
3636         rec = get_root_rec(root_cache, root_id);
3637         BUG_ON(IS_ERR(rec));
3638         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3639         BUG_ON(!backref);
3640
3641         backref->errors |= errors;
3642
3643         if (item_type != BTRFS_DIR_ITEM_KEY) {
3644                 if (backref->found_dir_index || backref->found_back_ref ||
3645                     backref->found_forward_ref) {
3646                         if (backref->index != index)
3647                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3648                 } else {
3649                         backref->index = index;
3650                 }
3651         }
3652
3653         if (item_type == BTRFS_DIR_ITEM_KEY) {
3654                 if (backref->found_forward_ref)
3655                         rec->found_ref++;
3656                 backref->found_dir_item = 1;
3657         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3658                 backref->found_dir_index = 1;
3659         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3660                 if (backref->found_forward_ref)
3661                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3662                 else if (backref->found_dir_item)
3663                         rec->found_ref++;
3664                 backref->found_forward_ref = 1;
3665         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3666                 if (backref->found_back_ref)
3667                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3668                 backref->found_back_ref = 1;
3669         } else {
3670                 BUG_ON(1);
3671         }
3672
3673         if (backref->found_forward_ref && backref->found_dir_item)
3674                 backref->reachable = 1;
3675         return 0;
3676 }
3677
3678 static int merge_root_recs(struct btrfs_root *root,
3679                            struct cache_tree *src_cache,
3680                            struct cache_tree *dst_cache)
3681 {
3682         struct cache_extent *cache;
3683         struct ptr_node *node;
3684         struct inode_record *rec;
3685         struct inode_backref *backref;
3686         int ret = 0;
3687
3688         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3689                 free_inode_recs_tree(src_cache);
3690                 return 0;
3691         }
3692
3693         while (1) {
3694                 cache = search_cache_extent(src_cache, 0);
3695                 if (!cache)
3696                         break;
3697                 node = container_of(cache, struct ptr_node, cache);
3698                 rec = node->data;
3699                 remove_cache_extent(src_cache, &node->cache);
3700                 free(node);
3701
3702                 ret = is_child_root(root, root->objectid, rec->ino);
3703                 if (ret < 0)
3704                         break;
3705                 else if (ret == 0)
3706                         goto skip;
3707
3708                 list_for_each_entry(backref, &rec->backrefs, list) {
3709                         BUG_ON(backref->found_inode_ref);
3710                         if (backref->found_dir_item)
3711                                 add_root_backref(dst_cache, rec->ino,
3712                                         root->root_key.objectid, backref->dir,
3713                                         backref->index, backref->name,
3714                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3715                                         backref->errors);
3716                         if (backref->found_dir_index)
3717                                 add_root_backref(dst_cache, rec->ino,
3718                                         root->root_key.objectid, backref->dir,
3719                                         backref->index, backref->name,
3720                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3721                                         backref->errors);
3722                 }
3723 skip:
3724                 free_inode_rec(rec);
3725         }
3726         if (ret < 0)
3727                 return ret;
3728         return 0;
3729 }
3730
3731 static int check_root_refs(struct btrfs_root *root,
3732                            struct cache_tree *root_cache)
3733 {
3734         struct root_record *rec;
3735         struct root_record *ref_root;
3736         struct root_backref *backref;
3737         struct cache_extent *cache;
3738         int loop = 1;
3739         int ret;
3740         int error;
3741         int errors = 0;
3742
3743         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3744         BUG_ON(IS_ERR(rec));
3745         rec->found_ref = 1;
3746
3747         /* fixme: this can not detect circular references */
3748         while (loop) {
3749                 loop = 0;
3750                 cache = search_cache_extent(root_cache, 0);
3751                 while (1) {
3752                         if (!cache)
3753                                 break;
3754                         rec = container_of(cache, struct root_record, cache);
3755                         cache = next_cache_extent(cache);
3756
3757                         if (rec->found_ref == 0)
3758                                 continue;
3759
3760                         list_for_each_entry(backref, &rec->backrefs, list) {
3761                                 if (!backref->reachable)
3762                                         continue;
3763
3764                                 ref_root = get_root_rec(root_cache,
3765                                                         backref->ref_root);
3766                                 BUG_ON(IS_ERR(ref_root));
3767                                 if (ref_root->found_ref > 0)
3768                                         continue;
3769
3770                                 backref->reachable = 0;
3771                                 rec->found_ref--;
3772                                 if (rec->found_ref == 0)
3773                                         loop = 1;
3774                         }
3775                 }
3776         }
3777
3778         cache = search_cache_extent(root_cache, 0);
3779         while (1) {
3780                 if (!cache)
3781                         break;
3782                 rec = container_of(cache, struct root_record, cache);
3783                 cache = next_cache_extent(cache);
3784
3785                 if (rec->found_ref == 0 &&
3786                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3787                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3788                         ret = check_orphan_item(root->fs_info->tree_root,
3789                                                 rec->objectid);
3790                         if (ret == 0)
3791                                 continue;
3792
3793                         /*
3794                          * If we don't have a root item then we likely just have
3795                          * a dir item in a snapshot for this root but no actual
3796                          * ref key or anything so it's meaningless.
3797                          */
3798                         if (!rec->found_root_item)
3799                                 continue;
3800                         errors++;
3801                         fprintf(stderr, "fs tree %llu not referenced\n",
3802                                 (unsigned long long)rec->objectid);
3803                 }
3804
3805                 error = 0;
3806                 if (rec->found_ref > 0 && !rec->found_root_item)
3807                         error = 1;
3808                 list_for_each_entry(backref, &rec->backrefs, list) {
3809                         if (!backref->found_dir_item)
3810                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3811                         if (!backref->found_dir_index)
3812                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3813                         if (!backref->found_back_ref)
3814                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3815                         if (!backref->found_forward_ref)
3816                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3817                         if (backref->reachable && backref->errors)
3818                                 error = 1;
3819                 }
3820                 if (!error)
3821                         continue;
3822
3823                 errors++;
3824                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3825                         (unsigned long long)rec->objectid, rec->found_ref,
3826                          rec->found_root_item ? "" : "not found");
3827
3828                 list_for_each_entry(backref, &rec->backrefs, list) {
3829                         if (!backref->reachable)
3830                                 continue;
3831                         if (!backref->errors && rec->found_root_item)
3832                                 continue;
3833                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3834                                 " index %llu namelen %u name %s errors %x\n",
3835                                 (unsigned long long)backref->ref_root,
3836                                 (unsigned long long)backref->dir,
3837                                 (unsigned long long)backref->index,
3838                                 backref->namelen, backref->name,
3839                                 backref->errors);
3840                         print_ref_error(backref->errors);
3841                 }
3842         }
3843         return errors > 0 ? 1 : 0;
3844 }
3845
3846 static int process_root_ref(struct extent_buffer *eb, int slot,
3847                             struct btrfs_key *key,
3848                             struct cache_tree *root_cache)
3849 {
3850         u64 dirid;
3851         u64 index;
3852         u32 len;
3853         u32 name_len;
3854         struct btrfs_root_ref *ref;
3855         char namebuf[BTRFS_NAME_LEN];
3856         int error;
3857
3858         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3859
3860         dirid = btrfs_root_ref_dirid(eb, ref);
3861         index = btrfs_root_ref_sequence(eb, ref);
3862         name_len = btrfs_root_ref_name_len(eb, ref);
3863
3864         if (name_len <= BTRFS_NAME_LEN) {
3865                 len = name_len;
3866                 error = 0;
3867         } else {
3868                 len = BTRFS_NAME_LEN;
3869                 error = REF_ERR_NAME_TOO_LONG;
3870         }
3871         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3872
3873         if (key->type == BTRFS_ROOT_REF_KEY) {
3874                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3875                                  index, namebuf, len, key->type, error);
3876         } else {
3877                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3878                                  index, namebuf, len, key->type, error);
3879         }
3880         return 0;
3881 }
3882
3883 static void free_corrupt_block(struct cache_extent *cache)
3884 {
3885         struct btrfs_corrupt_block *corrupt;
3886
3887         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3888         free(corrupt);
3889 }
3890
3891 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3892
3893 /*
3894  * Repair the btree of the given root.
3895  *
3896  * The fix is to remove the node key in corrupt_blocks cache_tree.
3897  * and rebalance the tree.
3898  * After the fix, the btree should be writeable.
3899  */
3900 static int repair_btree(struct btrfs_root *root,
3901                         struct cache_tree *corrupt_blocks)
3902 {
3903         struct btrfs_trans_handle *trans;
3904         struct btrfs_path path;
3905         struct btrfs_corrupt_block *corrupt;
3906         struct cache_extent *cache;
3907         struct btrfs_key key;
3908         u64 offset;
3909         int level;
3910         int ret = 0;
3911
3912         if (cache_tree_empty(corrupt_blocks))
3913                 return 0;
3914
3915         trans = btrfs_start_transaction(root, 1);
3916         if (IS_ERR(trans)) {
3917                 ret = PTR_ERR(trans);
3918                 fprintf(stderr, "Error starting transaction: %s\n",
3919                         strerror(-ret));
3920                 return ret;
3921         }
3922         btrfs_init_path(&path);
3923         cache = first_cache_extent(corrupt_blocks);
3924         while (cache) {
3925                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3926                                        cache);
3927                 level = corrupt->level;
3928                 path.lowest_level = level;
3929                 key.objectid = corrupt->key.objectid;
3930                 key.type = corrupt->key.type;
3931                 key.offset = corrupt->key.offset;
3932
3933                 /*
3934                  * Here we don't want to do any tree balance, since it may
3935                  * cause a balance with corrupted brother leaf/node,
3936                  * so ins_len set to 0 here.
3937                  * Balance will be done after all corrupt node/leaf is deleted.
3938                  */
3939                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3940                 if (ret < 0)
3941                         goto out;
3942                 offset = btrfs_node_blockptr(path.nodes[level],
3943                                              path.slots[level]);
3944
3945                 /* Remove the ptr */
3946                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3947                 if (ret < 0)
3948                         goto out;
3949                 /*
3950                  * Remove the corresponding extent
3951                  * return value is not concerned.
3952                  */
3953                 btrfs_release_path(&path);
3954                 ret = btrfs_free_extent(trans, root, offset,
3955                                 root->fs_info->nodesize, 0,
3956                                 root->root_key.objectid, level - 1, 0);
3957                 cache = next_cache_extent(cache);
3958         }
3959
3960         /* Balance the btree using btrfs_search_slot() */
3961         cache = first_cache_extent(corrupt_blocks);
3962         while (cache) {
3963                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3964                                        cache);
3965                 memcpy(&key, &corrupt->key, sizeof(key));
3966                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3967                 if (ret < 0)
3968                         goto out;
3969                 /* return will always >0 since it won't find the item */
3970                 ret = 0;
3971                 btrfs_release_path(&path);
3972                 cache = next_cache_extent(cache);
3973         }
3974 out:
3975         btrfs_commit_transaction(trans, root);
3976         btrfs_release_path(&path);
3977         return ret;
3978 }
3979
3980 static int check_fs_root(struct btrfs_root *root,
3981                          struct cache_tree *root_cache,
3982                          struct walk_control *wc)
3983 {
3984         int ret = 0;
3985         int err = 0;
3986         int wret;
3987         int level;
3988         struct btrfs_path path;
3989         struct shared_node root_node;
3990         struct root_record *rec;
3991         struct btrfs_root_item *root_item = &root->root_item;
3992         struct cache_tree corrupt_blocks;
3993         struct orphan_data_extent *orphan;
3994         struct orphan_data_extent *tmp;
3995         enum btrfs_tree_block_status status;
3996         struct node_refs nrefs;
3997
3998         /*
3999          * Reuse the corrupt_block cache tree to record corrupted tree block
4000          *
4001          * Unlike the usage in extent tree check, here we do it in a per
4002          * fs/subvol tree base.
4003          */
4004         cache_tree_init(&corrupt_blocks);
4005         root->fs_info->corrupt_blocks = &corrupt_blocks;
4006
4007         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4008                 rec = get_root_rec(root_cache, root->root_key.objectid);
4009                 BUG_ON(IS_ERR(rec));
4010                 if (btrfs_root_refs(root_item) > 0)
4011                         rec->found_root_item = 1;
4012         }
4013
4014         btrfs_init_path(&path);
4015         memset(&root_node, 0, sizeof(root_node));
4016         cache_tree_init(&root_node.root_cache);
4017         cache_tree_init(&root_node.inode_cache);
4018         memset(&nrefs, 0, sizeof(nrefs));
4019
4020         /* Move the orphan extent record to corresponding inode_record */
4021         list_for_each_entry_safe(orphan, tmp,
4022                                  &root->orphan_data_extents, list) {
4023                 struct inode_record *inode;
4024
4025                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4026                                       1);
4027                 BUG_ON(IS_ERR(inode));
4028                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4029                 list_move(&orphan->list, &inode->orphan_extents);
4030         }
4031
4032         level = btrfs_header_level(root->node);
4033         memset(wc->nodes, 0, sizeof(wc->nodes));
4034         wc->nodes[level] = &root_node;
4035         wc->active_node = level;
4036         wc->root_level = level;
4037
4038         /* We may not have checked the root block, lets do that now */
4039         if (btrfs_is_leaf(root->node))
4040                 status = btrfs_check_leaf(root, NULL, root->node);
4041         else
4042                 status = btrfs_check_node(root, NULL, root->node);
4043         if (status != BTRFS_TREE_BLOCK_CLEAN)
4044                 return -EIO;
4045
4046         if (btrfs_root_refs(root_item) > 0 ||
4047             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4048                 path.nodes[level] = root->node;
4049                 extent_buffer_get(root->node);
4050                 path.slots[level] = 0;
4051         } else {
4052                 struct btrfs_key key;
4053                 struct btrfs_disk_key found_key;
4054
4055                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4056                 level = root_item->drop_level;
4057                 path.lowest_level = level;
4058                 if (level > btrfs_header_level(root->node) ||
4059                     level >= BTRFS_MAX_LEVEL) {
4060                         error("ignoring invalid drop level: %u", level);
4061                         goto skip_walking;
4062                 }
4063                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4064                 if (wret < 0)
4065                         goto skip_walking;
4066                 btrfs_node_key(path.nodes[level], &found_key,
4067                                 path.slots[level]);
4068                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4069                                         sizeof(found_key)));
4070         }
4071
4072         while (1) {
4073                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4074                 if (wret < 0)
4075                         ret = wret;
4076                 if (wret != 0)
4077                         break;
4078
4079                 wret = walk_up_tree(root, &path, wc, &level);
4080                 if (wret < 0)
4081                         ret = wret;
4082                 if (wret != 0)
4083                         break;
4084         }
4085 skip_walking:
4086         btrfs_release_path(&path);
4087
4088         if (!cache_tree_empty(&corrupt_blocks)) {
4089                 struct cache_extent *cache;
4090                 struct btrfs_corrupt_block *corrupt;
4091
4092                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4093                        root->root_key.objectid);
4094                 cache = first_cache_extent(&corrupt_blocks);
4095                 while (cache) {
4096                         corrupt = container_of(cache,
4097                                                struct btrfs_corrupt_block,
4098                                                cache);
4099                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4100                                cache->start, corrupt->level,
4101                                corrupt->key.objectid, corrupt->key.type,
4102                                corrupt->key.offset);
4103                         cache = next_cache_extent(cache);
4104                 }
4105                 if (repair) {
4106                         printf("Try to repair the btree for root %llu\n",
4107                                root->root_key.objectid);
4108                         ret = repair_btree(root, &corrupt_blocks);
4109                         if (ret < 0)
4110                                 fprintf(stderr, "Failed to repair btree: %s\n",
4111                                         strerror(-ret));
4112                         if (!ret)
4113                                 printf("Btree for root %llu is fixed\n",
4114                                        root->root_key.objectid);
4115                 }
4116         }
4117
4118         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4119         if (err < 0)
4120                 ret = err;
4121
4122         if (root_node.current) {
4123                 root_node.current->checked = 1;
4124                 maybe_free_inode_rec(&root_node.inode_cache,
4125                                 root_node.current);
4126         }
4127
4128         err = check_inode_recs(root, &root_node.inode_cache);
4129         if (!ret)
4130                 ret = err;
4131
4132         free_corrupt_blocks_tree(&corrupt_blocks);
4133         root->fs_info->corrupt_blocks = NULL;
4134         free_orphan_data_extents(&root->orphan_data_extents);
4135         return ret;
4136 }
4137
4138 static int fs_root_objectid(u64 objectid)
4139 {
4140         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4141             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4142                 return 1;
4143         return is_fstree(objectid);
4144 }
4145
4146 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4147                           struct cache_tree *root_cache)
4148 {
4149         struct btrfs_path path;
4150         struct btrfs_key key;
4151         struct walk_control wc;
4152         struct extent_buffer *leaf, *tree_node;
4153         struct btrfs_root *tmp_root;
4154         struct btrfs_root *tree_root = fs_info->tree_root;
4155         int ret;
4156         int err = 0;
4157
4158         if (ctx.progress_enabled) {
4159                 ctx.tp = TASK_FS_ROOTS;
4160                 task_start(ctx.info);
4161         }
4162
4163         /*
4164          * Just in case we made any changes to the extent tree that weren't
4165          * reflected into the free space cache yet.
4166          */
4167         if (repair)
4168                 reset_cached_block_groups(fs_info);
4169         memset(&wc, 0, sizeof(wc));
4170         cache_tree_init(&wc.shared);
4171         btrfs_init_path(&path);
4172
4173 again:
4174         key.offset = 0;
4175         key.objectid = 0;
4176         key.type = BTRFS_ROOT_ITEM_KEY;
4177         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4178         if (ret < 0) {
4179                 err = 1;
4180                 goto out;
4181         }
4182         tree_node = tree_root->node;
4183         while (1) {
4184                 if (tree_node != tree_root->node) {
4185                         free_root_recs_tree(root_cache);
4186                         btrfs_release_path(&path);
4187                         goto again;
4188                 }
4189                 leaf = path.nodes[0];
4190                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4191                         ret = btrfs_next_leaf(tree_root, &path);
4192                         if (ret) {
4193                                 if (ret < 0)
4194                                         err = 1;
4195                                 break;
4196                         }
4197                         leaf = path.nodes[0];
4198                 }
4199                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4200                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4201                     fs_root_objectid(key.objectid)) {
4202                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4203                                 tmp_root = btrfs_read_fs_root_no_cache(
4204                                                 fs_info, &key);
4205                         } else {
4206                                 key.offset = (u64)-1;
4207                                 tmp_root = btrfs_read_fs_root(
4208                                                 fs_info, &key);
4209                         }
4210                         if (IS_ERR(tmp_root)) {
4211                                 err = 1;
4212                                 goto next;
4213                         }
4214                         ret = check_fs_root(tmp_root, root_cache, &wc);
4215                         if (ret == -EAGAIN) {
4216                                 free_root_recs_tree(root_cache);
4217                                 btrfs_release_path(&path);
4218                                 goto again;
4219                         }
4220                         if (ret)
4221                                 err = 1;
4222                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4223                                 btrfs_free_fs_root(tmp_root);
4224                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4225                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4226                         process_root_ref(leaf, path.slots[0], &key,
4227                                          root_cache);
4228                 }
4229 next:
4230                 path.slots[0]++;
4231         }
4232 out:
4233         btrfs_release_path(&path);
4234         if (err)
4235                 free_extent_cache_tree(&wc.shared);
4236         if (!cache_tree_empty(&wc.shared))
4237                 fprintf(stderr, "warning line %d\n", __LINE__);
4238
4239         task_stop(ctx.info);
4240
4241         return err;
4242 }
4243
4244 /*
4245  * Find the @index according by @ino and name.
4246  * Notice:time efficiency is O(N)
4247  *
4248  * @root:       the root of the fs/file tree
4249  * @index_ret:  the index as return value
4250  * @namebuf:    the name to match
4251  * @name_len:   the length of name to match
4252  * @file_type:  the file_type of INODE_ITEM to match
4253  *
4254  * Returns 0 if found and *@index_ret will be modified with right value
4255  * Returns< 0 not found and *@index_ret will be (u64)-1
4256  */
4257 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4258                           u64 *index_ret, char *namebuf, u32 name_len,
4259                           u8 file_type)
4260 {
4261         struct btrfs_path path;
4262         struct extent_buffer *node;
4263         struct btrfs_dir_item *di;
4264         struct btrfs_key key;
4265         struct btrfs_key location;
4266         char name[BTRFS_NAME_LEN] = {0};
4267
4268         u32 total;
4269         u32 cur = 0;
4270         u32 len;
4271         u32 data_len;
4272         u8 filetype;
4273         int slot;
4274         int ret;
4275
4276         ASSERT(index_ret);
4277
4278         /* search from the last index */
4279         key.objectid = dirid;
4280         key.offset = (u64)-1;
4281         key.type = BTRFS_DIR_INDEX_KEY;
4282
4283         btrfs_init_path(&path);
4284         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4285         if (ret < 0)
4286                 return ret;
4287
4288 loop:
4289         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4290         if (ret) {
4291                 ret = -ENOENT;
4292                 *index_ret = (64)-1;
4293                 goto out;
4294         }
4295         /* Check whether inode_id/filetype/name match */
4296         node = path.nodes[0];
4297         slot = path.slots[0];
4298         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4299         total = btrfs_item_size_nr(node, slot);
4300         while (cur < total) {
4301                 ret = -ENOENT;
4302                 len = btrfs_dir_name_len(node, di);
4303                 data_len = btrfs_dir_data_len(node, di);
4304
4305                 btrfs_dir_item_key_to_cpu(node, di, &location);
4306                 if (location.objectid != location_id ||
4307                     location.type != BTRFS_INODE_ITEM_KEY ||
4308                     location.offset != 0)
4309                         goto next;
4310
4311                 filetype = btrfs_dir_type(node, di);
4312                 if (file_type != filetype)
4313                         goto next;
4314
4315                 if (len > BTRFS_NAME_LEN)
4316                         len = BTRFS_NAME_LEN;
4317
4318                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4319                 if (len != name_len || strncmp(namebuf, name, len))
4320                         goto next;
4321
4322                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4323                 *index_ret = key.offset;
4324                 ret = 0;
4325                 goto out;
4326 next:
4327                 len += sizeof(*di) + data_len;
4328                 di = (struct btrfs_dir_item *)((char *)di + len);
4329                 cur += len;
4330         }
4331         goto loop;
4332
4333 out:
4334         btrfs_release_path(&path);
4335         return ret;
4336 }
4337
4338 /*
4339  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4340  * INODE_REF/INODE_EXTREF match.
4341  *
4342  * @root:       the root of the fs/file tree
4343  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4344  *              value while find index
4345  * @location_key: location key of the struct btrfs_dir_item to match
4346  * @name:       the name to match
4347  * @namelen:    the length of name
4348  * @file_type:  the type of file to math
4349  *
4350  * Return 0 if no error occurred.
4351  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4352  * DIR_ITEM/DIR_INDEX
4353  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4354  * and DIR_ITEM/DIR_INDEX mismatch
4355  */
4356 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4357                          struct btrfs_key *location_key, char *name,
4358                          u32 namelen, u8 file_type)
4359 {
4360         struct btrfs_path path;
4361         struct extent_buffer *node;
4362         struct btrfs_dir_item *di;
4363         struct btrfs_key location;
4364         char namebuf[BTRFS_NAME_LEN] = {0};
4365         u32 total;
4366         u32 cur = 0;
4367         u32 len;
4368         u32 data_len;
4369         u8 filetype;
4370         int slot;
4371         int ret;
4372
4373         /* get the index by traversing all index */
4374         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4375                 ret = find_dir_index(root, key->objectid,
4376                                      location_key->objectid, &key->offset,
4377                                      name, namelen, file_type);
4378                 if (ret)
4379                         ret = DIR_INDEX_MISSING;
4380                 return ret;
4381         }
4382
4383         btrfs_init_path(&path);
4384         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4385         if (ret) {
4386                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4387                         DIR_INDEX_MISSING;
4388                 goto out;
4389         }
4390
4391         /* Check whether inode_id/filetype/name match */
4392         node = path.nodes[0];
4393         slot = path.slots[0];
4394         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4395         total = btrfs_item_size_nr(node, slot);
4396         while (cur < total) {
4397                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4398                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4399
4400                 len = btrfs_dir_name_len(node, di);
4401                 data_len = btrfs_dir_data_len(node, di);
4402
4403                 btrfs_dir_item_key_to_cpu(node, di, &location);
4404                 if (location.objectid != location_key->objectid ||
4405                     location.type != location_key->type ||
4406                     location.offset != location_key->offset)
4407                         goto next;
4408
4409                 filetype = btrfs_dir_type(node, di);
4410                 if (file_type != filetype)
4411                         goto next;
4412
4413                 if (len > BTRFS_NAME_LEN) {
4414                         len = BTRFS_NAME_LEN;
4415                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4416                         root->objectid,
4417                         key->type == BTRFS_DIR_ITEM_KEY ?
4418                         "DIR_ITEM" : "DIR_INDEX",
4419                         key->objectid, key->offset, len);
4420                 }
4421                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4422                                    len);
4423                 if (len != namelen || strncmp(namebuf, name, len))
4424                         goto next;
4425
4426                 ret = 0;
4427                 goto out;
4428 next:
4429                 len += sizeof(*di) + data_len;
4430                 di = (struct btrfs_dir_item *)((char *)di + len);
4431                 cur += len;
4432         }
4433
4434 out:
4435         btrfs_release_path(&path);
4436         return ret;
4437 }
4438
4439 /*
4440  * Traverse the given INODE_REF and call find_dir_item() to find related
4441  * DIR_ITEM/DIR_INDEX.
4442  *
4443  * @root:       the root of the fs/file tree
4444  * @ref_key:    the key of the INODE_REF
4445  * @refs:       the count of INODE_REF
4446  * @mode:       the st_mode of INODE_ITEM
4447  *
4448  * Return 0 if no error occurred.
4449  */
4450 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4451                            struct extent_buffer *node, int slot, u64 *refs,
4452                            int mode)
4453 {
4454         struct btrfs_key key;
4455         struct btrfs_key location;
4456         struct btrfs_inode_ref *ref;
4457         char namebuf[BTRFS_NAME_LEN] = {0};
4458         u32 total;
4459         u32 cur = 0;
4460         u32 len;
4461         u32 name_len;
4462         u64 index;
4463         int ret;
4464         int err = 0;
4465
4466         location.objectid = ref_key->objectid;
4467         location.type = BTRFS_INODE_ITEM_KEY;
4468         location.offset = 0;
4469
4470         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4471         total = btrfs_item_size_nr(node, slot);
4472
4473 next:
4474         /* Update inode ref count */
4475         (*refs)++;
4476
4477         index = btrfs_inode_ref_index(node, ref);
4478         name_len = btrfs_inode_ref_name_len(node, ref);
4479         if (cur + sizeof(*ref) + name_len > total ||
4480             name_len > BTRFS_NAME_LEN) {
4481                 warning("root %llu INODE_REF[%llu %llu] name too long",
4482                         root->objectid, ref_key->objectid, ref_key->offset);
4483
4484                 if (total < cur + sizeof(*ref))
4485                         goto out;
4486                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4487         } else {
4488                 len = name_len;
4489         }
4490
4491         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4492
4493         /* Check root dir ref name */
4494         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4495                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4496                       root->objectid, ref_key->objectid, ref_key->offset,
4497                       namebuf);
4498                 err |= ROOT_DIR_ERROR;
4499         }
4500
4501         /* Find related DIR_INDEX */
4502         key.objectid = ref_key->offset;
4503         key.type = BTRFS_DIR_INDEX_KEY;
4504         key.offset = index;
4505         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4506         err |= ret;
4507
4508         /* Find related dir_item */
4509         key.objectid = ref_key->offset;
4510         key.type = BTRFS_DIR_ITEM_KEY;
4511         key.offset = btrfs_name_hash(namebuf, len);
4512         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4513         err |= ret;
4514
4515         len = sizeof(*ref) + name_len;
4516         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4517         cur += len;
4518         if (cur < total)
4519                 goto next;
4520
4521 out:
4522         return err;
4523 }
4524
4525 /*
4526  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4527  * DIR_ITEM/DIR_INDEX.
4528  *
4529  * @root:       the root of the fs/file tree
4530  * @ref_key:    the key of the INODE_EXTREF
4531  * @refs:       the count of INODE_EXTREF
4532  * @mode:       the st_mode of INODE_ITEM
4533  *
4534  * Return 0 if no error occurred.
4535  */
4536 static int check_inode_extref(struct btrfs_root *root,
4537                               struct btrfs_key *ref_key,
4538                               struct extent_buffer *node, int slot, u64 *refs,
4539                               int mode)
4540 {
4541         struct btrfs_key key;
4542         struct btrfs_key location;
4543         struct btrfs_inode_extref *extref;
4544         char namebuf[BTRFS_NAME_LEN] = {0};
4545         u32 total;
4546         u32 cur = 0;
4547         u32 len;
4548         u32 name_len;
4549         u64 index;
4550         u64 parent;
4551         int ret;
4552         int err = 0;
4553
4554         location.objectid = ref_key->objectid;
4555         location.type = BTRFS_INODE_ITEM_KEY;
4556         location.offset = 0;
4557
4558         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4559         total = btrfs_item_size_nr(node, slot);
4560
4561 next:
4562         /* update inode ref count */
4563         (*refs)++;
4564         name_len = btrfs_inode_extref_name_len(node, extref);
4565         index = btrfs_inode_extref_index(node, extref);
4566         parent = btrfs_inode_extref_parent(node, extref);
4567         if (name_len <= BTRFS_NAME_LEN) {
4568                 len = name_len;
4569         } else {
4570                 len = BTRFS_NAME_LEN;
4571                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4572                         root->objectid, ref_key->objectid, ref_key->offset);
4573         }
4574         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4575
4576         /* Check root dir ref name */
4577         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4578                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4579                       root->objectid, ref_key->objectid, ref_key->offset,
4580                       namebuf);
4581                 err |= ROOT_DIR_ERROR;
4582         }
4583
4584         /* find related dir_index */
4585         key.objectid = parent;
4586         key.type = BTRFS_DIR_INDEX_KEY;
4587         key.offset = index;
4588         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4589         err |= ret;
4590
4591         /* find related dir_item */
4592         key.objectid = parent;
4593         key.type = BTRFS_DIR_ITEM_KEY;
4594         key.offset = btrfs_name_hash(namebuf, len);
4595         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4596         err |= ret;
4597
4598         len = sizeof(*extref) + name_len;
4599         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4600         cur += len;
4601
4602         if (cur < total)
4603                 goto next;
4604
4605         return err;
4606 }
4607
4608 /*
4609  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4610  * DIR_ITEM/DIR_INDEX match.
4611  * Return with @index_ret.
4612  *
4613  * @root:       the root of the fs/file tree
4614  * @key:        the key of the INODE_REF/INODE_EXTREF
4615  * @name:       the name in the INODE_REF/INODE_EXTREF
4616  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4617  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4618  *              value (u64)-1 means do not check index
4619  * @ext_ref:    the EXTENDED_IREF feature
4620  *
4621  * Return 0 if no error occurred.
4622  * Return >0 for error bitmap
4623  */
4624 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4625                           char *name, int namelen, u64 *index_ret,
4626                           unsigned int ext_ref)
4627 {
4628         struct btrfs_path path;
4629         struct btrfs_inode_ref *ref;
4630         struct btrfs_inode_extref *extref;
4631         struct extent_buffer *node;
4632         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4633         u32 total;
4634         u32 cur = 0;
4635         u32 len;
4636         u32 ref_namelen;
4637         u64 ref_index;
4638         u64 parent;
4639         u64 dir_id;
4640         int slot;
4641         int ret;
4642
4643         ASSERT(index_ret);
4644
4645         btrfs_init_path(&path);
4646         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4647         if (ret) {
4648                 ret = INODE_REF_MISSING;
4649                 goto extref;
4650         }
4651
4652         node = path.nodes[0];
4653         slot = path.slots[0];
4654
4655         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4656         total = btrfs_item_size_nr(node, slot);
4657
4658         /* Iterate all entry of INODE_REF */
4659         while (cur < total) {
4660                 ret = INODE_REF_MISSING;
4661
4662                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4663                 ref_index = btrfs_inode_ref_index(node, ref);
4664                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4665                         goto next_ref;
4666
4667                 if (cur + sizeof(*ref) + ref_namelen > total ||
4668                     ref_namelen > BTRFS_NAME_LEN) {
4669                         warning("root %llu INODE %s[%llu %llu] name too long",
4670                                 root->objectid,
4671                                 key->type == BTRFS_INODE_REF_KEY ?
4672                                         "REF" : "EXTREF",
4673                                 key->objectid, key->offset);
4674
4675                         if (cur + sizeof(*ref) > total)
4676                                 break;
4677                         len = min_t(u32, total - cur - sizeof(*ref),
4678                                     BTRFS_NAME_LEN);
4679                 } else {
4680                         len = ref_namelen;
4681                 }
4682
4683                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4684                                    len);
4685
4686                 if (len != namelen || strncmp(ref_namebuf, name, len))
4687                         goto next_ref;
4688
4689                 *index_ret = ref_index;
4690                 ret = 0;
4691                 goto out;
4692 next_ref:
4693                 len = sizeof(*ref) + ref_namelen;
4694                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4695                 cur += len;
4696         }
4697
4698 extref:
4699         /* Skip if not support EXTENDED_IREF feature */
4700         if (!ext_ref)
4701                 goto out;
4702
4703         btrfs_release_path(&path);
4704         btrfs_init_path(&path);
4705
4706         dir_id = key->offset;
4707         key->type = BTRFS_INODE_EXTREF_KEY;
4708         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4709
4710         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4711         if (ret) {
4712                 ret = INODE_REF_MISSING;
4713                 goto out;
4714         }
4715
4716         node = path.nodes[0];
4717         slot = path.slots[0];
4718
4719         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4720         cur = 0;
4721         total = btrfs_item_size_nr(node, slot);
4722
4723         /* Iterate all entry of INODE_EXTREF */
4724         while (cur < total) {
4725                 ret = INODE_REF_MISSING;
4726
4727                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4728                 ref_index = btrfs_inode_extref_index(node, extref);
4729                 parent = btrfs_inode_extref_parent(node, extref);
4730                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4731                         goto next_extref;
4732
4733                 if (parent != dir_id)
4734                         goto next_extref;
4735
4736                 if (ref_namelen <= BTRFS_NAME_LEN) {
4737                         len = ref_namelen;
4738                 } else {
4739                         len = BTRFS_NAME_LEN;
4740                         warning("root %llu INODE %s[%llu %llu] name too long",
4741                                 root->objectid,
4742                                 key->type == BTRFS_INODE_REF_KEY ?
4743                                         "REF" : "EXTREF",
4744                                 key->objectid, key->offset);
4745                 }
4746                 read_extent_buffer(node, ref_namebuf,
4747                                    (unsigned long)(extref + 1), len);
4748
4749                 if (len != namelen || strncmp(ref_namebuf, name, len))
4750                         goto next_extref;
4751
4752                 *index_ret = ref_index;
4753                 ret = 0;
4754                 goto out;
4755
4756 next_extref:
4757                 len = sizeof(*extref) + ref_namelen;
4758                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4759                 cur += len;
4760
4761         }
4762 out:
4763         btrfs_release_path(&path);
4764         return ret;
4765 }
4766
4767 /*
4768  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4769  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4770  *
4771  * @root:       the root of the fs/file tree
4772  * @key:        the key of the INODE_REF/INODE_EXTREF
4773  * @size:       the st_size of the INODE_ITEM
4774  * @ext_ref:    the EXTENDED_IREF feature
4775  *
4776  * Return 0 if no error occurred.
4777  */
4778 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4779                           struct extent_buffer *node, int slot, u64 *size,
4780                           unsigned int ext_ref)
4781 {
4782         struct btrfs_dir_item *di;
4783         struct btrfs_inode_item *ii;
4784         struct btrfs_path path;
4785         struct btrfs_key location;
4786         char namebuf[BTRFS_NAME_LEN] = {0};
4787         u32 total;
4788         u32 cur = 0;
4789         u32 len;
4790         u32 name_len;
4791         u32 data_len;
4792         u8 filetype;
4793         u32 mode;
4794         u64 index;
4795         int ret;
4796         int err = 0;
4797
4798         /*
4799          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4800          * ignore index check.
4801          */
4802         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4803
4804         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4805         total = btrfs_item_size_nr(node, slot);
4806
4807         while (cur < total) {
4808                 data_len = btrfs_dir_data_len(node, di);
4809                 if (data_len)
4810                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4811                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4812                               "DIR_ITEM" : "DIR_INDEX",
4813                               key->objectid, key->offset, data_len);
4814
4815                 name_len = btrfs_dir_name_len(node, di);
4816                 if (cur + sizeof(*di) + name_len > total ||
4817                     name_len > BTRFS_NAME_LEN) {
4818                         warning("root %llu %s[%llu %llu] name too long",
4819                                 root->objectid,
4820                                 key->type == BTRFS_DIR_ITEM_KEY ?
4821                                 "DIR_ITEM" : "DIR_INDEX",
4822                                 key->objectid, key->offset);
4823
4824                         if (cur + sizeof(*di) > total)
4825                                 break;
4826                         len = min_t(u32, total - cur - sizeof(*di),
4827                                     BTRFS_NAME_LEN);
4828                 } else {
4829                         len = name_len;
4830                 }
4831                 (*size) += name_len;
4832
4833                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4834                 filetype = btrfs_dir_type(node, di);
4835
4836                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4837                     key->offset != btrfs_name_hash(namebuf, len)) {
4838                         err |= -EIO;
4839                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4840                                 root->objectid, key->objectid, key->offset,
4841                                 namebuf, len, filetype, key->offset,
4842                                 btrfs_name_hash(namebuf, len));
4843                 }
4844
4845                 btrfs_init_path(&path);
4846                 btrfs_dir_item_key_to_cpu(node, di, &location);
4847
4848                 /* Ignore related ROOT_ITEM check */
4849                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4850                         goto next;
4851
4852                 /* Check relative INODE_ITEM(existence/filetype) */
4853                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4854                 if (ret) {
4855                         err |= INODE_ITEM_MISSING;
4856                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4857                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4858                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4859                               key->offset, location.objectid, name_len,
4860                               namebuf, filetype);
4861                         goto next;
4862                 }
4863
4864                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4865                                     struct btrfs_inode_item);
4866                 mode = btrfs_inode_mode(path.nodes[0], ii);
4867
4868                 if (imode_to_type(mode) != filetype) {
4869                         err |= INODE_ITEM_MISMATCH;
4870                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4871                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4872                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4873                               key->offset, name_len, namebuf, filetype);
4874                 }
4875
4876                 /* Check relative INODE_REF/INODE_EXTREF */
4877                 location.type = BTRFS_INODE_REF_KEY;
4878                 location.offset = key->objectid;
4879                 ret = find_inode_ref(root, &location, namebuf, len,
4880                                      &index, ext_ref);
4881                 err |= ret;
4882                 if (ret & INODE_REF_MISSING)
4883                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4884                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4885                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4886                               key->offset, name_len, namebuf, filetype);
4887
4888 next:
4889                 btrfs_release_path(&path);
4890                 len = sizeof(*di) + name_len + data_len;
4891                 di = (struct btrfs_dir_item *)((char *)di + len);
4892                 cur += len;
4893
4894                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4895                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4896                               root->objectid, key->objectid, key->offset);
4897                         break;
4898                 }
4899         }
4900
4901         return err;
4902 }
4903
4904 /*
4905  * Check file extent datasum/hole, update the size of the file extents,
4906  * check and update the last offset of the file extent.
4907  *
4908  * @root:       the root of fs/file tree.
4909  * @fkey:       the key of the file extent.
4910  * @nodatasum:  INODE_NODATASUM feature.
4911  * @size:       the sum of all EXTENT_DATA items size for this inode.
4912  * @end:        the offset of the last extent.
4913  *
4914  * Return 0 if no error occurred.
4915  */
4916 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4917                              struct extent_buffer *node, int slot,
4918                              unsigned int nodatasum, u64 *size, u64 *end)
4919 {
4920         struct btrfs_file_extent_item *fi;
4921         u64 disk_bytenr;
4922         u64 disk_num_bytes;
4923         u64 extent_num_bytes;
4924         u64 extent_offset;
4925         u64 csum_found;         /* In byte size, sectorsize aligned */
4926         u64 search_start;       /* Logical range start we search for csum */
4927         u64 search_len;         /* Logical range len we search for csum */
4928         unsigned int extent_type;
4929         unsigned int is_hole;
4930         int compressed = 0;
4931         int ret;
4932         int err = 0;
4933
4934         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4935
4936         /* Check inline extent */
4937         extent_type = btrfs_file_extent_type(node, fi);
4938         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4939                 struct btrfs_item *e = btrfs_item_nr(slot);
4940                 u32 item_inline_len;
4941
4942                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4943                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4944                 compressed = btrfs_file_extent_compression(node, fi);
4945                 if (extent_num_bytes == 0) {
4946                         error(
4947                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4948                                 root->objectid, fkey->objectid, fkey->offset);
4949                         err |= FILE_EXTENT_ERROR;
4950                 }
4951                 if (!compressed && extent_num_bytes != item_inline_len) {
4952                         error(
4953                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4954                                 root->objectid, fkey->objectid, fkey->offset,
4955                                 extent_num_bytes, item_inline_len);
4956                         err |= FILE_EXTENT_ERROR;
4957                 }
4958                 *end += extent_num_bytes;
4959                 *size += extent_num_bytes;
4960                 return err;
4961         }
4962
4963         /* Check extent type */
4964         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4965                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4966                 err |= FILE_EXTENT_ERROR;
4967                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4968                       root->objectid, fkey->objectid, fkey->offset);
4969                 return err;
4970         }
4971
4972         /* Check REG_EXTENT/PREALLOC_EXTENT */
4973         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4974         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4975         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4976         extent_offset = btrfs_file_extent_offset(node, fi);
4977         compressed = btrfs_file_extent_compression(node, fi);
4978         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4979
4980         /*
4981          * Check EXTENT_DATA csum
4982          *
4983          * For plain (uncompressed) extent, we should only check the range
4984          * we're referring to, as it's possible that part of prealloc extent
4985          * has been written, and has csum:
4986          *
4987          * |<--- Original large preallocated extent A ---->|
4988          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4989          *      No csum                         Has csum
4990          *
4991          * For compressed extent, we should check the whole range.
4992          */
4993         if (!compressed) {
4994                 search_start = disk_bytenr + extent_offset;
4995                 search_len = extent_num_bytes;
4996         } else {
4997                 search_start = disk_bytenr;
4998                 search_len = disk_num_bytes;
4999         }
5000         ret = count_csum_range(root, search_start, search_len, &csum_found);
5001         if (csum_found > 0 && nodatasum) {
5002                 err |= ODD_CSUM_ITEM;
5003                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5004                       root->objectid, fkey->objectid, fkey->offset);
5005         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5006                    !is_hole && (ret < 0 || csum_found < search_len)) {
5007                 err |= CSUM_ITEM_MISSING;
5008                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5009                       root->objectid, fkey->objectid, fkey->offset,
5010                       csum_found, search_len);
5011         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5012                 err |= ODD_CSUM_ITEM;
5013                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5014                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5015         }
5016
5017         /* Check EXTENT_DATA hole */
5018         if (!no_holes && *end != fkey->offset) {
5019                 err |= FILE_EXTENT_ERROR;
5020                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5021                       root->objectid, fkey->objectid, fkey->offset);
5022         }
5023
5024         *end += extent_num_bytes;
5025         if (!is_hole)
5026                 *size += extent_num_bytes;
5027
5028         return err;
5029 }
5030
5031 /*
5032  * Set inode item nbytes to @nbytes
5033  *
5034  * Returns  0     on success
5035  * Returns  != 0  on error
5036  */
5037 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5038                                       struct btrfs_path *path,
5039                                       u64 ino, u64 nbytes)
5040 {
5041         struct btrfs_trans_handle *trans;
5042         struct btrfs_inode_item *ii;
5043         struct btrfs_key key;
5044         struct btrfs_key research_key;
5045         int err = 0;
5046         int ret;
5047
5048         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5049
5050         key.objectid = ino;
5051         key.type = BTRFS_INODE_ITEM_KEY;
5052         key.offset = 0;
5053
5054         trans = btrfs_start_transaction(root, 1);
5055         if (IS_ERR(trans)) {
5056                 ret = PTR_ERR(trans);
5057                 err |= ret;
5058                 goto out;
5059         }
5060
5061         btrfs_release_path(path);
5062         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5063         if (ret > 0)
5064                 ret = -ENOENT;
5065         if (ret) {
5066                 err |= ret;
5067                 goto fail;
5068         }
5069
5070         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5071                             struct btrfs_inode_item);
5072         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5073         btrfs_mark_buffer_dirty(path->nodes[0]);
5074 fail:
5075         btrfs_commit_transaction(trans, root);
5076 out:
5077         if (ret)
5078                 error("failed to set nbytes in inode %llu root %llu",
5079                       ino, root->root_key.objectid);
5080         else
5081                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5082                        root->root_key.objectid, nbytes);
5083
5084         /* research path */
5085         btrfs_release_path(path);
5086         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5087         err |= ret;
5088
5089         return err;
5090 }
5091
5092 /*
5093  * Set directory inode isize to @isize.
5094  *
5095  * Returns 0     on success.
5096  * Returns != 0  on error.
5097  */
5098 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5099                                    struct btrfs_path *path,
5100                                    u64 ino, u64 isize)
5101 {
5102         struct btrfs_trans_handle *trans;
5103         struct btrfs_inode_item *ii;
5104         struct btrfs_key key;
5105         struct btrfs_key research_key;
5106         int ret;
5107         int err = 0;
5108
5109         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5110
5111         key.objectid = ino;
5112         key.type = BTRFS_INODE_ITEM_KEY;
5113         key.offset = 0;
5114
5115         trans = btrfs_start_transaction(root, 1);
5116         if (IS_ERR(trans)) {
5117                 ret = PTR_ERR(trans);
5118                 err |= ret;
5119                 goto out;
5120         }
5121
5122         btrfs_release_path(path);
5123         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5124         if (ret > 0)
5125                 ret = -ENOENT;
5126         if (ret) {
5127                 err |= ret;
5128                 goto fail;
5129         }
5130
5131         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5132                             struct btrfs_inode_item);
5133         btrfs_set_inode_size(path->nodes[0], ii, isize);
5134         btrfs_mark_buffer_dirty(path->nodes[0]);
5135 fail:
5136         btrfs_commit_transaction(trans, root);
5137 out:
5138         if (ret)
5139                 error("failed to set isize in inode %llu root %llu",
5140                       ino, root->root_key.objectid);
5141         else
5142                 printf("Set isize in inode %llu root %llu to %llu\n",
5143                        ino, root->root_key.objectid, isize);
5144
5145         btrfs_release_path(path);
5146         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5147         err |= ret;
5148
5149         return err;
5150 }
5151
5152 /*
5153  * Wrapper function for btrfs_add_orphan_item().
5154  *
5155  * Returns 0     on success.
5156  * Returns != 0  on error.
5157  */
5158 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5159                                            struct btrfs_path *path, u64 ino)
5160 {
5161         struct btrfs_trans_handle *trans;
5162         struct btrfs_key research_key;
5163         int ret;
5164         int err = 0;
5165
5166         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5167
5168         trans = btrfs_start_transaction(root, 1);
5169         if (IS_ERR(trans)) {
5170                 ret = PTR_ERR(trans);
5171                 err |= ret;
5172                 goto out;
5173         }
5174
5175         btrfs_release_path(path);
5176         ret = btrfs_add_orphan_item(trans, root, path, ino);
5177         err |= ret;
5178         btrfs_commit_transaction(trans, root);
5179 out:
5180         if (ret)
5181                 error("failed to add inode %llu as orphan item root %llu",
5182                       ino, root->root_key.objectid);
5183         else
5184                 printf("Added inode %llu as orphan item root %llu\n",
5185                        ino, root->root_key.objectid);
5186
5187         btrfs_release_path(path);
5188         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5189         err |= ret;
5190
5191         return err;
5192 }
5193
5194 /*
5195  * Check INODE_ITEM and related ITEMs (the same inode number)
5196  * 1. check link count
5197  * 2. check inode ref/extref
5198  * 3. check dir item/index
5199  *
5200  * @ext_ref:    the EXTENDED_IREF feature
5201  *
5202  * Return 0 if no error occurred.
5203  * Return >0 for error or hit the traversal is done(by error bitmap)
5204  */
5205 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5206                             unsigned int ext_ref)
5207 {
5208         struct extent_buffer *node;
5209         struct btrfs_inode_item *ii;
5210         struct btrfs_key key;
5211         u64 inode_id;
5212         u32 mode;
5213         u64 nlink;
5214         u64 nbytes;
5215         u64 isize;
5216         u64 size = 0;
5217         u64 refs = 0;
5218         u64 extent_end = 0;
5219         u64 extent_size = 0;
5220         unsigned int dir;
5221         unsigned int nodatasum;
5222         int slot;
5223         int ret;
5224         int err = 0;
5225
5226         node = path->nodes[0];
5227         slot = path->slots[0];
5228
5229         btrfs_item_key_to_cpu(node, &key, slot);
5230         inode_id = key.objectid;
5231
5232         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5233                 ret = btrfs_next_item(root, path);
5234                 if (ret > 0)
5235                         err |= LAST_ITEM;
5236                 return err;
5237         }
5238
5239         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5240         isize = btrfs_inode_size(node, ii);
5241         nbytes = btrfs_inode_nbytes(node, ii);
5242         mode = btrfs_inode_mode(node, ii);
5243         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5244         nlink = btrfs_inode_nlink(node, ii);
5245         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5246
5247         while (1) {
5248                 ret = btrfs_next_item(root, path);
5249                 if (ret < 0) {
5250                         /* out will fill 'err' rusing current statistics */
5251                         goto out;
5252                 } else if (ret > 0) {
5253                         err |= LAST_ITEM;
5254                         goto out;
5255                 }
5256
5257                 node = path->nodes[0];
5258                 slot = path->slots[0];
5259                 btrfs_item_key_to_cpu(node, &key, slot);
5260                 if (key.objectid != inode_id)
5261                         goto out;
5262
5263                 switch (key.type) {
5264                 case BTRFS_INODE_REF_KEY:
5265                         ret = check_inode_ref(root, &key, node, slot, &refs,
5266                                               mode);
5267                         err |= ret;
5268                         break;
5269                 case BTRFS_INODE_EXTREF_KEY:
5270                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5271                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5272                                         root->objectid, key.objectid,
5273                                         key.offset);
5274                         ret = check_inode_extref(root, &key, node, slot, &refs,
5275                                                  mode);
5276                         err |= ret;
5277                         break;
5278                 case BTRFS_DIR_ITEM_KEY:
5279                 case BTRFS_DIR_INDEX_KEY:
5280                         if (!dir) {
5281                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5282                                         root->objectid, inode_id,
5283                                         imode_to_type(mode), key.objectid,
5284                                         key.offset);
5285                         }
5286                         ret = check_dir_item(root, &key, node, slot, &size,
5287                                              ext_ref);
5288                         err |= ret;
5289                         break;
5290                 case BTRFS_EXTENT_DATA_KEY:
5291                         if (dir) {
5292                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5293                                         root->objectid, inode_id, key.objectid,
5294                                         key.offset);
5295                         }
5296                         ret = check_file_extent(root, &key, node, slot,
5297                                                 nodatasum, &extent_size,
5298                                                 &extent_end);
5299                         err |= ret;
5300                         break;
5301                 case BTRFS_XATTR_ITEM_KEY:
5302                         break;
5303                 default:
5304                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5305                               key.objectid, key.type, key.offset);
5306                 }
5307         }
5308
5309 out:
5310         /* verify INODE_ITEM nlink/isize/nbytes */
5311         if (dir) {
5312                 if (nlink != 1) {
5313                         err |= LINK_COUNT_ERROR;
5314                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5315                               root->objectid, inode_id, nlink);
5316                 }
5317
5318                 /*
5319                  * Just a warning, as dir inode nbytes is just an
5320                  * instructive value.
5321                  */
5322                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5323                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5324                                 root->objectid, inode_id,
5325                                 root->fs_info->nodesize);
5326                 }
5327
5328                 if (isize != size) {
5329                         if (repair)
5330                                 ret = repair_dir_isize_lowmem(root, path,
5331                                                               inode_id, size);
5332                         if (!repair || ret) {
5333                                 err |= ISIZE_ERROR;
5334                                 error(
5335                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5336                                       root->objectid, inode_id, isize, size);
5337                         }
5338                 }
5339         } else {
5340                 if (nlink != refs) {
5341                         err |= LINK_COUNT_ERROR;
5342                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5343                               root->objectid, inode_id, nlink, refs);
5344                 } else if (!nlink) {
5345                         if (repair)
5346                                 ret = repair_inode_orphan_item_lowmem(root,
5347                                                               path, inode_id);
5348                         if (!repair || ret) {
5349                                 err |= ORPHAN_ITEM;
5350                                 error("root %llu INODE[%llu] is orphan item",
5351                                       root->objectid, inode_id);
5352                         }
5353                 }
5354
5355                 if (!nbytes && !no_holes && extent_end < isize) {
5356                         err |= NBYTES_ERROR;
5357                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5358                               root->objectid, inode_id, isize);
5359                 }
5360
5361                 if (nbytes != extent_size) {
5362                         if (repair)
5363                                 ret = repair_inode_nbytes_lowmem(root, path,
5364                                                          inode_id, extent_size);
5365                         if (!repair || ret) {
5366                                 err |= NBYTES_ERROR;
5367                                 error(
5368         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5369                                       root->objectid, inode_id, nbytes,
5370                                       extent_size);
5371                         }
5372                 }
5373         }
5374
5375         return err;
5376 }
5377
5378 /*
5379  * check first root dir's inode_item and inode_ref
5380  *
5381  * returns 0 means no error
5382  * returns >0 means error
5383  * returns <0 means fatal error
5384  */
5385 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5386 {
5387         struct btrfs_path path;
5388         struct btrfs_key key;
5389         struct btrfs_inode_item *ii;
5390         u64 index;
5391         u32 mode;
5392         int err = 0;
5393         int ret;
5394
5395         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5396         key.type = BTRFS_INODE_ITEM_KEY;
5397         key.offset = 0;
5398
5399         /* For root being dropped, we don't need to check first inode */
5400         if (btrfs_root_refs(&root->root_item) == 0 &&
5401             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5402             BTRFS_FIRST_FREE_OBJECTID)
5403                 return 0;
5404
5405         btrfs_init_path(&path);
5406         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5407         if (ret < 0)
5408                 goto out;
5409         if (ret > 0) {
5410                 ret = 0;
5411                 err |= INODE_ITEM_MISSING;
5412         } else {
5413                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5414                                     struct btrfs_inode_item);
5415                 mode = btrfs_inode_mode(path.nodes[0], ii);
5416                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5417                         err |= INODE_ITEM_MISMATCH;
5418         }
5419
5420         /* lookup first inode ref */
5421         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5422         key.type = BTRFS_INODE_REF_KEY;
5423         /* special index value */
5424         index = 0;
5425
5426         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5427         if (ret < 0)
5428                 goto out;
5429         err |= ret;
5430
5431 out:
5432         btrfs_release_path(&path);
5433         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5434                 error("root dir INODE_ITEM is %s",
5435                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5436         if (err & INODE_REF_MISSING)
5437                 error("root dir INODE_REF is missing");
5438
5439         return ret < 0 ? ret : err;
5440 }
5441
5442 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5443                                                 u64 parent, u64 root)
5444 {
5445         struct rb_node *node;
5446         struct tree_backref *back = NULL;
5447         struct tree_backref match = {
5448                 .node = {
5449                         .is_data = 0,
5450                 },
5451         };
5452
5453         if (parent) {
5454                 match.parent = parent;
5455                 match.node.full_backref = 1;
5456         } else {
5457                 match.root = root;
5458         }
5459
5460         node = rb_search(&rec->backref_tree, &match.node.node,
5461                          (rb_compare_keys)compare_extent_backref, NULL);
5462         if (node)
5463                 back = to_tree_backref(rb_node_to_extent_backref(node));
5464
5465         return back;
5466 }
5467
5468 static struct data_backref *find_data_backref(struct extent_record *rec,
5469                                                 u64 parent, u64 root,
5470                                                 u64 owner, u64 offset,
5471                                                 int found_ref,
5472                                                 u64 disk_bytenr, u64 bytes)
5473 {
5474         struct rb_node *node;
5475         struct data_backref *back = NULL;
5476         struct data_backref match = {
5477                 .node = {
5478                         .is_data = 1,
5479                 },
5480                 .owner = owner,
5481                 .offset = offset,
5482                 .bytes = bytes,
5483                 .found_ref = found_ref,
5484                 .disk_bytenr = disk_bytenr,
5485         };
5486
5487         if (parent) {
5488                 match.parent = parent;
5489                 match.node.full_backref = 1;
5490         } else {
5491                 match.root = root;
5492         }
5493
5494         node = rb_search(&rec->backref_tree, &match.node.node,
5495                          (rb_compare_keys)compare_extent_backref, NULL);
5496         if (node)
5497                 back = to_data_backref(rb_node_to_extent_backref(node));
5498
5499         return back;
5500 }
5501 /*
5502  * Iterate all item on the tree and call check_inode_item() to check.
5503  *
5504  * @root:       the root of the tree to be checked.
5505  * @ext_ref:    the EXTENDED_IREF feature
5506  *
5507  * Return 0 if no error found.
5508  * Return <0 for error.
5509  */
5510 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5511 {
5512         struct btrfs_path path;
5513         struct node_refs nrefs;
5514         struct btrfs_root_item *root_item = &root->root_item;
5515         int ret;
5516         int level;
5517         int err = 0;
5518
5519         /*
5520          * We need to manually check the first inode item(256)
5521          * As the following traversal function will only start from
5522          * the first inode item in the leaf, if inode item(256) is missing
5523          * we will just skip it forever.
5524          */
5525         ret = check_fs_first_inode(root, ext_ref);
5526         if (ret < 0)
5527                 return ret;
5528         err |= !!ret;
5529
5530         memset(&nrefs, 0, sizeof(nrefs));
5531         level = btrfs_header_level(root->node);
5532         btrfs_init_path(&path);
5533
5534         if (btrfs_root_refs(root_item) > 0 ||
5535             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5536                 path.nodes[level] = root->node;
5537                 path.slots[level] = 0;
5538                 extent_buffer_get(root->node);
5539         } else {
5540                 struct btrfs_key key;
5541
5542                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5543                 level = root_item->drop_level;
5544                 path.lowest_level = level;
5545                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5546                 if (ret < 0)
5547                         goto out;
5548                 ret = 0;
5549         }
5550
5551         while (1) {
5552                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5553                 err |= !!ret;
5554
5555                 /* if ret is negative, walk shall stop */
5556                 if (ret < 0) {
5557                         ret = err;
5558                         break;
5559                 }
5560
5561                 ret = walk_up_tree_v2(root, &path, &level);
5562                 if (ret != 0) {
5563                         /* Normal exit, reset ret to err */
5564                         ret = err;
5565                         break;
5566                 }
5567         }
5568
5569 out:
5570         btrfs_release_path(&path);
5571         return ret;
5572 }
5573
5574 /*
5575  * Find the relative ref for root_ref and root_backref.
5576  *
5577  * @root:       the root of the root tree.
5578  * @ref_key:    the key of the root ref.
5579  *
5580  * Return 0 if no error occurred.
5581  */
5582 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5583                           struct extent_buffer *node, int slot)
5584 {
5585         struct btrfs_path path;
5586         struct btrfs_key key;
5587         struct btrfs_root_ref *ref;
5588         struct btrfs_root_ref *backref;
5589         char ref_name[BTRFS_NAME_LEN] = {0};
5590         char backref_name[BTRFS_NAME_LEN] = {0};
5591         u64 ref_dirid;
5592         u64 ref_seq;
5593         u32 ref_namelen;
5594         u64 backref_dirid;
5595         u64 backref_seq;
5596         u32 backref_namelen;
5597         u32 len;
5598         int ret;
5599         int err = 0;
5600
5601         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5602         ref_dirid = btrfs_root_ref_dirid(node, ref);
5603         ref_seq = btrfs_root_ref_sequence(node, ref);
5604         ref_namelen = btrfs_root_ref_name_len(node, ref);
5605
5606         if (ref_namelen <= BTRFS_NAME_LEN) {
5607                 len = ref_namelen;
5608         } else {
5609                 len = BTRFS_NAME_LEN;
5610                 warning("%s[%llu %llu] ref_name too long",
5611                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5612                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5613                         ref_key->offset);
5614         }
5615         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5616
5617         /* Find relative root_ref */
5618         key.objectid = ref_key->offset;
5619         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5620         key.offset = ref_key->objectid;
5621
5622         btrfs_init_path(&path);
5623         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5624         if (ret) {
5625                 err |= ROOT_REF_MISSING;
5626                 error("%s[%llu %llu] couldn't find relative ref",
5627                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5628                       "ROOT_REF" : "ROOT_BACKREF",
5629                       ref_key->objectid, ref_key->offset);
5630                 goto out;
5631         }
5632
5633         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5634                                  struct btrfs_root_ref);
5635         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5636         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5637         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5638
5639         if (backref_namelen <= BTRFS_NAME_LEN) {
5640                 len = backref_namelen;
5641         } else {
5642                 len = BTRFS_NAME_LEN;
5643                 warning("%s[%llu %llu] ref_name too long",
5644                         key.type == BTRFS_ROOT_REF_KEY ?
5645                         "ROOT_REF" : "ROOT_BACKREF",
5646                         key.objectid, key.offset);
5647         }
5648         read_extent_buffer(path.nodes[0], backref_name,
5649                            (unsigned long)(backref + 1), len);
5650
5651         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5652             ref_namelen != backref_namelen ||
5653             strncmp(ref_name, backref_name, len)) {
5654                 err |= ROOT_REF_MISMATCH;
5655                 error("%s[%llu %llu] mismatch relative ref",
5656                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5657                       "ROOT_REF" : "ROOT_BACKREF",
5658                       ref_key->objectid, ref_key->offset);
5659         }
5660 out:
5661         btrfs_release_path(&path);
5662         return err;
5663 }
5664
5665 /*
5666  * Check all fs/file tree in low_memory mode.
5667  *
5668  * 1. for fs tree root item, call check_fs_root_v2()
5669  * 2. for fs tree root ref/backref, call check_root_ref()
5670  *
5671  * Return 0 if no error occurred.
5672  */
5673 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5674 {
5675         struct btrfs_root *tree_root = fs_info->tree_root;
5676         struct btrfs_root *cur_root = NULL;
5677         struct btrfs_path path;
5678         struct btrfs_key key;
5679         struct extent_buffer *node;
5680         unsigned int ext_ref;
5681         int slot;
5682         int ret;
5683         int err = 0;
5684
5685         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5686
5687         btrfs_init_path(&path);
5688         key.objectid = BTRFS_FS_TREE_OBJECTID;
5689         key.offset = 0;
5690         key.type = BTRFS_ROOT_ITEM_KEY;
5691
5692         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5693         if (ret < 0) {
5694                 err = ret;
5695                 goto out;
5696         } else if (ret > 0) {
5697                 err = -ENOENT;
5698                 goto out;
5699         }
5700
5701         while (1) {
5702                 node = path.nodes[0];
5703                 slot = path.slots[0];
5704                 btrfs_item_key_to_cpu(node, &key, slot);
5705                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5706                         goto out;
5707                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5708                     fs_root_objectid(key.objectid)) {
5709                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5710                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5711                                                                        &key);
5712                         } else {
5713                                 key.offset = (u64)-1;
5714                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5715                         }
5716
5717                         if (IS_ERR(cur_root)) {
5718                                 error("Fail to read fs/subvol tree: %lld",
5719                                       key.objectid);
5720                                 err = -EIO;
5721                                 goto next;
5722                         }
5723
5724                         ret = check_fs_root_v2(cur_root, ext_ref);
5725                         err |= ret;
5726
5727                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5728                                 btrfs_free_fs_root(cur_root);
5729                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5730                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5731                         ret = check_root_ref(tree_root, &key, node, slot);
5732                         err |= ret;
5733                 }
5734 next:
5735                 ret = btrfs_next_item(tree_root, &path);
5736                 if (ret > 0)
5737                         goto out;
5738                 if (ret < 0) {
5739                         err = ret;
5740                         goto out;
5741                 }
5742         }
5743
5744 out:
5745         btrfs_release_path(&path);
5746         return err;
5747 }
5748
5749 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5750                           struct cache_tree *root_cache)
5751 {
5752         int ret;
5753
5754         if (!ctx.progress_enabled)
5755                 fprintf(stderr, "checking fs roots\n");
5756         if (check_mode == CHECK_MODE_LOWMEM)
5757                 ret = check_fs_roots_v2(fs_info);
5758         else
5759                 ret = check_fs_roots(fs_info, root_cache);
5760
5761         return ret;
5762 }
5763
5764 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5765 {
5766         struct extent_backref *back, *tmp;
5767         struct tree_backref *tback;
5768         struct data_backref *dback;
5769         u64 found = 0;
5770         int err = 0;
5771
5772         rbtree_postorder_for_each_entry_safe(back, tmp,
5773                                              &rec->backref_tree, node) {
5774                 if (!back->found_extent_tree) {
5775                         err = 1;
5776                         if (!print_errs)
5777                                 goto out;
5778                         if (back->is_data) {
5779                                 dback = to_data_backref(back);
5780                                 fprintf(stderr, "Data backref %llu %s %llu"
5781                                         " owner %llu offset %llu num_refs %lu"
5782                                         " not found in extent tree\n",
5783                                         (unsigned long long)rec->start,
5784                                         back->full_backref ?
5785                                         "parent" : "root",
5786                                         back->full_backref ?
5787                                         (unsigned long long)dback->parent:
5788                                         (unsigned long long)dback->root,
5789                                         (unsigned long long)dback->owner,
5790                                         (unsigned long long)dback->offset,
5791                                         (unsigned long)dback->num_refs);
5792                         } else {
5793                                 tback = to_tree_backref(back);
5794                                 fprintf(stderr, "Tree backref %llu parent %llu"
5795                                         " root %llu not found in extent tree\n",
5796                                         (unsigned long long)rec->start,
5797                                         (unsigned long long)tback->parent,
5798                                         (unsigned long long)tback->root);
5799                         }
5800                 }
5801                 if (!back->is_data && !back->found_ref) {
5802                         err = 1;
5803                         if (!print_errs)
5804                                 goto out;
5805                         tback = to_tree_backref(back);
5806                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5807                                 (unsigned long long)rec->start,
5808                                 back->full_backref ? "parent" : "root",
5809                                 back->full_backref ?
5810                                 (unsigned long long)tback->parent :
5811                                 (unsigned long long)tback->root, back);
5812                 }
5813                 if (back->is_data) {
5814                         dback = to_data_backref(back);
5815                         if (dback->found_ref != dback->num_refs) {
5816                                 err = 1;
5817                                 if (!print_errs)
5818                                         goto out;
5819                                 fprintf(stderr, "Incorrect local backref count"
5820                                         " on %llu %s %llu owner %llu"
5821                                         " offset %llu found %u wanted %u back %p\n",
5822                                         (unsigned long long)rec->start,
5823                                         back->full_backref ?
5824                                         "parent" : "root",
5825                                         back->full_backref ?
5826                                         (unsigned long long)dback->parent:
5827                                         (unsigned long long)dback->root,
5828                                         (unsigned long long)dback->owner,
5829                                         (unsigned long long)dback->offset,
5830                                         dback->found_ref, dback->num_refs, back);
5831                         }
5832                         if (dback->disk_bytenr != rec->start) {
5833                                 err = 1;
5834                                 if (!print_errs)
5835                                         goto out;
5836                                 fprintf(stderr, "Backref disk bytenr does not"
5837                                         " match extent record, bytenr=%llu, "
5838                                         "ref bytenr=%llu\n",
5839                                         (unsigned long long)rec->start,
5840                                         (unsigned long long)dback->disk_bytenr);
5841                         }
5842
5843                         if (dback->bytes != rec->nr) {
5844                                 err = 1;
5845                                 if (!print_errs)
5846                                         goto out;
5847                                 fprintf(stderr, "Backref bytes do not match "
5848                                         "extent backref, bytenr=%llu, ref "
5849                                         "bytes=%llu, backref bytes=%llu\n",
5850                                         (unsigned long long)rec->start,
5851                                         (unsigned long long)rec->nr,
5852                                         (unsigned long long)dback->bytes);
5853                         }
5854                 }
5855                 if (!back->is_data) {
5856                         found += 1;
5857                 } else {
5858                         dback = to_data_backref(back);
5859                         found += dback->found_ref;
5860                 }
5861         }
5862         if (found != rec->refs) {
5863                 err = 1;
5864                 if (!print_errs)
5865                         goto out;
5866                 fprintf(stderr, "Incorrect global backref count "
5867                         "on %llu found %llu wanted %llu\n",
5868                         (unsigned long long)rec->start,
5869                         (unsigned long long)found,
5870                         (unsigned long long)rec->refs);
5871         }
5872 out:
5873         return err;
5874 }
5875
/* rb_free_nodes() callback: free the extent backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
5882
5883 static void free_all_extent_backrefs(struct extent_record *rec)
5884 {
5885         rb_free_nodes(&rec->backref_tree, __free_one_backref);
5886 }
5887
5888 static void free_extent_record_cache(struct cache_tree *extent_cache)
5889 {
5890         struct cache_extent *cache;
5891         struct extent_record *rec;
5892
5893         while (1) {
5894                 cache = first_cache_extent(extent_cache);
5895                 if (!cache)
5896                         break;
5897                 rec = container_of(cache, struct extent_record, cache);
5898                 remove_cache_extent(extent_cache, cache);
5899                 free_all_extent_backrefs(rec);
5900                 free(rec);
5901         }
5902 }
5903
5904 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5905                                  struct extent_record *rec)
5906 {
5907         if (rec->content_checked && rec->owner_ref_checked &&
5908             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5909             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5910             !rec->bad_full_backref && !rec->crossing_stripes &&
5911             !rec->wrong_chunk_type) {
5912                 remove_cache_extent(extent_cache, &rec->cache);
5913                 free_all_extent_backrefs(rec);
5914                 list_del_init(&rec->list);
5915                 free(rec);
5916         }
5917         return 0;
5918 }
5919
5920 static int check_owner_ref(struct btrfs_root *root,
5921                             struct extent_record *rec,
5922                             struct extent_buffer *buf)
5923 {
5924         struct extent_backref *node, *tmp;
5925         struct tree_backref *back;
5926         struct btrfs_root *ref_root;
5927         struct btrfs_key key;
5928         struct btrfs_path path;
5929         struct extent_buffer *parent;
5930         int level;
5931         int found = 0;
5932         int ret;
5933
5934         rbtree_postorder_for_each_entry_safe(node, tmp,
5935                                              &rec->backref_tree, node) {
5936                 if (node->is_data)
5937                         continue;
5938                 if (!node->found_ref)
5939                         continue;
5940                 if (node->full_backref)
5941                         continue;
5942                 back = to_tree_backref(node);
5943                 if (btrfs_header_owner(buf) == back->root)
5944                         return 0;
5945         }
5946         BUG_ON(rec->is_root);
5947
5948         /* try to find the block by search corresponding fs tree */
5949         key.objectid = btrfs_header_owner(buf);
5950         key.type = BTRFS_ROOT_ITEM_KEY;
5951         key.offset = (u64)-1;
5952
5953         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5954         if (IS_ERR(ref_root))
5955                 return 1;
5956
5957         level = btrfs_header_level(buf);
5958         if (level == 0)
5959                 btrfs_item_key_to_cpu(buf, &key, 0);
5960         else
5961                 btrfs_node_key_to_cpu(buf, &key, 0);
5962
5963         btrfs_init_path(&path);
5964         path.lowest_level = level + 1;
5965         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5966         if (ret < 0)
5967                 return 0;
5968
5969         parent = path.nodes[level + 1];
5970         if (parent && buf->start == btrfs_node_blockptr(parent,
5971                                                         path.slots[level + 1]))
5972                 found = 1;
5973
5974         btrfs_release_path(&path);
5975         return found ? 0 : 1;
5976 }
5977
5978 static int is_extent_tree_record(struct extent_record *rec)
5979 {
5980         struct extent_backref *node, *tmp;
5981         struct tree_backref *back;
5982         int is_extent = 0;
5983
5984         rbtree_postorder_for_each_entry_safe(node, tmp,
5985                                              &rec->backref_tree, node) {
5986                 if (node->is_data)
5987                         return 0;
5988                 back = to_tree_backref(node);
5989                 if (node->full_backref)
5990                         return 0;
5991                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5992                         is_extent = 1;
5993         }
5994         return is_extent;
5995 }
5996
5997
5998 static int record_bad_block_io(struct btrfs_fs_info *info,
5999                                struct cache_tree *extent_cache,
6000                                u64 start, u64 len)
6001 {
6002         struct extent_record *rec;
6003         struct cache_extent *cache;
6004         struct btrfs_key key;
6005
6006         cache = lookup_cache_extent(extent_cache, start, len);
6007         if (!cache)
6008                 return 0;
6009
6010         rec = container_of(cache, struct extent_record, cache);
6011         if (!is_extent_tree_record(rec))
6012                 return 0;
6013
6014         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6015         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6016 }
6017
6018 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6019                        struct extent_buffer *buf, int slot)
6020 {
6021         if (btrfs_header_level(buf)) {
6022                 struct btrfs_key_ptr ptr1, ptr2;
6023
6024                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6025                                    sizeof(struct btrfs_key_ptr));
6026                 read_extent_buffer(buf, &ptr2,
6027                                    btrfs_node_key_ptr_offset(slot + 1),
6028                                    sizeof(struct btrfs_key_ptr));
6029                 write_extent_buffer(buf, &ptr1,
6030                                     btrfs_node_key_ptr_offset(slot + 1),
6031                                     sizeof(struct btrfs_key_ptr));
6032                 write_extent_buffer(buf, &ptr2,
6033                                     btrfs_node_key_ptr_offset(slot),
6034                                     sizeof(struct btrfs_key_ptr));
6035                 if (slot == 0) {
6036                         struct btrfs_disk_key key;
6037                         btrfs_node_key(buf, &key, 0);
6038                         btrfs_fixup_low_keys(root, path, &key,
6039                                              btrfs_header_level(buf) + 1);
6040                 }
6041         } else {
6042                 struct btrfs_item *item1, *item2;
6043                 struct btrfs_key k1, k2;
6044                 char *item1_data, *item2_data;
6045                 u32 item1_offset, item2_offset, item1_size, item2_size;
6046
6047                 item1 = btrfs_item_nr(slot);
6048                 item2 = btrfs_item_nr(slot + 1);
6049                 btrfs_item_key_to_cpu(buf, &k1, slot);
6050                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6051                 item1_offset = btrfs_item_offset(buf, item1);
6052                 item2_offset = btrfs_item_offset(buf, item2);
6053                 item1_size = btrfs_item_size(buf, item1);
6054                 item2_size = btrfs_item_size(buf, item2);
6055
6056                 item1_data = malloc(item1_size);
6057                 if (!item1_data)
6058                         return -ENOMEM;
6059                 item2_data = malloc(item2_size);
6060                 if (!item2_data) {
6061                         free(item1_data);
6062                         return -ENOMEM;
6063                 }
6064
6065                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6066                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6067
6068                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6069                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6070                 free(item1_data);
6071                 free(item2_data);
6072
6073                 btrfs_set_item_offset(buf, item1, item2_offset);
6074                 btrfs_set_item_offset(buf, item2, item1_offset);
6075                 btrfs_set_item_size(buf, item1, item2_size);
6076                 btrfs_set_item_size(buf, item2, item1_size);
6077
6078                 path->slots[0] = slot;
6079                 btrfs_set_item_key_unsafe(root, path, &k2);
6080                 path->slots[0] = slot + 1;
6081                 btrfs_set_item_key_unsafe(root, path, &k1);
6082         }
6083         return 0;
6084 }
6085
6086 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6087 {
6088         struct extent_buffer *buf;
6089         struct btrfs_key k1, k2;
6090         int i;
6091         int level = path->lowest_level;
6092         int ret = -EIO;
6093
6094         buf = path->nodes[level];
6095         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6096                 if (level) {
6097                         btrfs_node_key_to_cpu(buf, &k1, i);
6098                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6099                 } else {
6100                         btrfs_item_key_to_cpu(buf, &k1, i);
6101                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6102                 }
6103                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6104                         continue;
6105                 ret = swap_values(root, path, buf, i);
6106                 if (ret)
6107                         break;
6108                 btrfs_mark_buffer_dirty(buf);
6109                 i = 0;
6110         }
6111         return ret;
6112 }
6113
6114 static int delete_bogus_item(struct btrfs_root *root,
6115                              struct btrfs_path *path,
6116                              struct extent_buffer *buf, int slot)
6117 {
6118         struct btrfs_key key;
6119         int nritems = btrfs_header_nritems(buf);
6120
6121         btrfs_item_key_to_cpu(buf, &key, slot);
6122
6123         /* These are all the keys we can deal with missing. */
6124         if (key.type != BTRFS_DIR_INDEX_KEY &&
6125             key.type != BTRFS_EXTENT_ITEM_KEY &&
6126             key.type != BTRFS_METADATA_ITEM_KEY &&
6127             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6128             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6129                 return -1;
6130
6131         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6132                (unsigned long long)key.objectid, key.type,
6133                (unsigned long long)key.offset, slot, buf->start);
6134         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6135                               btrfs_item_nr_offset(slot + 1),
6136                               sizeof(struct btrfs_item) *
6137                               (nritems - slot - 1));
6138         btrfs_set_header_nritems(buf, nritems - 1);
6139         if (slot == 0) {
6140                 struct btrfs_disk_key disk_key;
6141
6142                 btrfs_item_key(buf, &disk_key, 0);
6143                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6144         }
6145         btrfs_mark_buffer_dirty(buf);
6146         return 0;
6147 }
6148
6149 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6150 {
6151         struct extent_buffer *buf;
6152         int i;
6153         int ret = 0;
6154
6155         /* We should only get this for leaves */
6156         BUG_ON(path->lowest_level);
6157         buf = path->nodes[0];
6158 again:
6159         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6160                 unsigned int shift = 0, offset;
6161
6162                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6163                     BTRFS_LEAF_DATA_SIZE(root)) {
6164                         if (btrfs_item_end_nr(buf, i) >
6165                             BTRFS_LEAF_DATA_SIZE(root)) {
6166                                 ret = delete_bogus_item(root, path, buf, i);
6167                                 if (!ret)
6168                                         goto again;
6169                                 fprintf(stderr, "item is off the end of the "
6170                                         "leaf, can't fix\n");
6171                                 ret = -EIO;
6172                                 break;
6173                         }
6174                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6175                                 btrfs_item_end_nr(buf, i);
6176                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6177                            btrfs_item_offset_nr(buf, i - 1)) {
6178                         if (btrfs_item_end_nr(buf, i) >
6179                             btrfs_item_offset_nr(buf, i - 1)) {
6180                                 ret = delete_bogus_item(root, path, buf, i);
6181                                 if (!ret)
6182                                         goto again;
6183                                 fprintf(stderr, "items overlap, can't fix\n");
6184                                 ret = -EIO;
6185                                 break;
6186                         }
6187                         shift = btrfs_item_offset_nr(buf, i - 1) -
6188                                 btrfs_item_end_nr(buf, i);
6189                 }
6190                 if (!shift)
6191                         continue;
6192
6193                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6194                        i, shift, (unsigned long long)buf->start);
6195                 offset = btrfs_item_offset_nr(buf, i);
6196                 memmove_extent_buffer(buf,
6197                                       btrfs_leaf_data(buf) + offset + shift,
6198                                       btrfs_leaf_data(buf) + offset,
6199                                       btrfs_item_size_nr(buf, i));
6200                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6201                                       offset + shift);
6202                 btrfs_mark_buffer_dirty(buf);
6203         }
6204
6205         /*
6206          * We may have moved things, in which case we want to exit so we don't
6207          * write those changes out.  Once we have proper abort functionality in
6208          * progs this can be changed to something nicer.
6209          */
6210         BUG_ON(ret);
6211         return ret;
6212 }
6213
6214 /*
6215  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6216  * then just return -EIO.
6217  */
6218 static int try_to_fix_bad_block(struct btrfs_root *root,
6219                                 struct extent_buffer *buf,
6220                                 enum btrfs_tree_block_status status)
6221 {
6222         struct btrfs_trans_handle *trans;
6223         struct ulist *roots;
6224         struct ulist_node *node;
6225         struct btrfs_root *search_root;
6226         struct btrfs_path path;
6227         struct ulist_iterator iter;
6228         struct btrfs_key root_key, key;
6229         int ret;
6230
6231         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6232             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6233                 return -EIO;
6234
6235         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6236         if (ret)
6237                 return -EIO;
6238
6239         btrfs_init_path(&path);
6240         ULIST_ITER_INIT(&iter);
6241         while ((node = ulist_next(roots, &iter))) {
6242                 root_key.objectid = node->val;
6243                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6244                 root_key.offset = (u64)-1;
6245
6246                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6247                 if (IS_ERR(root)) {
6248                         ret = -EIO;
6249                         break;
6250                 }
6251
6252
6253                 trans = btrfs_start_transaction(search_root, 0);
6254                 if (IS_ERR(trans)) {
6255                         ret = PTR_ERR(trans);
6256                         break;
6257                 }
6258
6259                 path.lowest_level = btrfs_header_level(buf);
6260                 path.skip_check_block = 1;
6261                 if (path.lowest_level)
6262                         btrfs_node_key_to_cpu(buf, &key, 0);
6263                 else
6264                         btrfs_item_key_to_cpu(buf, &key, 0);
6265                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6266                 if (ret) {
6267                         ret = -EIO;
6268                         btrfs_commit_transaction(trans, search_root);
6269                         break;
6270                 }
6271                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6272                         ret = fix_key_order(search_root, &path);
6273                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6274                         ret = fix_item_offset(search_root, &path);
6275                 if (ret) {
6276                         btrfs_commit_transaction(trans, search_root);
6277                         break;
6278                 }
6279                 btrfs_release_path(&path);
6280                 btrfs_commit_transaction(trans, search_root);
6281         }
6282         ulist_free(roots);
6283         btrfs_release_path(&path);
6284         return ret;
6285 }
6286
6287 static int check_block(struct btrfs_root *root,
6288                        struct cache_tree *extent_cache,
6289                        struct extent_buffer *buf, u64 flags)
6290 {
6291         struct extent_record *rec;
6292         struct cache_extent *cache;
6293         struct btrfs_key key;
6294         enum btrfs_tree_block_status status;
6295         int ret = 0;
6296         int level;
6297
6298         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6299         if (!cache)
6300                 return 1;
6301         rec = container_of(cache, struct extent_record, cache);
6302         rec->generation = btrfs_header_generation(buf);
6303
6304         level = btrfs_header_level(buf);
6305         if (btrfs_header_nritems(buf) > 0) {
6306
6307                 if (level == 0)
6308                         btrfs_item_key_to_cpu(buf, &key, 0);
6309                 else
6310                         btrfs_node_key_to_cpu(buf, &key, 0);
6311
6312                 rec->info_objectid = key.objectid;
6313         }
6314         rec->info_level = level;
6315
6316         if (btrfs_is_leaf(buf))
6317                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6318         else
6319                 status = btrfs_check_node(root, &rec->parent_key, buf);
6320
6321         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6322                 if (repair)
6323                         status = try_to_fix_bad_block(root, buf, status);
6324                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6325                         ret = -EIO;
6326                         fprintf(stderr, "bad block %llu\n",
6327                                 (unsigned long long)buf->start);
6328                 } else {
6329                         /*
6330                          * Signal to callers we need to start the scan over
6331                          * again since we'll have cowed blocks.
6332                          */
6333                         ret = -EAGAIN;
6334                 }
6335         } else {
6336                 rec->content_checked = 1;
6337                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6338                         rec->owner_ref_checked = 1;
6339                 else {
6340                         ret = check_owner_ref(root, rec, buf);
6341                         if (!ret)
6342                                 rec->owner_ref_checked = 1;
6343                 }
6344         }
6345         if (!ret)
6346                 maybe_free_extent_rec(extent_cache, rec);
6347         return ret;
6348 }
6349
#if 0
/*
 * Compiled-out lookup that walks the rec->backrefs list for a tree backref.
 * With @parent set only full backrefs are considered and matched on parent,
 * otherwise only non-full backrefs are considered and matched on @root.
 * Returns NULL when nothing matches.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct tree_backref *back;

                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                } else if (!node->full_backref && back->root == root) {
                        return back;
                }
        }
        return NULL;
}
#endif
6379
6380 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6381                                                 u64 parent, u64 root)
6382 {
6383         struct tree_backref *ref = malloc(sizeof(*ref));
6384
6385         if (!ref)
6386                 return NULL;
6387         memset(&ref->node, 0, sizeof(ref->node));
6388         if (parent > 0) {
6389                 ref->parent = parent;
6390                 ref->node.full_backref = 1;
6391         } else {
6392                 ref->root = root;
6393                 ref->node.full_backref = 0;
6394         }
6395
6396         return ref;
6397 }
6398
#if 0
/*
 * Compiled-out lookup that walks the rec->backrefs list for a data backref.
 * With @parent set only full backrefs are considered and matched on parent.
 * Otherwise non-full backrefs are matched on root/owner/offset; when both
 * the caller and the candidate have a found ref, @bytes and @disk_bytenr
 * must match as well.  Returns NULL when nothing matches.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct data_backref *back;

                if (!node->is_data)
                        continue;
                back = to_data_backref(node);

                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                        continue;
                }

                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                     back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
6437
6438 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6439                                                 u64 parent, u64 root,
6440                                                 u64 owner, u64 offset,
6441                                                 u64 max_size)
6442 {
6443         struct data_backref *ref = malloc(sizeof(*ref));
6444
6445         if (!ref)
6446                 return NULL;
6447         memset(&ref->node, 0, sizeof(ref->node));
6448         ref->node.is_data = 1;
6449
6450         if (parent > 0) {
6451                 ref->parent = parent;
6452                 ref->owner = 0;
6453                 ref->offset = 0;
6454                 ref->node.full_backref = 1;
6455         } else {
6456                 ref->root = root;
6457                 ref->owner = owner;
6458                 ref->offset = offset;
6459                 ref->node.full_backref = 0;
6460         }
6461         ref->bytes = max_size;
6462         ref->found_ref = 0;
6463         ref->num_refs = 0;
6464         if (max_size > rec->max_size)
6465                 rec->max_size = max_size;
6466         return ref;
6467 }
6468
6469 /* Check if the type of extent matches with its chunk */
6470 static void check_extent_type(struct extent_record *rec)
6471 {
6472         struct btrfs_block_group_cache *bg_cache;
6473
6474         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6475         if (!bg_cache)
6476                 return;
6477
6478         /* data extent, check chunk directly*/
6479         if (!rec->metadata) {
6480                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6481                         rec->wrong_chunk_type = 1;
6482                 return;
6483         }
6484
6485         /* metadata extent, check the obvious case first */
6486         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6487                                  BTRFS_BLOCK_GROUP_METADATA))) {
6488                 rec->wrong_chunk_type = 1;
6489                 return;
6490         }
6491
6492         /*
6493          * Check SYSTEM extent, as it's also marked as metadata, we can only
6494          * make sure it's a SYSTEM extent by its backref
6495          */
6496         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6497                 struct extent_backref *node;
6498                 struct tree_backref *tback;
6499                 u64 bg_type;
6500
6501                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6502                 if (node->is_data) {
6503                         /* tree block shouldn't have data backref */
6504                         rec->wrong_chunk_type = 1;
6505                         return;
6506                 }
6507                 tback = container_of(node, struct tree_backref, node);
6508
6509                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6510                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6511                 else
6512                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6513                 if (!(bg_cache->flags & bg_type))
6514                         rec->wrong_chunk_type = 1;
6515         }
6516 }
6517
6518 /*
6519  * Allocate a new extent record, fill default values from @tmpl and insert int
6520  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6521  * the cache, otherwise it fails.
6522  */
6523 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6524                 struct extent_record *tmpl)
6525 {
6526         struct extent_record *rec;
6527         int ret = 0;
6528
6529         BUG_ON(tmpl->max_size == 0);
6530         rec = malloc(sizeof(*rec));
6531         if (!rec)
6532                 return -ENOMEM;
6533         rec->start = tmpl->start;
6534         rec->max_size = tmpl->max_size;
6535         rec->nr = max(tmpl->nr, tmpl->max_size);
6536         rec->found_rec = tmpl->found_rec;
6537         rec->content_checked = tmpl->content_checked;
6538         rec->owner_ref_checked = tmpl->owner_ref_checked;
6539         rec->num_duplicates = 0;
6540         rec->metadata = tmpl->metadata;
6541         rec->flag_block_full_backref = FLAG_UNSET;
6542         rec->bad_full_backref = 0;
6543         rec->crossing_stripes = 0;
6544         rec->wrong_chunk_type = 0;
6545         rec->is_root = tmpl->is_root;
6546         rec->refs = tmpl->refs;
6547         rec->extent_item_refs = tmpl->extent_item_refs;
6548         rec->parent_generation = tmpl->parent_generation;
6549         INIT_LIST_HEAD(&rec->backrefs);
6550         INIT_LIST_HEAD(&rec->dups);
6551         INIT_LIST_HEAD(&rec->list);
6552         rec->backref_tree = RB_ROOT;
6553         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6554         rec->cache.start = tmpl->start;
6555         rec->cache.size = tmpl->nr;
6556         ret = insert_cache_extent(extent_cache, &rec->cache);
6557         if (ret) {
6558                 free(rec);
6559                 return ret;
6560         }
6561         bytes_used += rec->nr;
6562
6563         if (tmpl->metadata)
6564                 rec->crossing_stripes = check_crossing_stripes(global_info,
6565                                 rec->start, global_info->nodesize);
6566         check_extent_type(rec);
6567         return ret;
6568 }
6569
6570 /*
6571  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6572  * some are hints:
6573  * - refs              - if found, increase refs
6574  * - is_root           - if found, set
6575  * - content_checked   - if found, set
6576  * - owner_ref_checked - if found, set
6577  *
6578  * If not found, create a new one, initialize and insert.
6579  */
6580 static int add_extent_rec(struct cache_tree *extent_cache,
6581                 struct extent_record *tmpl)
6582 {
6583         struct extent_record *rec;
6584         struct cache_extent *cache;
6585         int ret = 0;
6586         int dup = 0;
6587
6588         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6589         if (cache) {
6590                 rec = container_of(cache, struct extent_record, cache);
6591                 if (tmpl->refs)
6592                         rec->refs++;
6593                 if (rec->nr == 1)
6594                         rec->nr = max(tmpl->nr, tmpl->max_size);
6595
6596                 /*
6597                  * We need to make sure to reset nr to whatever the extent
6598                  * record says was the real size, this way we can compare it to
6599                  * the backrefs.
6600                  */
6601                 if (tmpl->found_rec) {
6602                         if (tmpl->start != rec->start || rec->found_rec) {
6603                                 struct extent_record *tmp;
6604
6605                                 dup = 1;
6606                                 if (list_empty(&rec->list))
6607                                         list_add_tail(&rec->list,
6608                                                       &duplicate_extents);
6609
6610                                 /*
6611                                  * We have to do this song and dance in case we
6612                                  * find an extent record that falls inside of
6613                                  * our current extent record but does not have
6614                                  * the same objectid.
6615                                  */
6616                                 tmp = malloc(sizeof(*tmp));
6617                                 if (!tmp)
6618                                         return -ENOMEM;
6619                                 tmp->start = tmpl->start;
6620                                 tmp->max_size = tmpl->max_size;
6621                                 tmp->nr = tmpl->nr;
6622                                 tmp->found_rec = 1;
6623                                 tmp->metadata = tmpl->metadata;
6624                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6625                                 INIT_LIST_HEAD(&tmp->list);
6626                                 list_add_tail(&tmp->list, &rec->dups);
6627                                 rec->num_duplicates++;
6628                         } else {
6629                                 rec->nr = tmpl->nr;
6630                                 rec->found_rec = 1;
6631                         }
6632                 }
6633
6634                 if (tmpl->extent_item_refs && !dup) {
6635                         if (rec->extent_item_refs) {
6636                                 fprintf(stderr, "block %llu rec "
6637                                         "extent_item_refs %llu, passed %llu\n",
6638                                         (unsigned long long)tmpl->start,
6639                                         (unsigned long long)
6640                                                         rec->extent_item_refs,
6641                                         (unsigned long long)tmpl->extent_item_refs);
6642                         }
6643                         rec->extent_item_refs = tmpl->extent_item_refs;
6644                 }
6645                 if (tmpl->is_root)
6646                         rec->is_root = 1;
6647                 if (tmpl->content_checked)
6648                         rec->content_checked = 1;
6649                 if (tmpl->owner_ref_checked)
6650                         rec->owner_ref_checked = 1;
6651                 memcpy(&rec->parent_key, &tmpl->parent_key,
6652                                 sizeof(tmpl->parent_key));
6653                 if (tmpl->parent_generation)
6654                         rec->parent_generation = tmpl->parent_generation;
6655                 if (rec->max_size < tmpl->max_size)
6656                         rec->max_size = tmpl->max_size;
6657
6658                 /*
6659                  * A metadata extent can't cross stripe_len boundary, otherwise
6660                  * kernel scrub won't be able to handle it.
6661                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6662                  * it.
6663                  */
6664                 if (tmpl->metadata)
6665                         rec->crossing_stripes = check_crossing_stripes(
6666                                         global_info, rec->start,
6667                                         global_info->nodesize);
6668                 check_extent_type(rec);
6669                 maybe_free_extent_rec(extent_cache, rec);
6670                 return ret;
6671         }
6672
6673         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6674
6675         return ret;
6676 }
6677
6678 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6679                             u64 parent, u64 root, int found_ref)
6680 {
6681         struct extent_record *rec;
6682         struct tree_backref *back;
6683         struct cache_extent *cache;
6684         int ret;
6685         bool insert = false;
6686
6687         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6688         if (!cache) {
6689                 struct extent_record tmpl;
6690
6691                 memset(&tmpl, 0, sizeof(tmpl));
6692                 tmpl.start = bytenr;
6693                 tmpl.nr = 1;
6694                 tmpl.metadata = 1;
6695                 tmpl.max_size = 1;
6696
6697                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6698                 if (ret)
6699                         return ret;
6700
6701                 /* really a bug in cache_extent implement now */
6702                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6703                 if (!cache)
6704                         return -ENOENT;
6705         }
6706
6707         rec = container_of(cache, struct extent_record, cache);
6708         if (rec->start != bytenr) {
6709                 /*
6710                  * Several cause, from unaligned bytenr to over lapping extents
6711                  */
6712                 return -EEXIST;
6713         }
6714
6715         back = find_tree_backref(rec, parent, root);
6716         if (!back) {
6717                 back = alloc_tree_backref(rec, parent, root);
6718                 if (!back)
6719                         return -ENOMEM;
6720                 insert = true;
6721         }
6722
6723         if (found_ref) {
6724                 if (back->node.found_ref) {
6725                         fprintf(stderr, "Extent back ref already exists "
6726                                 "for %llu parent %llu root %llu \n",
6727                                 (unsigned long long)bytenr,
6728                                 (unsigned long long)parent,
6729                                 (unsigned long long)root);
6730                 }
6731                 back->node.found_ref = 1;
6732         } else {
6733                 if (back->node.found_extent_tree) {
6734                         fprintf(stderr, "Extent back ref already exists "
6735                                 "for %llu parent %llu root %llu \n",
6736                                 (unsigned long long)bytenr,
6737                                 (unsigned long long)parent,
6738                                 (unsigned long long)root);
6739                 }
6740                 back->node.found_extent_tree = 1;
6741         }
6742         if (insert)
6743                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6744                         compare_extent_backref));
6745         check_extent_type(rec);
6746         maybe_free_extent_rec(extent_cache, rec);
6747         return 0;
6748 }
6749
6750 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6751                             u64 parent, u64 root, u64 owner, u64 offset,
6752                             u32 num_refs, int found_ref, u64 max_size)
6753 {
6754         struct extent_record *rec;
6755         struct data_backref *back;
6756         struct cache_extent *cache;
6757         int ret;
6758         bool insert = false;
6759
6760         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6761         if (!cache) {
6762                 struct extent_record tmpl;
6763
6764                 memset(&tmpl, 0, sizeof(tmpl));
6765                 tmpl.start = bytenr;
6766                 tmpl.nr = 1;
6767                 tmpl.max_size = max_size;
6768
6769                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6770                 if (ret)
6771                         return ret;
6772
6773                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6774                 if (!cache)
6775                         abort();
6776         }
6777
6778         rec = container_of(cache, struct extent_record, cache);
6779         if (rec->max_size < max_size)
6780                 rec->max_size = max_size;
6781
6782         /*
6783          * If found_ref is set then max_size is the real size and must match the
6784          * existing refs.  So if we have already found a ref then we need to
6785          * make sure that this ref matches the existing one, otherwise we need
6786          * to add a new backref so we can notice that the backrefs don't match
6787          * and we need to figure out who is telling the truth.  This is to
6788          * account for that awful fsync bug I introduced where we'd end up with
6789          * a btrfs_file_extent_item that would have its length include multiple
6790          * prealloc extents or point inside of a prealloc extent.
6791          */
6792         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6793                                  bytenr, max_size);
6794         if (!back) {
6795                 back = alloc_data_backref(rec, parent, root, owner, offset,
6796                                           max_size);
6797                 BUG_ON(!back);
6798                 insert = true;
6799         }
6800
6801         if (found_ref) {
6802                 BUG_ON(num_refs != 1);
6803                 if (back->node.found_ref)
6804                         BUG_ON(back->bytes != max_size);
6805                 back->node.found_ref = 1;
6806                 back->found_ref += 1;
6807                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6808                         back->bytes = max_size;
6809                         back->disk_bytenr = bytenr;
6810
6811                         /* Need to reinsert if not already in the tree */
6812                         if (!insert) {
6813                                 rb_erase(&back->node.node, &rec->backref_tree);
6814                                 insert = true;
6815                         }
6816                 }
6817                 rec->refs += 1;
6818                 rec->content_checked = 1;
6819                 rec->owner_ref_checked = 1;
6820         } else {
6821                 if (back->node.found_extent_tree) {
6822                         fprintf(stderr, "Extent back ref already exists "
6823                                 "for %llu parent %llu root %llu "
6824                                 "owner %llu offset %llu num_refs %lu\n",
6825                                 (unsigned long long)bytenr,
6826                                 (unsigned long long)parent,
6827                                 (unsigned long long)root,
6828                                 (unsigned long long)owner,
6829                                 (unsigned long long)offset,
6830                                 (unsigned long)num_refs);
6831                 }
6832                 back->num_refs = num_refs;
6833                 back->node.found_extent_tree = 1;
6834         }
6835         if (insert)
6836                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6837                         compare_extent_backref));
6838
6839         maybe_free_extent_rec(extent_cache, rec);
6840         return 0;
6841 }
6842
6843 static int add_pending(struct cache_tree *pending,
6844                        struct cache_tree *seen, u64 bytenr, u32 size)
6845 {
6846         int ret;
6847         ret = add_cache_extent(seen, bytenr, size);
6848         if (ret)
6849                 return ret;
6850         add_cache_extent(pending, bytenr, size);
6851         return 0;
6852 }
6853
6854 static int pick_next_pending(struct cache_tree *pending,
6855                         struct cache_tree *reada,
6856                         struct cache_tree *nodes,
6857                         u64 last, struct block_info *bits, int bits_nr,
6858                         int *reada_bits)
6859 {
6860         unsigned long node_start = last;
6861         struct cache_extent *cache;
6862         int ret;
6863
6864         cache = search_cache_extent(reada, 0);
6865         if (cache) {
6866                 bits[0].start = cache->start;
6867                 bits[0].size = cache->size;
6868                 *reada_bits = 1;
6869                 return 1;
6870         }
6871         *reada_bits = 0;
6872         if (node_start > 32768)
6873                 node_start -= 32768;
6874
6875         cache = search_cache_extent(nodes, node_start);
6876         if (!cache)
6877                 cache = search_cache_extent(nodes, 0);
6878
6879         if (!cache) {
6880                  cache = search_cache_extent(pending, 0);
6881                  if (!cache)
6882                          return 0;
6883                  ret = 0;
6884                  do {
6885                          bits[ret].start = cache->start;
6886                          bits[ret].size = cache->size;
6887                          cache = next_cache_extent(cache);
6888                          ret++;
6889                  } while (cache && ret < bits_nr);
6890                  return ret;
6891         }
6892
6893         ret = 0;
6894         do {
6895                 bits[ret].start = cache->start;
6896                 bits[ret].size = cache->size;
6897                 cache = next_cache_extent(cache);
6898                 ret++;
6899         } while (cache && ret < bits_nr);
6900
6901         if (bits_nr - ret > 8) {
6902                 u64 lookup = bits[0].start + bits[0].size;
6903                 struct cache_extent *next;
6904                 next = search_cache_extent(pending, lookup);
6905                 while(next) {
6906                         if (next->start - lookup > 32768)
6907                                 break;
6908                         bits[ret].start = next->start;
6909                         bits[ret].size = next->size;
6910                         lookup = next->start + next->size;
6911                         ret++;
6912                         if (ret == bits_nr)
6913                                 break;
6914                         next = next_cache_extent(next);
6915                         if (!next)
6916                                 break;
6917                 }
6918         }
6919         return ret;
6920 }
6921
6922 static void free_chunk_record(struct cache_extent *cache)
6923 {
6924         struct chunk_record *rec;
6925
6926         rec = container_of(cache, struct chunk_record, cache);
6927         list_del_init(&rec->list);
6928         list_del_init(&rec->dextents);
6929         free(rec);
6930 }
6931
6932 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6933 {
6934         cache_tree_free_extents(chunk_cache, free_chunk_record);
6935 }
6936
6937 static void free_device_record(struct rb_node *node)
6938 {
6939         struct device_record *rec;
6940
6941         rec = container_of(node, struct device_record, node);
6942         free(rec);
6943 }
6944
6945 FREE_RB_BASED_TREE(device_cache, free_device_record);
6946
6947 int insert_block_group_record(struct block_group_tree *tree,
6948                               struct block_group_record *bg_rec)
6949 {
6950         int ret;
6951
6952         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6953         if (ret)
6954                 return ret;
6955
6956         list_add_tail(&bg_rec->list, &tree->block_groups);
6957         return 0;
6958 }
6959
6960 static void free_block_group_record(struct cache_extent *cache)
6961 {
6962         struct block_group_record *rec;
6963
6964         rec = container_of(cache, struct block_group_record, cache);
6965         list_del_init(&rec->list);
6966         free(rec);
6967 }
6968
6969 void free_block_group_tree(struct block_group_tree *tree)
6970 {
6971         cache_tree_free_extents(&tree->tree, free_block_group_record);
6972 }
6973
6974 int insert_device_extent_record(struct device_extent_tree *tree,
6975                                 struct device_extent_record *de_rec)
6976 {
6977         int ret;
6978
6979         /*
6980          * Device extent is a bit different from the other extents, because
6981          * the extents which belong to the different devices may have the
6982          * same start and size, so we need use the special extent cache
6983          * search/insert functions.
6984          */
6985         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6986         if (ret)
6987                 return ret;
6988
6989         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6990         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6991         return 0;
6992 }
6993
6994 static void free_device_extent_record(struct cache_extent *cache)
6995 {
6996         struct device_extent_record *rec;
6997
6998         rec = container_of(cache, struct device_extent_record, cache);
6999         if (!list_empty(&rec->chunk_list))
7000                 list_del_init(&rec->chunk_list);
7001         if (!list_empty(&rec->device_list))
7002                 list_del_init(&rec->device_list);
7003         free(rec);
7004 }
7005
7006 void free_device_extent_tree(struct device_extent_tree *tree)
7007 {
7008         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7009 }
7010
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded with
 * add_tree_backref(); any other ref is recorded with add_data_backref(),
 * carrying the ref count stored in the item.
 *
 * Returns the result of the add_*_backref() call that was made.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
7031
7032 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7033                                             struct btrfs_key *key,
7034                                             int slot)
7035 {
7036         struct btrfs_chunk *ptr;
7037         struct chunk_record *rec;
7038         int num_stripes, i;
7039
7040         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7041         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7042
7043         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7044         if (!rec) {
7045                 fprintf(stderr, "memory allocation failed\n");
7046                 exit(-1);
7047         }
7048
7049         INIT_LIST_HEAD(&rec->list);
7050         INIT_LIST_HEAD(&rec->dextents);
7051         rec->bg_rec = NULL;
7052
7053         rec->cache.start = key->offset;
7054         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7055
7056         rec->generation = btrfs_header_generation(leaf);
7057
7058         rec->objectid = key->objectid;
7059         rec->type = key->type;
7060         rec->offset = key->offset;
7061
7062         rec->length = rec->cache.size;
7063         rec->owner = btrfs_chunk_owner(leaf, ptr);
7064         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7065         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7066         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7067         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7068         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7069         rec->num_stripes = num_stripes;
7070         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7071
7072         for (i = 0; i < rec->num_stripes; ++i) {
7073                 rec->stripes[i].devid =
7074                         btrfs_stripe_devid_nr(leaf, ptr, i);
7075                 rec->stripes[i].offset =
7076                         btrfs_stripe_offset_nr(leaf, ptr, i);
7077                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7078                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7079                                 BTRFS_UUID_SIZE);
7080         }
7081
7082         return rec;
7083 }
7084
7085 static int process_chunk_item(struct cache_tree *chunk_cache,
7086                               struct btrfs_key *key, struct extent_buffer *eb,
7087                               int slot)
7088 {
7089         struct chunk_record *rec;
7090         struct btrfs_chunk *chunk;
7091         int ret = 0;
7092
7093         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7094         /*
7095          * Do extra check for this chunk item,
7096          *
7097          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7098          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7099          * and owner<->key_type check.
7100          */
7101         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7102                                       key->offset);
7103         if (ret < 0) {
7104                 error("chunk(%llu, %llu) is not valid, ignore it",
7105                       key->offset, btrfs_chunk_length(eb, chunk));
7106                 return 0;
7107         }
7108         rec = btrfs_new_chunk_record(eb, key, slot);
7109         ret = insert_cache_extent(chunk_cache, &rec->cache);
7110         if (ret) {
7111                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7112                         rec->offset, rec->length);
7113                 free(rec);
7114         }
7115
7116         return ret;
7117 }
7118
7119 static int process_device_item(struct rb_root *dev_cache,
7120                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7121 {
7122         struct btrfs_dev_item *ptr;
7123         struct device_record *rec;
7124         int ret = 0;
7125
7126         ptr = btrfs_item_ptr(eb,
7127                 slot, struct btrfs_dev_item);
7128
7129         rec = malloc(sizeof(*rec));
7130         if (!rec) {
7131                 fprintf(stderr, "memory allocation failed\n");
7132                 return -ENOMEM;
7133         }
7134
7135         rec->devid = key->offset;
7136         rec->generation = btrfs_header_generation(eb);
7137
7138         rec->objectid = key->objectid;
7139         rec->type = key->type;
7140         rec->offset = key->offset;
7141
7142         rec->devid = btrfs_device_id(eb, ptr);
7143         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7144         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7145
7146         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7147         if (ret) {
7148                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7149                 free(rec);
7150         }
7151
7152         return ret;
7153 }
7154
7155 struct block_group_record *
7156 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7157                              int slot)
7158 {
7159         struct btrfs_block_group_item *ptr;
7160         struct block_group_record *rec;
7161
7162         rec = calloc(1, sizeof(*rec));
7163         if (!rec) {
7164                 fprintf(stderr, "memory allocation failed\n");
7165                 exit(-1);
7166         }
7167
7168         rec->cache.start = key->objectid;
7169         rec->cache.size = key->offset;
7170
7171         rec->generation = btrfs_header_generation(leaf);
7172
7173         rec->objectid = key->objectid;
7174         rec->type = key->type;
7175         rec->offset = key->offset;
7176
7177         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7178         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7179
7180         INIT_LIST_HEAD(&rec->list);
7181
7182         return rec;
7183 }
7184
7185 static int process_block_group_item(struct block_group_tree *block_group_cache,
7186                                     struct btrfs_key *key,
7187                                     struct extent_buffer *eb, int slot)
7188 {
7189         struct block_group_record *rec;
7190         int ret = 0;
7191
7192         rec = btrfs_new_block_group_record(eb, key, slot);
7193         ret = insert_block_group_record(block_group_cache, rec);
7194         if (ret) {
7195                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7196                         rec->objectid, rec->offset);
7197                 free(rec);
7198         }
7199
7200         return ret;
7201 }
7202
7203 struct device_extent_record *
7204 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7205                                struct btrfs_key *key, int slot)
7206 {
7207         struct device_extent_record *rec;
7208         struct btrfs_dev_extent *ptr;
7209
7210         rec = calloc(1, sizeof(*rec));
7211         if (!rec) {
7212                 fprintf(stderr, "memory allocation failed\n");
7213                 exit(-1);
7214         }
7215
7216         rec->cache.objectid = key->objectid;
7217         rec->cache.start = key->offset;
7218
7219         rec->generation = btrfs_header_generation(leaf);
7220
7221         rec->objectid = key->objectid;
7222         rec->type = key->type;
7223         rec->offset = key->offset;
7224
7225         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7226         rec->chunk_objecteid =
7227                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7228         rec->chunk_offset =
7229                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7230         rec->length = btrfs_dev_extent_length(leaf, ptr);
7231         rec->cache.size = rec->length;
7232
7233         INIT_LIST_HEAD(&rec->chunk_list);
7234         INIT_LIST_HEAD(&rec->device_list);
7235
7236         return rec;
7237 }
7238
7239 static int
7240 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7241                            struct btrfs_key *key, struct extent_buffer *eb,
7242                            int slot)
7243 {
7244         struct device_extent_record *rec;
7245         int ret;
7246
7247         rec = btrfs_new_device_extent_record(eb, key, slot);
7248         ret = insert_device_extent_record(dev_extent_cache, rec);
7249         if (ret) {
7250                 fprintf(stderr,
7251                         "Device extent[%llu, %llu, %llu] existed.\n",
7252                         rec->objectid, rec->offset, rec->length);
7253                 free(rec);
7254         }
7255
7256         return ret;
7257 }
7258
7259 static int process_extent_item(struct btrfs_root *root,
7260                                struct cache_tree *extent_cache,
7261                                struct extent_buffer *eb, int slot)
7262 {
7263         struct btrfs_extent_item *ei;
7264         struct btrfs_extent_inline_ref *iref;
7265         struct btrfs_extent_data_ref *dref;
7266         struct btrfs_shared_data_ref *sref;
7267         struct btrfs_key key;
7268         struct extent_record tmpl;
7269         unsigned long end;
7270         unsigned long ptr;
7271         int ret;
7272         int type;
7273         u32 item_size = btrfs_item_size_nr(eb, slot);
7274         u64 refs = 0;
7275         u64 offset;
7276         u64 num_bytes;
7277         int metadata = 0;
7278
7279         btrfs_item_key_to_cpu(eb, &key, slot);
7280
7281         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7282                 metadata = 1;
7283                 num_bytes = root->fs_info->nodesize;
7284         } else {
7285                 num_bytes = key.offset;
7286         }
7287
7288         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7289                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7290                       key.objectid, root->fs_info->sectorsize);
7291                 return -EIO;
7292         }
7293         if (item_size < sizeof(*ei)) {
7294 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7295                 struct btrfs_extent_item_v0 *ei0;
7296                 BUG_ON(item_size != sizeof(*ei0));
7297                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7298                 refs = btrfs_extent_refs_v0(eb, ei0);
7299 #else
7300                 BUG();
7301 #endif
7302                 memset(&tmpl, 0, sizeof(tmpl));
7303                 tmpl.start = key.objectid;
7304                 tmpl.nr = num_bytes;
7305                 tmpl.extent_item_refs = refs;
7306                 tmpl.metadata = metadata;
7307                 tmpl.found_rec = 1;
7308                 tmpl.max_size = num_bytes;
7309
7310                 return add_extent_rec(extent_cache, &tmpl);
7311         }
7312
7313         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7314         refs = btrfs_extent_refs(eb, ei);
7315         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7316                 metadata = 1;
7317         else
7318                 metadata = 0;
7319         if (metadata && num_bytes != root->fs_info->nodesize) {
7320                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7321                       num_bytes, root->fs_info->nodesize);
7322                 return -EIO;
7323         }
7324         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7325                 error("ignore invalid data extent, length %llu is not aligned to %u",
7326                       num_bytes, root->fs_info->sectorsize);
7327                 return -EIO;
7328         }
7329
7330         memset(&tmpl, 0, sizeof(tmpl));
7331         tmpl.start = key.objectid;
7332         tmpl.nr = num_bytes;
7333         tmpl.extent_item_refs = refs;
7334         tmpl.metadata = metadata;
7335         tmpl.found_rec = 1;
7336         tmpl.max_size = num_bytes;
7337         add_extent_rec(extent_cache, &tmpl);
7338
7339         ptr = (unsigned long)(ei + 1);
7340         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7341             key.type == BTRFS_EXTENT_ITEM_KEY)
7342                 ptr += sizeof(struct btrfs_tree_block_info);
7343
7344         end = (unsigned long)ei + item_size;
7345         while (ptr < end) {
7346                 iref = (struct btrfs_extent_inline_ref *)ptr;
7347                 type = btrfs_extent_inline_ref_type(eb, iref);
7348                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7349                 switch (type) {
7350                 case BTRFS_TREE_BLOCK_REF_KEY:
7351                         ret = add_tree_backref(extent_cache, key.objectid,
7352                                         0, offset, 0);
7353                         if (ret < 0)
7354                                 error(
7355                         "add_tree_backref failed (extent items tree block): %s",
7356                                       strerror(-ret));
7357                         break;
7358                 case BTRFS_SHARED_BLOCK_REF_KEY:
7359                         ret = add_tree_backref(extent_cache, key.objectid,
7360                                         offset, 0, 0);
7361                         if (ret < 0)
7362                                 error(
7363                         "add_tree_backref failed (extent items shared block): %s",
7364                                       strerror(-ret));
7365                         break;
7366                 case BTRFS_EXTENT_DATA_REF_KEY:
7367                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7368                         add_data_backref(extent_cache, key.objectid, 0,
7369                                         btrfs_extent_data_ref_root(eb, dref),
7370                                         btrfs_extent_data_ref_objectid(eb,
7371                                                                        dref),
7372                                         btrfs_extent_data_ref_offset(eb, dref),
7373                                         btrfs_extent_data_ref_count(eb, dref),
7374                                         0, num_bytes);
7375                         break;
7376                 case BTRFS_SHARED_DATA_REF_KEY:
7377                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7378                         add_data_backref(extent_cache, key.objectid, offset,
7379                                         0, 0, 0,
7380                                         btrfs_shared_data_ref_count(eb, sref),
7381                                         0, num_bytes);
7382                         break;
7383                 default:
7384                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7385                                 key.objectid, key.type, num_bytes);
7386                         goto out;
7387                 }
7388                 ptr += btrfs_extent_inline_ref_size(type);
7389         }
7390         WARN_ON(ptr > end);
7391 out:
7392         return 0;
7393 }
7394
7395 static int check_cache_range(struct btrfs_root *root,
7396                              struct btrfs_block_group_cache *cache,
7397                              u64 offset, u64 bytes)
7398 {
7399         struct btrfs_free_space *entry;
7400         u64 *logical;
7401         u64 bytenr;
7402         int stripe_len;
7403         int i, nr, ret;
7404
7405         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7406                 bytenr = btrfs_sb_offset(i);
7407                 ret = btrfs_rmap_block(root->fs_info,
7408                                        cache->key.objectid, bytenr, 0,
7409                                        &logical, &nr, &stripe_len);
7410                 if (ret)
7411                         return ret;
7412
7413                 while (nr--) {
7414                         if (logical[nr] + stripe_len <= offset)
7415                                 continue;
7416                         if (offset + bytes <= logical[nr])
7417                                 continue;
7418                         if (logical[nr] == offset) {
7419                                 if (stripe_len >= bytes) {
7420                                         free(logical);
7421                                         return 0;
7422                                 }
7423                                 bytes -= stripe_len;
7424                                 offset += stripe_len;
7425                         } else if (logical[nr] < offset) {
7426                                 if (logical[nr] + stripe_len >=
7427                                     offset + bytes) {
7428                                         free(logical);
7429                                         return 0;
7430                                 }
7431                                 bytes = (offset + bytes) -
7432                                         (logical[nr] + stripe_len);
7433                                 offset = logical[nr] + stripe_len;
7434                         } else {
7435                                 /*
7436                                  * Could be tricky, the super may land in the
7437                                  * middle of the area we're checking.  First
7438                                  * check the easiest case, it's at the end.
7439                                  */
7440                                 if (logical[nr] + stripe_len >=
7441                                     bytes + offset) {
7442                                         bytes = logical[nr] - offset;
7443                                         continue;
7444                                 }
7445
7446                                 /* Check the left side */
7447                                 ret = check_cache_range(root, cache,
7448                                                         offset,
7449                                                         logical[nr] - offset);
7450                                 if (ret) {
7451                                         free(logical);
7452                                         return ret;
7453                                 }
7454
7455                                 /* Now we continue with the right side */
7456                                 bytes = (offset + bytes) -
7457                                         (logical[nr] + stripe_len);
7458                                 offset = logical[nr] + stripe_len;
7459                         }
7460                 }
7461
7462                 free(logical);
7463         }
7464
7465         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7466         if (!entry) {
7467                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7468                         offset, offset+bytes);
7469                 return -EINVAL;
7470         }
7471
7472         if (entry->offset != offset) {
7473                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7474                         entry->offset);
7475                 return -EINVAL;
7476         }
7477
7478         if (entry->bytes != bytes) {
7479                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7480                         bytes, entry->bytes, offset);
7481                 return -EINVAL;
7482         }
7483
7484         unlink_free_space(cache->free_space_ctl, entry);
7485         free(entry);
7486         return 0;
7487 }
7488
7489 static int verify_space_cache(struct btrfs_root *root,
7490                               struct btrfs_block_group_cache *cache)
7491 {
7492         struct btrfs_path path;
7493         struct extent_buffer *leaf;
7494         struct btrfs_key key;
7495         u64 last;
7496         int ret = 0;
7497
7498         root = root->fs_info->extent_root;
7499
7500         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7501
7502         btrfs_init_path(&path);
7503         key.objectid = last;
7504         key.offset = 0;
7505         key.type = BTRFS_EXTENT_ITEM_KEY;
7506         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7507         if (ret < 0)
7508                 goto out;
7509         ret = 0;
7510         while (1) {
7511                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7512                         ret = btrfs_next_leaf(root, &path);
7513                         if (ret < 0)
7514                                 goto out;
7515                         if (ret > 0) {
7516                                 ret = 0;
7517                                 break;
7518                         }
7519                 }
7520                 leaf = path.nodes[0];
7521                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7522                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7523                         break;
7524                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7525                     key.type != BTRFS_METADATA_ITEM_KEY) {
7526                         path.slots[0]++;
7527                         continue;
7528                 }
7529
7530                 if (last == key.objectid) {
7531                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7532                                 last = key.objectid + key.offset;
7533                         else
7534                                 last = key.objectid + root->fs_info->nodesize;
7535                         path.slots[0]++;
7536                         continue;
7537                 }
7538
7539                 ret = check_cache_range(root, cache, last,
7540                                         key.objectid - last);
7541                 if (ret)
7542                         break;
7543                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7544                         last = key.objectid + key.offset;
7545                 else
7546                         last = key.objectid + root->fs_info->nodesize;
7547                 path.slots[0]++;
7548         }
7549
7550         if (last < cache->key.objectid + cache->key.offset)
7551                 ret = check_cache_range(root, cache, last,
7552                                         cache->key.objectid +
7553                                         cache->key.offset - last);
7554
7555 out:
7556         btrfs_release_path(&path);
7557
7558         if (!ret &&
7559             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7560                 fprintf(stderr, "There are still entries left in the space "
7561                         "cache\n");
7562                 ret = -EINVAL;
7563         }
7564
7565         return ret;
7566 }
7567
7568 static int check_space_cache(struct btrfs_root *root)
7569 {
7570         struct btrfs_block_group_cache *cache;
7571         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7572         int ret;
7573         int error = 0;
7574
7575         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7576             btrfs_super_generation(root->fs_info->super_copy) !=
7577             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7578                 printf("cache and super generation don't match, space cache "
7579                        "will be invalidated\n");
7580                 return 0;
7581         }
7582
7583         if (ctx.progress_enabled) {
7584                 ctx.tp = TASK_FREE_SPACE;
7585                 task_start(ctx.info);
7586         }
7587
7588         while (1) {
7589                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7590                 if (!cache)
7591                         break;
7592
7593                 start = cache->key.objectid + cache->key.offset;
7594                 if (!cache->free_space_ctl) {
7595                         if (btrfs_init_free_space_ctl(cache,
7596                                                 root->fs_info->sectorsize)) {
7597                                 ret = -ENOMEM;
7598                                 break;
7599                         }
7600                 } else {
7601                         btrfs_remove_free_space_cache(cache);
7602                 }
7603
7604                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7605                         ret = exclude_super_stripes(root, cache);
7606                         if (ret) {
7607                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7608                                         strerror(-ret));
7609                                 error++;
7610                                 continue;
7611                         }
7612                         ret = load_free_space_tree(root->fs_info, cache);
7613                         free_excluded_extents(root, cache);
7614                         if (ret < 0) {
7615                                 fprintf(stderr, "could not load free space tree: %s\n",
7616                                         strerror(-ret));
7617                                 error++;
7618                                 continue;
7619                         }
7620                         error += ret;
7621                 } else {
7622                         ret = load_free_space_cache(root->fs_info, cache);
7623                         if (!ret)
7624                                 continue;
7625                 }
7626
7627                 ret = verify_space_cache(root, cache);
7628                 if (ret) {
7629                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7630                                 cache->key.objectid);
7631                         error++;
7632                 }
7633         }
7634
7635         task_stop(ctx.info);
7636
7637         return error ? -EINVAL : 0;
7638 }
7639
7640 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7641                         u64 num_bytes, unsigned long leaf_offset,
7642                         struct extent_buffer *eb) {
7643
7644         struct btrfs_fs_info *fs_info = root->fs_info;
7645         u64 offset = 0;
7646         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7647         char *data;
7648         unsigned long csum_offset;
7649         u32 csum;
7650         u32 csum_expected;
7651         u64 read_len;
7652         u64 data_checked = 0;
7653         u64 tmp;
7654         int ret = 0;
7655         int mirror;
7656         int num_copies;
7657
7658         if (num_bytes % fs_info->sectorsize)
7659                 return -EINVAL;
7660
7661         data = malloc(num_bytes);
7662         if (!data)
7663                 return -ENOMEM;
7664
7665         while (offset < num_bytes) {
7666                 mirror = 0;
7667 again:
7668                 read_len = num_bytes - offset;
7669                 /* read as much space once a time */
7670                 ret = read_extent_data(fs_info, data + offset,
7671                                 bytenr + offset, &read_len, mirror);
7672                 if (ret)
7673                         goto out;
7674                 data_checked = 0;
7675                 /* verify every 4k data's checksum */
7676                 while (data_checked < read_len) {
7677                         csum = ~(u32)0;
7678                         tmp = offset + data_checked;
7679
7680                         csum = btrfs_csum_data((char *)data + tmp,
7681                                                csum, fs_info->sectorsize);
7682                         btrfs_csum_final(csum, (u8 *)&csum);
7683
7684                         csum_offset = leaf_offset +
7685                                  tmp / fs_info->sectorsize * csum_size;
7686                         read_extent_buffer(eb, (char *)&csum_expected,
7687                                            csum_offset, csum_size);
7688                         /* try another mirror */
7689                         if (csum != csum_expected) {
7690                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7691                                                 mirror, bytenr + tmp,
7692                                                 csum, csum_expected);
7693                                 num_copies = btrfs_num_copies(root->fs_info,
7694                                                 bytenr, num_bytes);
7695                                 if (mirror < num_copies - 1) {
7696                                         mirror += 1;
7697                                         goto again;
7698                                 }
7699                         }
7700                         data_checked += fs_info->sectorsize;
7701                 }
7702                 offset += read_len;
7703         }
7704 out:
7705         free(data);
7706         return ret;
7707 }
7708
7709 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7710                                u64 num_bytes)
7711 {
7712         struct btrfs_path path;
7713         struct extent_buffer *leaf;
7714         struct btrfs_key key;
7715         int ret;
7716
7717         btrfs_init_path(&path);
7718         key.objectid = bytenr;
7719         key.type = BTRFS_EXTENT_ITEM_KEY;
7720         key.offset = (u64)-1;
7721
7722 again:
7723         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7724                                 0, 0);
7725         if (ret < 0) {
7726                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7727                 btrfs_release_path(&path);
7728                 return ret;
7729         } else if (ret) {
7730                 if (path.slots[0] > 0) {
7731                         path.slots[0]--;
7732                 } else {
7733                         ret = btrfs_prev_leaf(root, &path);
7734                         if (ret < 0) {
7735                                 goto out;
7736                         } else if (ret > 0) {
7737                                 ret = 0;
7738                                 goto out;
7739                         }
7740                 }
7741         }
7742
7743         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7744
7745         /*
7746          * Block group items come before extent items if they have the same
7747          * bytenr, so walk back one more just in case.  Dear future traveller,
7748          * first congrats on mastering time travel.  Now if it's not too much
7749          * trouble could you go back to 2006 and tell Chris to make the
7750          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7751          * EXTENT_ITEM_KEY please?
7752          */
7753         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7754                 if (path.slots[0] > 0) {
7755                         path.slots[0]--;
7756                 } else {
7757                         ret = btrfs_prev_leaf(root, &path);
7758                         if (ret < 0) {
7759                                 goto out;
7760                         } else if (ret > 0) {
7761                                 ret = 0;
7762                                 goto out;
7763                         }
7764                 }
7765                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7766         }
7767
7768         while (num_bytes) {
7769                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7770                         ret = btrfs_next_leaf(root, &path);
7771                         if (ret < 0) {
7772                                 fprintf(stderr, "Error going to next leaf "
7773                                         "%d\n", ret);
7774                                 btrfs_release_path(&path);
7775                                 return ret;
7776                         } else if (ret) {
7777                                 break;
7778                         }
7779                 }
7780                 leaf = path.nodes[0];
7781                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7782                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7783                         path.slots[0]++;
7784                         continue;
7785                 }
7786                 if (key.objectid + key.offset < bytenr) {
7787                         path.slots[0]++;
7788                         continue;
7789                 }
7790                 if (key.objectid > bytenr + num_bytes)
7791                         break;
7792
7793                 if (key.objectid == bytenr) {
7794                         if (key.offset >= num_bytes) {
7795                                 num_bytes = 0;
7796                                 break;
7797                         }
7798                         num_bytes -= key.offset;
7799                         bytenr += key.offset;
7800                 } else if (key.objectid < bytenr) {
7801                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7802                                 num_bytes = 0;
7803                                 break;
7804                         }
7805                         num_bytes = (bytenr + num_bytes) -
7806                                 (key.objectid + key.offset);
7807                         bytenr = key.objectid + key.offset;
7808                 } else {
7809                         if (key.objectid + key.offset < bytenr + num_bytes) {
7810                                 u64 new_start = key.objectid + key.offset;
7811                                 u64 new_bytes = bytenr + num_bytes - new_start;
7812
7813                                 /*
7814                                  * Weird case, the extent is in the middle of
7815                                  * our range, we'll have to search one side
7816                                  * and then the other.  Not sure if this happens
7817                                  * in real life, but no harm in coding it up
7818                                  * anyway just in case.
7819                                  */
7820                                 btrfs_release_path(&path);
7821                                 ret = check_extent_exists(root, new_start,
7822                                                           new_bytes);
7823                                 if (ret) {
7824                                         fprintf(stderr, "Right section didn't "
7825                                                 "have a record\n");
7826                                         break;
7827                                 }
7828                                 num_bytes = key.objectid - bytenr;
7829                                 goto again;
7830                         }
7831                         num_bytes = key.objectid - bytenr;
7832                 }
7833                 path.slots[0]++;
7834         }
7835         ret = 0;
7836
7837 out:
7838         if (num_bytes && !ret) {
7839                 fprintf(stderr, "There are no extents for csum range "
7840                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7841                 ret = 1;
7842         }
7843
7844         btrfs_release_path(&path);
7845         return ret;
7846 }
7847
7848 static int check_csums(struct btrfs_root *root)
7849 {
7850         struct btrfs_path path;
7851         struct extent_buffer *leaf;
7852         struct btrfs_key key;
7853         u64 offset = 0, num_bytes = 0;
7854         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7855         int errors = 0;
7856         int ret;
7857         u64 data_len;
7858         unsigned long leaf_offset;
7859
7860         root = root->fs_info->csum_root;
7861         if (!extent_buffer_uptodate(root->node)) {
7862                 fprintf(stderr, "No valid csum tree found\n");
7863                 return -ENOENT;
7864         }
7865
7866         btrfs_init_path(&path);
7867         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7868         key.type = BTRFS_EXTENT_CSUM_KEY;
7869         key.offset = 0;
7870         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7871         if (ret < 0) {
7872                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7873                 btrfs_release_path(&path);
7874                 return ret;
7875         }
7876
7877         if (ret > 0 && path.slots[0])
7878                 path.slots[0]--;
7879         ret = 0;
7880
7881         while (1) {
7882                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7883                         ret = btrfs_next_leaf(root, &path);
7884                         if (ret < 0) {
7885                                 fprintf(stderr, "Error going to next leaf "
7886                                         "%d\n", ret);
7887                                 break;
7888                         }
7889                         if (ret)
7890                                 break;
7891                 }
7892                 leaf = path.nodes[0];
7893
7894                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7895                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7896                         path.slots[0]++;
7897                         continue;
7898                 }
7899
7900                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7901                               csum_size) * root->fs_info->sectorsize;
7902                 if (!check_data_csum)
7903                         goto skip_csum_check;
7904                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7905                 ret = check_extent_csums(root, key.offset, data_len,
7906                                          leaf_offset, leaf);
7907                 if (ret)
7908                         break;
7909 skip_csum_check:
7910                 if (!num_bytes) {
7911                         offset = key.offset;
7912                 } else if (key.offset != offset + num_bytes) {
7913                         ret = check_extent_exists(root, offset, num_bytes);
7914                         if (ret) {
7915                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7916                                         "there is no extent record\n",
7917                                         offset, offset+num_bytes);
7918                                 errors++;
7919                         }
7920                         offset = key.offset;
7921                         num_bytes = 0;
7922                 }
7923                 num_bytes += data_len;
7924                 path.slots[0]++;
7925         }
7926
7927         btrfs_release_path(&path);
7928         return errors;
7929 }
7930
7931 static int is_dropped_key(struct btrfs_key *key,
7932                           struct btrfs_key *drop_key) {
7933         if (key->objectid < drop_key->objectid)
7934                 return 1;
7935         else if (key->objectid == drop_key->objectid) {
7936                 if (key->type < drop_key->type)
7937                         return 1;
7938                 else if (key->type == drop_key->type) {
7939                         if (key->offset < drop_key->offset)
7940                                 return 1;
7941                 }
7942         }
7943         return 0;
7944 }
7945
7946 /*
7947  * Here are the rules for FULL_BACKREF.
7948  *
7949  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7950  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7951  *      FULL_BACKREF set.
7952  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7953  *    if it happened after the relocation occurred since we'll have dropped the
7954  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7955  *    have no real way to know for sure.
7956  *
7957  * We process the blocks one root at a time, and we start from the lowest root
7958  * objectid and go to the highest.  So we can just lookup the owner backref for
7959  * the record and if we don't find it then we know it doesn't exist and we have
7960  * a FULL BACKREF.
7961  *
7962  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7963  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7964  * be set or not and then we can check later once we've gathered all the refs.
7965  */
7966 static int calc_extent_flag(struct cache_tree *extent_cache,
7967                            struct extent_buffer *buf,
7968                            struct root_item_record *ri,
7969                            u64 *flags)
7970 {
7971         struct extent_record *rec;
7972         struct cache_extent *cache;
7973         struct tree_backref *tback;
7974         u64 owner = 0;
7975
7976         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7977         /* we have added this extent before */
7978         if (!cache)
7979                 return -ENOENT;
7980
7981         rec = container_of(cache, struct extent_record, cache);
7982
7983         /*
7984          * Except file/reloc tree, we can not have
7985          * FULL BACKREF MODE
7986          */
7987         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7988                 goto normal;
7989         /*
7990          * root node
7991          */
7992         if (buf->start == ri->bytenr)
7993                 goto normal;
7994
7995         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7996                 goto full_backref;
7997
7998         owner = btrfs_header_owner(buf);
7999         if (owner == ri->objectid)
8000                 goto normal;
8001
8002         tback = find_tree_backref(rec, 0, owner);
8003         if (!tback)
8004                 goto full_backref;
8005 normal:
8006         *flags = 0;
8007         if (rec->flag_block_full_backref != FLAG_UNSET &&
8008             rec->flag_block_full_backref != 0)
8009                 rec->bad_full_backref = 1;
8010         return 0;
8011 full_backref:
8012         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8013         if (rec->flag_block_full_backref != FLAG_UNSET &&
8014             rec->flag_block_full_backref != 1)
8015                 rec->bad_full_backref = 1;
8016         return 0;
8017 }
8018
8019 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8020 {
8021         fprintf(stderr, "Invalid key type(");
8022         print_key_type(stderr, 0, key_type);
8023         fprintf(stderr, ") found in root(");
8024         print_objectid(stderr, rootid, 0);
8025         fprintf(stderr, ")\n");
8026 }
8027
8028 /*
8029  * Check if the key is valid with its extent buffer.
8030  *
8031  * This is a early check in case invalid key exists in a extent buffer
8032  * This is not comprehensive yet, but should prevent wrong key/item passed
8033  * further
8034  */
8035 static int check_type_with_root(u64 rootid, u8 key_type)
8036 {
8037         switch (key_type) {
8038         /* Only valid in chunk tree */
8039         case BTRFS_DEV_ITEM_KEY:
8040         case BTRFS_CHUNK_ITEM_KEY:
8041                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8042                         goto err;
8043                 break;
8044         /* valid in csum and log tree */
8045         case BTRFS_CSUM_TREE_OBJECTID:
8046                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8047                       is_fstree(rootid)))
8048                         goto err;
8049                 break;
8050         case BTRFS_EXTENT_ITEM_KEY:
8051         case BTRFS_METADATA_ITEM_KEY:
8052         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8053                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8054                         goto err;
8055                 break;
8056         case BTRFS_ROOT_ITEM_KEY:
8057                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8058                         goto err;
8059                 break;
8060         case BTRFS_DEV_EXTENT_KEY:
8061                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8062                         goto err;
8063                 break;
8064         }
8065         return 0;
8066 err:
8067         report_mismatch_key_root(key_type, rootid);
8068         return -EINVAL;
8069 }
8070
8071 static int run_next_block(struct btrfs_root *root,
8072                           struct block_info *bits,
8073                           int bits_nr,
8074                           u64 *last,
8075                           struct cache_tree *pending,
8076                           struct cache_tree *seen,
8077                           struct cache_tree *reada,
8078                           struct cache_tree *nodes,
8079                           struct cache_tree *extent_cache,
8080                           struct cache_tree *chunk_cache,
8081                           struct rb_root *dev_cache,
8082                           struct block_group_tree *block_group_cache,
8083                           struct device_extent_tree *dev_extent_cache,
8084                           struct root_item_record *ri)
8085 {
8086         struct btrfs_fs_info *fs_info = root->fs_info;
8087         struct extent_buffer *buf;
8088         struct extent_record *rec = NULL;
8089         u64 bytenr;
8090         u32 size;
8091         u64 parent;
8092         u64 owner;
8093         u64 flags;
8094         u64 ptr;
8095         u64 gen = 0;
8096         int ret = 0;
8097         int i;
8098         int nritems;
8099         struct btrfs_key key;
8100         struct cache_extent *cache;
8101         int reada_bits;
8102
8103         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8104                                     bits_nr, &reada_bits);
8105         if (nritems == 0)
8106                 return 1;
8107
8108         if (!reada_bits) {
8109                 for(i = 0; i < nritems; i++) {
8110                         ret = add_cache_extent(reada, bits[i].start,
8111                                                bits[i].size);
8112                         if (ret == -EEXIST)
8113                                 continue;
8114
8115                         /* fixme, get the parent transid */
8116                         readahead_tree_block(fs_info, bits[i].start, 0);
8117                 }
8118         }
8119         *last = bits[0].start;
8120         bytenr = bits[0].start;
8121         size = bits[0].size;
8122
8123         cache = lookup_cache_extent(pending, bytenr, size);
8124         if (cache) {
8125                 remove_cache_extent(pending, cache);
8126                 free(cache);
8127         }
8128         cache = lookup_cache_extent(reada, bytenr, size);
8129         if (cache) {
8130                 remove_cache_extent(reada, cache);
8131                 free(cache);
8132         }
8133         cache = lookup_cache_extent(nodes, bytenr, size);
8134         if (cache) {
8135                 remove_cache_extent(nodes, cache);
8136                 free(cache);
8137         }
8138         cache = lookup_cache_extent(extent_cache, bytenr, size);
8139         if (cache) {
8140                 rec = container_of(cache, struct extent_record, cache);
8141                 gen = rec->parent_generation;
8142         }
8143
8144         /* fixme, get the real parent transid */
8145         buf = read_tree_block(root->fs_info, bytenr, gen);
8146         if (!extent_buffer_uptodate(buf)) {
8147                 record_bad_block_io(root->fs_info,
8148                                     extent_cache, bytenr, size);
8149                 goto out;
8150         }
8151
8152         nritems = btrfs_header_nritems(buf);
8153
8154         flags = 0;
8155         if (!init_extent_tree) {
8156                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8157                                        btrfs_header_level(buf), 1, NULL,
8158                                        &flags);
8159                 if (ret < 0) {
8160                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8161                         if (ret < 0) {
8162                                 fprintf(stderr, "Couldn't calc extent flags\n");
8163                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8164                         }
8165                 }
8166         } else {
8167                 flags = 0;
8168                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8169                 if (ret < 0) {
8170                         fprintf(stderr, "Couldn't calc extent flags\n");
8171                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8172                 }
8173         }
8174
8175         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8176                 if (ri != NULL &&
8177                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8178                     ri->objectid == btrfs_header_owner(buf)) {
8179                         /*
8180                          * Ok we got to this block from it's original owner and
8181                          * we have FULL_BACKREF set.  Relocation can leave
8182                          * converted blocks over so this is altogether possible,
8183                          * however it's not possible if the generation > the
8184                          * last snapshot, so check for this case.
8185                          */
8186                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8187                             btrfs_header_generation(buf) > ri->last_snapshot) {
8188                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8189                                 rec->bad_full_backref = 1;
8190                         }
8191                 }
8192         } else {
8193                 if (ri != NULL &&
8194                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8195                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8196                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8197                         rec->bad_full_backref = 1;
8198                 }
8199         }
8200
8201         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8202                 rec->flag_block_full_backref = 1;
8203                 parent = bytenr;
8204                 owner = 0;
8205         } else {
8206                 rec->flag_block_full_backref = 0;
8207                 parent = 0;
8208                 owner = btrfs_header_owner(buf);
8209         }
8210
8211         ret = check_block(root, extent_cache, buf, flags);
8212         if (ret)
8213                 goto out;
8214
8215         if (btrfs_is_leaf(buf)) {
8216                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8217                 for (i = 0; i < nritems; i++) {
8218                         struct btrfs_file_extent_item *fi;
8219                         btrfs_item_key_to_cpu(buf, &key, i);
8220                         /*
8221                          * Check key type against the leaf owner.
8222                          * Could filter quite a lot of early error if
8223                          * owner is correct
8224                          */
8225                         if (check_type_with_root(btrfs_header_owner(buf),
8226                                                  key.type)) {
8227                                 fprintf(stderr, "ignoring invalid key\n");
8228                                 continue;
8229                         }
8230                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8231                                 process_extent_item(root, extent_cache, buf,
8232                                                     i);
8233                                 continue;
8234                         }
8235                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8236                                 process_extent_item(root, extent_cache, buf,
8237                                                     i);
8238                                 continue;
8239                         }
8240                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8241                                 total_csum_bytes +=
8242                                         btrfs_item_size_nr(buf, i);
8243                                 continue;
8244                         }
8245                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8246                                 process_chunk_item(chunk_cache, &key, buf, i);
8247                                 continue;
8248                         }
8249                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8250                                 process_device_item(dev_cache, &key, buf, i);
8251                                 continue;
8252                         }
8253                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8254                                 process_block_group_item(block_group_cache,
8255                                         &key, buf, i);
8256                                 continue;
8257                         }
8258                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8259                                 process_device_extent_item(dev_extent_cache,
8260                                         &key, buf, i);
8261                                 continue;
8262
8263                         }
8264                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8265 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8266                                 process_extent_ref_v0(extent_cache, buf, i);
8267 #else
8268                                 BUG();
8269 #endif
8270                                 continue;
8271                         }
8272
8273                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8274                                 ret = add_tree_backref(extent_cache,
8275                                                 key.objectid, 0, key.offset, 0);
8276                                 if (ret < 0)
8277                                         error(
8278                                 "add_tree_backref failed (leaf tree block): %s",
8279                                               strerror(-ret));
8280                                 continue;
8281                         }
8282                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8283                                 ret = add_tree_backref(extent_cache,
8284                                                 key.objectid, key.offset, 0, 0);
8285                                 if (ret < 0)
8286                                         error(
8287                                 "add_tree_backref failed (leaf shared block): %s",
8288                                               strerror(-ret));
8289                                 continue;
8290                         }
8291                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8292                                 struct btrfs_extent_data_ref *ref;
8293                                 ref = btrfs_item_ptr(buf, i,
8294                                                 struct btrfs_extent_data_ref);
8295                                 add_data_backref(extent_cache,
8296                                         key.objectid, 0,
8297                                         btrfs_extent_data_ref_root(buf, ref),
8298                                         btrfs_extent_data_ref_objectid(buf,
8299                                                                        ref),
8300                                         btrfs_extent_data_ref_offset(buf, ref),
8301                                         btrfs_extent_data_ref_count(buf, ref),
8302                                         0, root->fs_info->sectorsize);
8303                                 continue;
8304                         }
8305                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8306                                 struct btrfs_shared_data_ref *ref;
8307                                 ref = btrfs_item_ptr(buf, i,
8308                                                 struct btrfs_shared_data_ref);
8309                                 add_data_backref(extent_cache,
8310                                         key.objectid, key.offset, 0, 0, 0,
8311                                         btrfs_shared_data_ref_count(buf, ref),
8312                                         0, root->fs_info->sectorsize);
8313                                 continue;
8314                         }
8315                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8316                                 struct bad_item *bad;
8317
8318                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8319                                         continue;
8320                                 if (!owner)
8321                                         continue;
8322                                 bad = malloc(sizeof(struct bad_item));
8323                                 if (!bad)
8324                                         continue;
8325                                 INIT_LIST_HEAD(&bad->list);
8326                                 memcpy(&bad->key, &key,
8327                                        sizeof(struct btrfs_key));
8328                                 bad->root_id = owner;
8329                                 list_add_tail(&bad->list, &delete_items);
8330                                 continue;
8331                         }
8332                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8333                                 continue;
8334                         fi = btrfs_item_ptr(buf, i,
8335                                             struct btrfs_file_extent_item);
8336                         if (btrfs_file_extent_type(buf, fi) ==
8337                             BTRFS_FILE_EXTENT_INLINE)
8338                                 continue;
8339                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8340                                 continue;
8341
8342                         data_bytes_allocated +=
8343                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8344                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8345                                 abort();
8346                         }
8347                         data_bytes_referenced +=
8348                                 btrfs_file_extent_num_bytes(buf, fi);
8349                         add_data_backref(extent_cache,
8350                                 btrfs_file_extent_disk_bytenr(buf, fi),
8351                                 parent, owner, key.objectid, key.offset -
8352                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8353                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8354                 }
8355         } else {
8356                 int level;
8357                 struct btrfs_key first_key;
8358
8359                 first_key.objectid = 0;
8360
8361                 if (nritems > 0)
8362                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8363                 level = btrfs_header_level(buf);
8364                 for (i = 0; i < nritems; i++) {
8365                         struct extent_record tmpl;
8366
8367                         ptr = btrfs_node_blockptr(buf, i);
8368                         size = root->fs_info->nodesize;
8369                         btrfs_node_key_to_cpu(buf, &key, i);
8370                         if (ri != NULL) {
8371                                 if ((level == ri->drop_level)
8372                                     && is_dropped_key(&key, &ri->drop_key)) {
8373                                         continue;
8374                                 }
8375                         }
8376
8377                         memset(&tmpl, 0, sizeof(tmpl));
8378                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8379                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8380                         tmpl.start = ptr;
8381                         tmpl.nr = size;
8382                         tmpl.refs = 1;
8383                         tmpl.metadata = 1;
8384                         tmpl.max_size = size;
8385                         ret = add_extent_rec(extent_cache, &tmpl);
8386                         if (ret < 0)
8387                                 goto out;
8388
8389                         ret = add_tree_backref(extent_cache, ptr, parent,
8390                                         owner, 1);
8391                         if (ret < 0) {
8392                                 error(
8393                                 "add_tree_backref failed (non-leaf block): %s",
8394                                       strerror(-ret));
8395                                 continue;
8396                         }
8397
8398                         if (level > 1) {
8399                                 add_pending(nodes, seen, ptr, size);
8400                         } else {
8401                                 add_pending(pending, seen, ptr, size);
8402                         }
8403                 }
8404                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8405                                       nritems) * sizeof(struct btrfs_key_ptr);
8406         }
8407         total_btree_bytes += buf->len;
8408         if (fs_root_objectid(btrfs_header_owner(buf)))
8409                 total_fs_tree_bytes += buf->len;
8410         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8411                 total_extent_tree_bytes += buf->len;
8412 out:
8413         free_extent_buffer(buf);
8414         return ret;
8415 }
8416
8417 static int add_root_to_pending(struct extent_buffer *buf,
8418                                struct cache_tree *extent_cache,
8419                                struct cache_tree *pending,
8420                                struct cache_tree *seen,
8421                                struct cache_tree *nodes,
8422                                u64 objectid)
8423 {
8424         struct extent_record tmpl;
8425         int ret;
8426
8427         if (btrfs_header_level(buf) > 0)
8428                 add_pending(nodes, seen, buf->start, buf->len);
8429         else
8430                 add_pending(pending, seen, buf->start, buf->len);
8431
8432         memset(&tmpl, 0, sizeof(tmpl));
8433         tmpl.start = buf->start;
8434         tmpl.nr = buf->len;
8435         tmpl.is_root = 1;
8436         tmpl.refs = 1;
8437         tmpl.metadata = 1;
8438         tmpl.max_size = buf->len;
8439         add_extent_rec(extent_cache, &tmpl);
8440
8441         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8442             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8443                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8444                                 0, 1);
8445         else
8446                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8447                                 1);
8448         return ret;
8449 }
8450
8451 /* as we fix the tree, we might be deleting blocks that
8452  * we're tracking for repair.  This hook makes sure we
8453  * remove any backrefs for blocks as we are fixing them.
8454  */
8455 static int free_extent_hook(struct btrfs_trans_handle *trans,
8456                             struct btrfs_root *root,
8457                             u64 bytenr, u64 num_bytes, u64 parent,
8458                             u64 root_objectid, u64 owner, u64 offset,
8459                             int refs_to_drop)
8460 {
8461         struct extent_record *rec;
8462         struct cache_extent *cache;
8463         int is_data;
8464         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8465
8466         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8467         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8468         if (!cache)
8469                 return 0;
8470
8471         rec = container_of(cache, struct extent_record, cache);
8472         if (is_data) {
8473                 struct data_backref *back;
8474                 back = find_data_backref(rec, parent, root_objectid, owner,
8475                                          offset, 1, bytenr, num_bytes);
8476                 if (!back)
8477                         goto out;
8478                 if (back->node.found_ref) {
8479                         back->found_ref -= refs_to_drop;
8480                         if (rec->refs)
8481                                 rec->refs -= refs_to_drop;
8482                 }
8483                 if (back->node.found_extent_tree) {
8484                         back->num_refs -= refs_to_drop;
8485                         if (rec->extent_item_refs)
8486                                 rec->extent_item_refs -= refs_to_drop;
8487                 }
8488                 if (back->found_ref == 0)
8489                         back->node.found_ref = 0;
8490                 if (back->num_refs == 0)
8491                         back->node.found_extent_tree = 0;
8492
8493                 if (!back->node.found_extent_tree && back->node.found_ref) {
8494                         rb_erase(&back->node.node, &rec->backref_tree);
8495                         free(back);
8496                 }
8497         } else {
8498                 struct tree_backref *back;
8499                 back = find_tree_backref(rec, parent, root_objectid);
8500                 if (!back)
8501                         goto out;
8502                 if (back->node.found_ref) {
8503                         if (rec->refs)
8504                                 rec->refs--;
8505                         back->node.found_ref = 0;
8506                 }
8507                 if (back->node.found_extent_tree) {
8508                         if (rec->extent_item_refs)
8509                                 rec->extent_item_refs--;
8510                         back->node.found_extent_tree = 0;
8511                 }
8512                 if (!back->node.found_extent_tree && back->node.found_ref) {
8513                         rb_erase(&back->node.node, &rec->backref_tree);
8514                         free(back);
8515                 }
8516         }
8517         maybe_free_extent_rec(extent_cache, rec);
8518 out:
8519         return 0;
8520 }
8521
8522 static int delete_extent_records(struct btrfs_trans_handle *trans,
8523                                  struct btrfs_root *root,
8524                                  struct btrfs_path *path,
8525                                  u64 bytenr)
8526 {
8527         struct btrfs_key key;
8528         struct btrfs_key found_key;
8529         struct extent_buffer *leaf;
8530         int ret;
8531         int slot;
8532
8533
8534         key.objectid = bytenr;
8535         key.type = (u8)-1;
8536         key.offset = (u64)-1;
8537
8538         while(1) {
8539                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8540                                         &key, path, 0, 1);
8541                 if (ret < 0)
8542                         break;
8543
8544                 if (ret > 0) {
8545                         ret = 0;
8546                         if (path->slots[0] == 0)
8547                                 break;
8548                         path->slots[0]--;
8549                 }
8550                 ret = 0;
8551
8552                 leaf = path->nodes[0];
8553                 slot = path->slots[0];
8554
8555                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8556                 if (found_key.objectid != bytenr)
8557                         break;
8558
8559                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8560                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8561                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8562                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8563                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8564                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8565                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8566                         btrfs_release_path(path);
8567                         if (found_key.type == 0) {
8568                                 if (found_key.offset == 0)
8569                                         break;
8570                                 key.offset = found_key.offset - 1;
8571                                 key.type = found_key.type;
8572                         }
8573                         key.type = found_key.type - 1;
8574                         key.offset = (u64)-1;
8575                         continue;
8576                 }
8577
8578                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8579                         found_key.objectid, found_key.type, found_key.offset);
8580
8581                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8582                 if (ret)
8583                         break;
8584                 btrfs_release_path(path);
8585
8586                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8587                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8588                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8589                                 found_key.offset : root->fs_info->nodesize;
8590
8591                         ret = btrfs_update_block_group(trans, root, bytenr,
8592                                                        bytes, 0, 0);
8593                         if (ret)
8594                                 break;
8595                 }
8596         }
8597
8598         btrfs_release_path(path);
8599         return ret;
8600 }
8601
8602 /*
8603  * for a single backref, this will allocate a new extent
8604  * and add the backref to it.
8605  */
8606 static int record_extent(struct btrfs_trans_handle *trans,
8607                          struct btrfs_fs_info *info,
8608                          struct btrfs_path *path,
8609                          struct extent_record *rec,
8610                          struct extent_backref *back,
8611                          int allocated, u64 flags)
8612 {
8613         int ret = 0;
8614         struct btrfs_root *extent_root = info->extent_root;
8615         struct extent_buffer *leaf;
8616         struct btrfs_key ins_key;
8617         struct btrfs_extent_item *ei;
8618         struct data_backref *dback;
8619         struct btrfs_tree_block_info *bi;
8620
8621         if (!back->is_data)
8622                 rec->max_size = max_t(u64, rec->max_size,
8623                                     info->nodesize);
8624
8625         if (!allocated) {
8626                 u32 item_size = sizeof(*ei);
8627
8628                 if (!back->is_data)
8629                         item_size += sizeof(*bi);
8630
8631                 ins_key.objectid = rec->start;
8632                 ins_key.offset = rec->max_size;
8633                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8634
8635                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8636                                         &ins_key, item_size);
8637                 if (ret)
8638                         goto fail;
8639
8640                 leaf = path->nodes[0];
8641                 ei = btrfs_item_ptr(leaf, path->slots[0],
8642                                     struct btrfs_extent_item);
8643
8644                 btrfs_set_extent_refs(leaf, ei, 0);
8645                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8646
8647                 if (back->is_data) {
8648                         btrfs_set_extent_flags(leaf, ei,
8649                                                BTRFS_EXTENT_FLAG_DATA);
8650                 } else {
8651                         struct btrfs_disk_key copy_key;;
8652
8653                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8654                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8655                                              sizeof(*bi));
8656
8657                         btrfs_set_disk_key_objectid(&copy_key,
8658                                                     rec->info_objectid);
8659                         btrfs_set_disk_key_type(&copy_key, 0);
8660                         btrfs_set_disk_key_offset(&copy_key, 0);
8661
8662                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8663                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8664
8665                         btrfs_set_extent_flags(leaf, ei,
8666                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8667                 }
8668
8669                 btrfs_mark_buffer_dirty(leaf);
8670                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8671                                                rec->max_size, 1, 0);
8672                 if (ret)
8673                         goto fail;
8674                 btrfs_release_path(path);
8675         }
8676
8677         if (back->is_data) {
8678                 u64 parent;
8679                 int i;
8680
8681                 dback = to_data_backref(back);
8682                 if (back->full_backref)
8683                         parent = dback->parent;
8684                 else
8685                         parent = 0;
8686
8687                 for (i = 0; i < dback->found_ref; i++) {
8688                         /* if parent != 0, we're doing a full backref
8689                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8690                          * just makes the backref allocator create a data
8691                          * backref
8692                          */
8693                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8694                                                    rec->start, rec->max_size,
8695                                                    parent,
8696                                                    dback->root,
8697                                                    parent ?
8698                                                    BTRFS_FIRST_FREE_OBJECTID :
8699                                                    dback->owner,
8700                                                    dback->offset);
8701                         if (ret)
8702                                 break;
8703                 }
8704                 fprintf(stderr, "adding new data backref"
8705                                 " on %llu %s %llu owner %llu"
8706                                 " offset %llu found %d\n",
8707                                 (unsigned long long)rec->start,
8708                                 back->full_backref ?
8709                                 "parent" : "root",
8710                                 back->full_backref ?
8711                                 (unsigned long long)parent :
8712                                 (unsigned long long)dback->root,
8713                                 (unsigned long long)dback->owner,
8714                                 (unsigned long long)dback->offset,
8715                                 dback->found_ref);
8716         } else {
8717                 u64 parent;
8718                 struct tree_backref *tback;
8719
8720                 tback = to_tree_backref(back);
8721                 if (back->full_backref)
8722                         parent = tback->parent;
8723                 else
8724                         parent = 0;
8725
8726                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8727                                            rec->start, rec->max_size,
8728                                            parent, tback->root, 0, 0);
8729                 fprintf(stderr, "adding new tree backref on "
8730                         "start %llu len %llu parent %llu root %llu\n",
8731                         rec->start, rec->max_size, parent, tback->root);
8732         }
8733 fail:
8734         btrfs_release_path(path);
8735         return ret;
8736 }
8737
8738 static struct extent_entry *find_entry(struct list_head *entries,
8739                                        u64 bytenr, u64 bytes)
8740 {
8741         struct extent_entry *entry = NULL;
8742
8743         list_for_each_entry(entry, entries, list) {
8744                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8745                         return entry;
8746         }
8747
8748         return NULL;
8749 }
8750
8751 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8752 {
8753         struct extent_entry *entry, *best = NULL, *prev = NULL;
8754
8755         list_for_each_entry(entry, entries, list) {
8756                 /*
8757                  * If there are as many broken entries as entries then we know
8758                  * not to trust this particular entry.
8759                  */
8760                 if (entry->broken == entry->count)
8761                         continue;
8762
8763                 /*
8764                  * Special case, when there are only two entries and 'best' is
8765                  * the first one
8766                  */
8767                 if (!prev) {
8768                         best = entry;
8769                         prev = entry;
8770                         continue;
8771                 }
8772
8773                 /*
8774                  * If our current entry == best then we can't be sure our best
8775                  * is really the best, so we need to keep searching.
8776                  */
8777                 if (best && best->count == entry->count) {
8778                         prev = entry;
8779                         best = NULL;
8780                         continue;
8781                 }
8782
8783                 /* Prev == entry, not good enough, have to keep searching */
8784                 if (!prev->broken && prev->count == entry->count)
8785                         continue;
8786
8787                 if (!best)
8788                         best = (prev->count > entry->count) ? prev : entry;
8789                 else if (best->count < entry->count)
8790                         best = entry;
8791                 prev = entry;
8792         }
8793
8794         return best;
8795 }
8796
8797 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8798                       struct data_backref *dback, struct extent_entry *entry)
8799 {
8800         struct btrfs_trans_handle *trans;
8801         struct btrfs_root *root;
8802         struct btrfs_file_extent_item *fi;
8803         struct extent_buffer *leaf;
8804         struct btrfs_key key;
8805         u64 bytenr, bytes;
8806         int ret, err;
8807
8808         key.objectid = dback->root;
8809         key.type = BTRFS_ROOT_ITEM_KEY;
8810         key.offset = (u64)-1;
8811         root = btrfs_read_fs_root(info, &key);
8812         if (IS_ERR(root)) {
8813                 fprintf(stderr, "Couldn't find root for our ref\n");
8814                 return -EINVAL;
8815         }
8816
8817         /*
8818          * The backref points to the original offset of the extent if it was
8819          * split, so we need to search down to the offset we have and then walk
8820          * forward until we find the backref we're looking for.
8821          */
8822         key.objectid = dback->owner;
8823         key.type = BTRFS_EXTENT_DATA_KEY;
8824         key.offset = dback->offset;
8825         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8826         if (ret < 0) {
8827                 fprintf(stderr, "Error looking up ref %d\n", ret);
8828                 return ret;
8829         }
8830
8831         while (1) {
8832                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8833                         ret = btrfs_next_leaf(root, path);
8834                         if (ret) {
8835                                 fprintf(stderr, "Couldn't find our ref, next\n");
8836                                 return -EINVAL;
8837                         }
8838                 }
8839                 leaf = path->nodes[0];
8840                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8841                 if (key.objectid != dback->owner ||
8842                     key.type != BTRFS_EXTENT_DATA_KEY) {
8843                         fprintf(stderr, "Couldn't find our ref, search\n");
8844                         return -EINVAL;
8845                 }
8846                 fi = btrfs_item_ptr(leaf, path->slots[0],
8847                                     struct btrfs_file_extent_item);
8848                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8849                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8850
8851                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8852                         break;
8853                 path->slots[0]++;
8854         }
8855
8856         btrfs_release_path(path);
8857
8858         trans = btrfs_start_transaction(root, 1);
8859         if (IS_ERR(trans))
8860                 return PTR_ERR(trans);
8861
8862         /*
8863          * Ok we have the key of the file extent we want to fix, now we can cow
8864          * down to the thing and fix it.
8865          */
8866         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8867         if (ret < 0) {
8868                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8869                         key.objectid, key.type, key.offset, ret);
8870                 goto out;
8871         }
8872         if (ret > 0) {
8873                 fprintf(stderr, "Well that's odd, we just found this key "
8874                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8875                         key.offset);
8876                 ret = -EINVAL;
8877                 goto out;
8878         }
8879         leaf = path->nodes[0];
8880         fi = btrfs_item_ptr(leaf, path->slots[0],
8881                             struct btrfs_file_extent_item);
8882
8883         if (btrfs_file_extent_compression(leaf, fi) &&
8884             dback->disk_bytenr != entry->bytenr) {
8885                 fprintf(stderr, "Ref doesn't match the record start and is "
8886                         "compressed, please take a btrfs-image of this file "
8887                         "system and send it to a btrfs developer so they can "
8888                         "complete this functionality for bytenr %Lu\n",
8889                         dback->disk_bytenr);
8890                 ret = -EINVAL;
8891                 goto out;
8892         }
8893
8894         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8895                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8896         } else if (dback->disk_bytenr > entry->bytenr) {
8897                 u64 off_diff, offset;
8898
8899                 off_diff = dback->disk_bytenr - entry->bytenr;
8900                 offset = btrfs_file_extent_offset(leaf, fi);
8901                 if (dback->disk_bytenr + offset +
8902                     btrfs_file_extent_num_bytes(leaf, fi) >
8903                     entry->bytenr + entry->bytes) {
8904                         fprintf(stderr, "Ref is past the entry end, please "
8905                                 "take a btrfs-image of this file system and "
8906                                 "send it to a btrfs developer, ref %Lu\n",
8907                                 dback->disk_bytenr);
8908                         ret = -EINVAL;
8909                         goto out;
8910                 }
8911                 offset += off_diff;
8912                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8913                 btrfs_set_file_extent_offset(leaf, fi, offset);
8914         } else if (dback->disk_bytenr < entry->bytenr) {
8915                 u64 offset;
8916
8917                 offset = btrfs_file_extent_offset(leaf, fi);
8918                 if (dback->disk_bytenr + offset < entry->bytenr) {
8919                         fprintf(stderr, "Ref is before the entry start, please"
8920                                 " take a btrfs-image of this file system and "
8921                                 "send it to a btrfs developer, ref %Lu\n",
8922                                 dback->disk_bytenr);
8923                         ret = -EINVAL;
8924                         goto out;
8925                 }
8926
8927                 offset += dback->disk_bytenr;
8928                 offset -= entry->bytenr;
8929                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8930                 btrfs_set_file_extent_offset(leaf, fi, offset);
8931         }
8932
8933         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8934
8935         /*
8936          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8937          * only do this if we aren't using compression, otherwise it's a
8938          * trickier case.
8939          */
8940         if (!btrfs_file_extent_compression(leaf, fi))
8941                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8942         else
8943                 printf("ram bytes may be wrong?\n");
8944         btrfs_mark_buffer_dirty(leaf);
8945 out:
8946         err = btrfs_commit_transaction(trans, root);
8947         btrfs_release_path(path);
8948         return ret ? ret : err;
8949 }
8950
8951 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8952                            struct extent_record *rec)
8953 {
8954         struct extent_backref *back, *tmp;
8955         struct data_backref *dback;
8956         struct extent_entry *entry, *best = NULL;
8957         LIST_HEAD(entries);
8958         int nr_entries = 0;
8959         int broken_entries = 0;
8960         int ret = 0;
8961         short mismatch = 0;
8962
8963         /*
8964          * Metadata is easy and the backrefs should always agree on bytenr and
8965          * size, if not we've got bigger issues.
8966          */
8967         if (rec->metadata)
8968                 return 0;
8969
8970         rbtree_postorder_for_each_entry_safe(back, tmp,
8971                                              &rec->backref_tree, node) {
8972                 if (back->full_backref || !back->is_data)
8973                         continue;
8974
8975                 dback = to_data_backref(back);
8976
8977                 /*
8978                  * We only pay attention to backrefs that we found a real
8979                  * backref for.
8980                  */
8981                 if (dback->found_ref == 0)
8982                         continue;
8983
8984                 /*
8985                  * For now we only catch when the bytes don't match, not the
8986                  * bytenr.  We can easily do this at the same time, but I want
8987                  * to have a fs image to test on before we just add repair
8988                  * functionality willy-nilly so we know we won't screw up the
8989                  * repair.
8990                  */
8991
8992                 entry = find_entry(&entries, dback->disk_bytenr,
8993                                    dback->bytes);
8994                 if (!entry) {
8995                         entry = malloc(sizeof(struct extent_entry));
8996                         if (!entry) {
8997                                 ret = -ENOMEM;
8998                                 goto out;
8999                         }
9000                         memset(entry, 0, sizeof(*entry));
9001                         entry->bytenr = dback->disk_bytenr;
9002                         entry->bytes = dback->bytes;
9003                         list_add_tail(&entry->list, &entries);
9004                         nr_entries++;
9005                 }
9006
9007                 /*
9008                  * If we only have on entry we may think the entries agree when
9009                  * in reality they don't so we have to do some extra checking.
9010                  */
9011                 if (dback->disk_bytenr != rec->start ||
9012                     dback->bytes != rec->nr || back->broken)
9013                         mismatch = 1;
9014
9015                 if (back->broken) {
9016                         entry->broken++;
9017                         broken_entries++;
9018                 }
9019
9020                 entry->count++;
9021         }
9022
9023         /* Yay all the backrefs agree, carry on good sir */
9024         if (nr_entries <= 1 && !mismatch)
9025                 goto out;
9026
9027         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9028                 "%Lu\n", rec->start);
9029
9030         /*
9031          * First we want to see if the backrefs can agree amongst themselves who
9032          * is right, so figure out which one of the entries has the highest
9033          * count.
9034          */
9035         best = find_most_right_entry(&entries);
9036
9037         /*
9038          * Ok so we may have an even split between what the backrefs think, so
9039          * this is where we use the extent ref to see what it thinks.
9040          */
9041         if (!best) {
9042                 entry = find_entry(&entries, rec->start, rec->nr);
9043                 if (!entry && (!broken_entries || !rec->found_rec)) {
9044                         fprintf(stderr, "Backrefs don't agree with each other "
9045                                 "and extent record doesn't agree with anybody,"
9046                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9047                                 rec->start, rec->nr);
9048                         ret = -EINVAL;
9049                         goto out;
9050                 } else if (!entry) {
9051                         /*
9052                          * Ok our backrefs were broken, we'll assume this is the
9053                          * correct value and add an entry for this range.
9054                          */
9055                         entry = malloc(sizeof(struct extent_entry));
9056                         if (!entry) {
9057                                 ret = -ENOMEM;
9058                                 goto out;
9059                         }
9060                         memset(entry, 0, sizeof(*entry));
9061                         entry->bytenr = rec->start;
9062                         entry->bytes = rec->nr;
9063                         list_add_tail(&entry->list, &entries);
9064                         nr_entries++;
9065                 }
9066                 entry->count++;
9067                 best = find_most_right_entry(&entries);
9068                 if (!best) {
9069                         fprintf(stderr, "Backrefs and extent record evenly "
9070                                 "split on who is right, this is going to "
9071                                 "require user input to fix bytenr %Lu bytes "
9072                                 "%Lu\n", rec->start, rec->nr);
9073                         ret = -EINVAL;
9074                         goto out;
9075                 }
9076         }
9077
9078         /*
9079          * I don't think this can happen currently as we'll abort() if we catch
9080          * this case higher up, but in case somebody removes that we still can't
9081          * deal with it properly here yet, so just bail out of that's the case.
9082          */
9083         if (best->bytenr != rec->start) {
9084                 fprintf(stderr, "Extent start and backref starts don't match, "
9085                         "please use btrfs-image on this file system and send "
9086                         "it to a btrfs developer so they can make fsck fix "
9087                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9088                         rec->start, rec->nr);
9089                 ret = -EINVAL;
9090                 goto out;
9091         }
9092
9093         /*
9094          * Ok great we all agreed on an extent record, let's go find the real
9095          * references and fix up the ones that don't match.
9096          */
9097         rbtree_postorder_for_each_entry_safe(back, tmp,
9098                                              &rec->backref_tree, node) {
9099                 if (back->full_backref || !back->is_data)
9100                         continue;
9101
9102                 dback = to_data_backref(back);
9103
9104                 /*
9105                  * Still ignoring backrefs that don't have a real ref attached
9106                  * to them.
9107                  */
9108                 if (dback->found_ref == 0)
9109                         continue;
9110
9111                 if (dback->bytes == best->bytes &&
9112                     dback->disk_bytenr == best->bytenr)
9113                         continue;
9114
9115                 ret = repair_ref(info, path, dback, best);
9116                 if (ret)
9117                         goto out;
9118         }
9119
9120         /*
9121          * Ok we messed with the actual refs, which means we need to drop our
9122          * entire cache and go back and rescan.  I know this is a huge pain and
9123          * adds a lot of extra work, but it's the only way to be safe.  Once all
9124          * the backrefs agree we may not need to do anything to the extent
9125          * record itself.
9126          */
9127         ret = -EAGAIN;
9128 out:
9129         while (!list_empty(&entries)) {
9130                 entry = list_entry(entries.next, struct extent_entry, list);
9131                 list_del_init(&entry->list);
9132                 free(entry);
9133         }
9134         return ret;
9135 }
9136
9137 static int process_duplicates(struct cache_tree *extent_cache,
9138                               struct extent_record *rec)
9139 {
9140         struct extent_record *good, *tmp;
9141         struct cache_extent *cache;
9142         int ret;
9143
9144         /*
9145          * If we found a extent record for this extent then return, or if we
9146          * have more than one duplicate we are likely going to need to delete
9147          * something.
9148          */
9149         if (rec->found_rec || rec->num_duplicates > 1)
9150                 return 0;
9151
9152         /* Shouldn't happen but just in case */
9153         BUG_ON(!rec->num_duplicates);
9154
9155         /*
9156          * So this happens if we end up with a backref that doesn't match the
9157          * actual extent entry.  So either the backref is bad or the extent
9158          * entry is bad.  Either way we want to have the extent_record actually
9159          * reflect what we found in the extent_tree, so we need to take the
9160          * duplicate out and use that as the extent_record since the only way we
9161          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9162          */
9163         remove_cache_extent(extent_cache, &rec->cache);
9164
9165         good = to_extent_record(rec->dups.next);
9166         list_del_init(&good->list);
9167         INIT_LIST_HEAD(&good->backrefs);
9168         INIT_LIST_HEAD(&good->dups);
9169         good->cache.start = good->start;
9170         good->cache.size = good->nr;
9171         good->content_checked = 0;
9172         good->owner_ref_checked = 0;
9173         good->num_duplicates = 0;
9174         good->refs = rec->refs;
9175         list_splice_init(&rec->backrefs, &good->backrefs);
9176         while (1) {
9177                 cache = lookup_cache_extent(extent_cache, good->start,
9178                                             good->nr);
9179                 if (!cache)
9180                         break;
9181                 tmp = container_of(cache, struct extent_record, cache);
9182
9183                 /*
9184                  * If we find another overlapping extent and it's found_rec is
9185                  * set then it's a duplicate and we need to try and delete
9186                  * something.
9187                  */
9188                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9189                         if (list_empty(&good->list))
9190                                 list_add_tail(&good->list,
9191                                               &duplicate_extents);
9192                         good->num_duplicates += tmp->num_duplicates + 1;
9193                         list_splice_init(&tmp->dups, &good->dups);
9194                         list_del_init(&tmp->list);
9195                         list_add_tail(&tmp->list, &good->dups);
9196                         remove_cache_extent(extent_cache, &tmp->cache);
9197                         continue;
9198                 }
9199
9200                 /*
9201                  * Ok we have another non extent item backed extent rec, so lets
9202                  * just add it to this extent and carry on like we did above.
9203                  */
9204                 good->refs += tmp->refs;
9205                 list_splice_init(&tmp->backrefs, &good->backrefs);
9206                 remove_cache_extent(extent_cache, &tmp->cache);
9207                 free(tmp);
9208         }
9209         ret = insert_cache_extent(extent_cache, &good->cache);
9210         BUG_ON(ret);
9211         free(rec);
9212         return good->num_duplicates ? 0 : 1;
9213 }
9214
9215 static int delete_duplicate_records(struct btrfs_root *root,
9216                                     struct extent_record *rec)
9217 {
9218         struct btrfs_trans_handle *trans;
9219         LIST_HEAD(delete_list);
9220         struct btrfs_path path;
9221         struct extent_record *tmp, *good, *n;
9222         int nr_del = 0;
9223         int ret = 0, err;
9224         struct btrfs_key key;
9225
9226         btrfs_init_path(&path);
9227
9228         good = rec;
9229         /* Find the record that covers all of the duplicates. */
9230         list_for_each_entry(tmp, &rec->dups, list) {
9231                 if (good->start < tmp->start)
9232                         continue;
9233                 if (good->nr > tmp->nr)
9234                         continue;
9235
9236                 if (tmp->start + tmp->nr < good->start + good->nr) {
9237                         fprintf(stderr, "Ok we have overlapping extents that "
9238                                 "aren't completely covered by each other, this "
9239                                 "is going to require more careful thought.  "
9240                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9241                                 tmp->start, tmp->nr, good->start, good->nr);
9242                         abort();
9243                 }
9244                 good = tmp;
9245         }
9246
9247         if (good != rec)
9248                 list_add_tail(&rec->list, &delete_list);
9249
9250         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9251                 if (tmp == good)
9252                         continue;
9253                 list_move_tail(&tmp->list, &delete_list);
9254         }
9255
9256         root = root->fs_info->extent_root;
9257         trans = btrfs_start_transaction(root, 1);
9258         if (IS_ERR(trans)) {
9259                 ret = PTR_ERR(trans);
9260                 goto out;
9261         }
9262
9263         list_for_each_entry(tmp, &delete_list, list) {
9264                 if (tmp->found_rec == 0)
9265                         continue;
9266                 key.objectid = tmp->start;
9267                 key.type = BTRFS_EXTENT_ITEM_KEY;
9268                 key.offset = tmp->nr;
9269
9270                 /* Shouldn't happen but just in case */
9271                 if (tmp->metadata) {
9272                         fprintf(stderr, "Well this shouldn't happen, extent "
9273                                 "record overlaps but is metadata? "
9274                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9275                         abort();
9276                 }
9277
9278                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9279                 if (ret) {
9280                         if (ret > 0)
9281                                 ret = -EINVAL;
9282                         break;
9283                 }
9284                 ret = btrfs_del_item(trans, root, &path);
9285                 if (ret)
9286                         break;
9287                 btrfs_release_path(&path);
9288                 nr_del++;
9289         }
9290         err = btrfs_commit_transaction(trans, root);
9291         if (err && !ret)
9292                 ret = err;
9293 out:
9294         while (!list_empty(&delete_list)) {
9295                 tmp = to_extent_record(delete_list.next);
9296                 list_del_init(&tmp->list);
9297                 if (tmp == rec)
9298                         continue;
9299                 free(tmp);
9300         }
9301
9302         while (!list_empty(&rec->dups)) {
9303                 tmp = to_extent_record(rec->dups.next);
9304                 list_del_init(&tmp->list);
9305                 free(tmp);
9306         }
9307
9308         btrfs_release_path(&path);
9309
9310         if (!ret && !nr_del)
9311                 rec->num_duplicates = 0;
9312
9313         return ret ? ret : nr_del;
9314 }
9315
9316 static int find_possible_backrefs(struct btrfs_fs_info *info,
9317                                   struct btrfs_path *path,
9318                                   struct cache_tree *extent_cache,
9319                                   struct extent_record *rec)
9320 {
9321         struct btrfs_root *root;
9322         struct extent_backref *back, *tmp;
9323         struct data_backref *dback;
9324         struct cache_extent *cache;
9325         struct btrfs_file_extent_item *fi;
9326         struct btrfs_key key;
9327         u64 bytenr, bytes;
9328         int ret;
9329
9330         rbtree_postorder_for_each_entry_safe(back, tmp,
9331                                              &rec->backref_tree, node) {
9332                 /* Don't care about full backrefs (poor unloved backrefs) */
9333                 if (back->full_backref || !back->is_data)
9334                         continue;
9335
9336                 dback = to_data_backref(back);
9337
9338                 /* We found this one, we don't need to do a lookup */
9339                 if (dback->found_ref)
9340                         continue;
9341
9342                 key.objectid = dback->root;
9343                 key.type = BTRFS_ROOT_ITEM_KEY;
9344                 key.offset = (u64)-1;
9345
9346                 root = btrfs_read_fs_root(info, &key);
9347
9348                 /* No root, definitely a bad ref, skip */
9349                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9350                         continue;
9351                 /* Other err, exit */
9352                 if (IS_ERR(root))
9353                         return PTR_ERR(root);
9354
9355                 key.objectid = dback->owner;
9356                 key.type = BTRFS_EXTENT_DATA_KEY;
9357                 key.offset = dback->offset;
9358                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9359                 if (ret) {
9360                         btrfs_release_path(path);
9361                         if (ret < 0)
9362                                 return ret;
9363                         /* Didn't find it, we can carry on */
9364                         ret = 0;
9365                         continue;
9366                 }
9367
9368                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9369                                     struct btrfs_file_extent_item);
9370                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9371                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9372                 btrfs_release_path(path);
9373                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9374                 if (cache) {
9375                         struct extent_record *tmp;
9376                         tmp = container_of(cache, struct extent_record, cache);
9377
9378                         /*
9379                          * If we found an extent record for the bytenr for this
9380                          * particular backref then we can't add it to our
9381                          * current extent record.  We only want to add backrefs
9382                          * that don't have a corresponding extent item in the
9383                          * extent tree since they likely belong to this record
9384                          * and we need to fix it if it doesn't match bytenrs.
9385                          */
9386                         if  (tmp->found_rec)
9387                                 continue;
9388                 }
9389
9390                 dback->found_ref += 1;
9391                 dback->disk_bytenr = bytenr;
9392                 dback->bytes = bytes;
9393
9394                 /*
9395                  * Set this so the verify backref code knows not to trust the
9396                  * values in this backref.
9397                  */
9398                 back->broken = 1;
9399         }
9400
9401         return 0;
9402 }
9403
9404 /*
9405  * Record orphan data ref into corresponding root.
9406  *
9407  * Return 0 if the extent item contains data ref and recorded.
9408  * Return 1 if the extent item contains no useful data ref
9409  *   On that case, it may contains only shared_dataref or metadata backref
9410  *   or the file extent exists(this should be handled by the extent bytenr
9411  *   recovery routine)
9412  * Return <0 if something goes wrong.
9413  */
9414 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9415                                       struct extent_record *rec)
9416 {
9417         struct btrfs_key key;
9418         struct btrfs_root *dest_root;
9419         struct extent_backref *back, *tmp;
9420         struct data_backref *dback;
9421         struct orphan_data_extent *orphan;
9422         struct btrfs_path path;
9423         int recorded_data_ref = 0;
9424         int ret = 0;
9425
9426         if (rec->metadata)
9427                 return 1;
9428         btrfs_init_path(&path);
9429         rbtree_postorder_for_each_entry_safe(back, tmp,
9430                                              &rec->backref_tree, node) {
9431                 if (back->full_backref || !back->is_data ||
9432                     !back->found_extent_tree)
9433                         continue;
9434                 dback = to_data_backref(back);
9435                 if (dback->found_ref)
9436                         continue;
9437                 key.objectid = dback->root;
9438                 key.type = BTRFS_ROOT_ITEM_KEY;
9439                 key.offset = (u64)-1;
9440
9441                 dest_root = btrfs_read_fs_root(fs_info, &key);
9442
9443                 /* For non-exist root we just skip it */
9444                 if (IS_ERR(dest_root) || !dest_root)
9445                         continue;
9446
9447                 key.objectid = dback->owner;
9448                 key.type = BTRFS_EXTENT_DATA_KEY;
9449                 key.offset = dback->offset;
9450
9451                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9452                 btrfs_release_path(&path);
9453                 /*
9454                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9455                  * we need to record it for inode/file extent rebuild.
9456                  * For ret > 0, we record it only for file extent rebuild.
9457                  * For ret == 0, the file extent exists but only bytenr
9458                  * mismatch, let the original bytenr fix routine to handle,
9459                  * don't record it.
9460                  */
9461                 if (ret == 0)
9462                         continue;
9463                 ret = 0;
9464                 orphan = malloc(sizeof(*orphan));
9465                 if (!orphan) {
9466                         ret = -ENOMEM;
9467                         goto out;
9468                 }
9469                 INIT_LIST_HEAD(&orphan->list);
9470                 orphan->root = dback->root;
9471                 orphan->objectid = dback->owner;
9472                 orphan->offset = dback->offset;
9473                 orphan->disk_bytenr = rec->cache.start;
9474                 orphan->disk_len = rec->cache.size;
9475                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9476                 recorded_data_ref = 1;
9477         }
9478 out:
9479         btrfs_release_path(&path);
9480         if (!ret)
9481                 return !recorded_data_ref;
9482         else
9483                 return ret;
9484 }
9485
9486 /*
9487  * when an incorrect extent item is found, this will delete
9488  * all of the existing entries for it and recreate them
9489  * based on what the tree scan found.
9490  */
9491 static int fixup_extent_refs(struct btrfs_fs_info *info,
9492                              struct cache_tree *extent_cache,
9493                              struct extent_record *rec)
9494 {
9495         struct btrfs_trans_handle *trans = NULL;
9496         int ret;
9497         struct btrfs_path path;
9498         struct cache_extent *cache;
9499         struct extent_backref *back, *tmp;
9500         int allocated = 0;
9501         u64 flags = 0;
9502
9503         if (rec->flag_block_full_backref)
9504                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9505
9506         btrfs_init_path(&path);
9507         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9508                 /*
9509                  * Sometimes the backrefs themselves are so broken they don't
9510                  * get attached to any meaningful rec, so first go back and
9511                  * check any of our backrefs that we couldn't find and throw
9512                  * them into the list if we find the backref so that
9513                  * verify_backrefs can figure out what to do.
9514                  */
9515                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9516                 if (ret < 0)
9517                         goto out;
9518         }
9519
9520         /* step one, make sure all of the backrefs agree */
9521         ret = verify_backrefs(info, &path, rec);
9522         if (ret < 0)
9523                 goto out;
9524
9525         trans = btrfs_start_transaction(info->extent_root, 1);
9526         if (IS_ERR(trans)) {
9527                 ret = PTR_ERR(trans);
9528                 goto out;
9529         }
9530
9531         /* step two, delete all the existing records */
9532         ret = delete_extent_records(trans, info->extent_root, &path,
9533                                     rec->start);
9534
9535         if (ret < 0)
9536                 goto out;
9537
9538         /* was this block corrupt?  If so, don't add references to it */
9539         cache = lookup_cache_extent(info->corrupt_blocks,
9540                                     rec->start, rec->max_size);
9541         if (cache) {
9542                 ret = 0;
9543                 goto out;
9544         }
9545
9546         /* step three, recreate all the refs we did find */
9547         rbtree_postorder_for_each_entry_safe(back, tmp,
9548                                              &rec->backref_tree, node) {
9549                 /*
9550                  * if we didn't find any references, don't create a
9551                  * new extent record
9552                  */
9553                 if (!back->found_ref)
9554                         continue;
9555
9556                 rec->bad_full_backref = 0;
9557                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9558                 allocated = 1;
9559
9560                 if (ret)
9561                         goto out;
9562         }
9563 out:
9564         if (trans) {
9565                 int err = btrfs_commit_transaction(trans, info->extent_root);
9566                 if (!ret)
9567                         ret = err;
9568         }
9569
9570         if (!ret)
9571                 fprintf(stderr, "Repaired extent references for %llu\n",
9572                                 (unsigned long long)rec->start);
9573
9574         btrfs_release_path(&path);
9575         return ret;
9576 }
9577
9578 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9579                               struct extent_record *rec)
9580 {
9581         struct btrfs_trans_handle *trans;
9582         struct btrfs_root *root = fs_info->extent_root;
9583         struct btrfs_path path;
9584         struct btrfs_extent_item *ei;
9585         struct btrfs_key key;
9586         u64 flags;
9587         int ret = 0;
9588
9589         key.objectid = rec->start;
9590         if (rec->metadata) {
9591                 key.type = BTRFS_METADATA_ITEM_KEY;
9592                 key.offset = rec->info_level;
9593         } else {
9594                 key.type = BTRFS_EXTENT_ITEM_KEY;
9595                 key.offset = rec->max_size;
9596         }
9597
9598         trans = btrfs_start_transaction(root, 0);
9599         if (IS_ERR(trans))
9600                 return PTR_ERR(trans);
9601
9602         btrfs_init_path(&path);
9603         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9604         if (ret < 0) {
9605                 btrfs_release_path(&path);
9606                 btrfs_commit_transaction(trans, root);
9607                 return ret;
9608         } else if (ret) {
9609                 fprintf(stderr, "Didn't find extent for %llu\n",
9610                         (unsigned long long)rec->start);
9611                 btrfs_release_path(&path);
9612                 btrfs_commit_transaction(trans, root);
9613                 return -ENOENT;
9614         }
9615
9616         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9617                             struct btrfs_extent_item);
9618         flags = btrfs_extent_flags(path.nodes[0], ei);
9619         if (rec->flag_block_full_backref) {
9620                 fprintf(stderr, "setting full backref on %llu\n",
9621                         (unsigned long long)key.objectid);
9622                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9623         } else {
9624                 fprintf(stderr, "clearing full backref on %llu\n",
9625                         (unsigned long long)key.objectid);
9626                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9627         }
9628         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9629         btrfs_mark_buffer_dirty(path.nodes[0]);
9630         btrfs_release_path(&path);
9631         ret = btrfs_commit_transaction(trans, root);
9632         if (!ret)
9633                 fprintf(stderr, "Repaired extent flags for %llu\n",
9634                                 (unsigned long long)rec->start);
9635
9636         return ret;
9637 }
9638
9639 /* right now we only prune from the extent allocation tree */
9640 static int prune_one_block(struct btrfs_trans_handle *trans,
9641                            struct btrfs_fs_info *info,
9642                            struct btrfs_corrupt_block *corrupt)
9643 {
9644         int ret;
9645         struct btrfs_path path;
9646         struct extent_buffer *eb;
9647         u64 found;
9648         int slot;
9649         int nritems;
9650         int level = corrupt->level + 1;
9651
9652         btrfs_init_path(&path);
9653 again:
9654         /* we want to stop at the parent to our busted block */
9655         path.lowest_level = level;
9656
9657         ret = btrfs_search_slot(trans, info->extent_root,
9658                                 &corrupt->key, &path, -1, 1);
9659
9660         if (ret < 0)
9661                 goto out;
9662
9663         eb = path.nodes[level];
9664         if (!eb) {
9665                 ret = -ENOENT;
9666                 goto out;
9667         }
9668
9669         /*
9670          * hopefully the search gave us the block we want to prune,
9671          * lets try that first
9672          */
9673         slot = path.slots[level];
9674         found =  btrfs_node_blockptr(eb, slot);
9675         if (found == corrupt->cache.start)
9676                 goto del_ptr;
9677
9678         nritems = btrfs_header_nritems(eb);
9679
9680         /* the search failed, lets scan this node and hope we find it */
9681         for (slot = 0; slot < nritems; slot++) {
9682                 found =  btrfs_node_blockptr(eb, slot);
9683                 if (found == corrupt->cache.start)
9684                         goto del_ptr;
9685         }
9686         /*
9687          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9688          * to this block
9689          */
9690         if (eb == info->extent_root->node) {
9691                 ret = -ENOENT;
9692                 goto out;
9693         } else {
9694                 level++;
9695                 btrfs_release_path(&path);
9696                 goto again;
9697         }
9698
9699 del_ptr:
9700         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9701         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9702
9703 out:
9704         btrfs_release_path(&path);
9705         return ret;
9706 }
9707
9708 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9709 {
9710         struct btrfs_trans_handle *trans = NULL;
9711         struct cache_extent *cache;
9712         struct btrfs_corrupt_block *corrupt;
9713
9714         while (1) {
9715                 cache = search_cache_extent(info->corrupt_blocks, 0);
9716                 if (!cache)
9717                         break;
9718                 if (!trans) {
9719                         trans = btrfs_start_transaction(info->extent_root, 1);
9720                         if (IS_ERR(trans))
9721                                 return PTR_ERR(trans);
9722                 }
9723                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9724                 prune_one_block(trans, info, corrupt);
9725                 remove_cache_extent(info->corrupt_blocks, cache);
9726         }
9727         if (trans)
9728                 return btrfs_commit_transaction(trans, info->extent_root);
9729         return 0;
9730 }
9731
9732 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9733 {
9734         struct btrfs_block_group_cache *cache;
9735         u64 start, end;
9736         int ret;
9737
9738         while (1) {
9739                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9740                                             &start, &end, EXTENT_DIRTY);
9741                 if (ret)
9742                         break;
9743                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9744         }
9745
9746         start = 0;
9747         while (1) {
9748                 cache = btrfs_lookup_first_block_group(fs_info, start);
9749                 if (!cache)
9750                         break;
9751                 if (cache->cached)
9752                         cache->cached = 0;
9753                 start = cache->key.objectid + cache->key.offset;
9754         }
9755 }
9756
9757 static int check_extent_refs(struct btrfs_root *root,
9758                              struct cache_tree *extent_cache)
9759 {
9760         struct extent_record *rec;
9761         struct cache_extent *cache;
9762         int ret = 0;
9763         int had_dups = 0;
9764
9765         if (repair) {
9766                 /*
9767                  * if we're doing a repair, we have to make sure
9768                  * we don't allocate from the problem extents.
9769                  * In the worst case, this will be all the
9770                  * extents in the FS
9771                  */
9772                 cache = search_cache_extent(extent_cache, 0);
9773                 while(cache) {
9774                         rec = container_of(cache, struct extent_record, cache);
9775                         set_extent_dirty(root->fs_info->excluded_extents,
9776                                          rec->start,
9777                                          rec->start + rec->max_size - 1);
9778                         cache = next_cache_extent(cache);
9779                 }
9780
9781                 /* pin down all the corrupted blocks too */
9782                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9783                 while(cache) {
9784                         set_extent_dirty(root->fs_info->excluded_extents,
9785                                          cache->start,
9786                                          cache->start + cache->size - 1);
9787                         cache = next_cache_extent(cache);
9788                 }
9789                 prune_corrupt_blocks(root->fs_info);
9790                 reset_cached_block_groups(root->fs_info);
9791         }
9792
9793         reset_cached_block_groups(root->fs_info);
9794
9795         /*
9796          * We need to delete any duplicate entries we find first otherwise we
9797          * could mess up the extent tree when we have backrefs that actually
9798          * belong to a different extent item and not the weird duplicate one.
9799          */
9800         while (repair && !list_empty(&duplicate_extents)) {
9801                 rec = to_extent_record(duplicate_extents.next);
9802                 list_del_init(&rec->list);
9803
9804                 /* Sometimes we can find a backref before we find an actual
9805                  * extent, so we need to process it a little bit to see if there
9806                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9807                  * if this is a backref screwup.  If we need to delete stuff
9808                  * process_duplicates() will return 0, otherwise it will return
9809                  * 1 and we
9810                  */
9811                 if (process_duplicates(extent_cache, rec))
9812                         continue;
9813                 ret = delete_duplicate_records(root, rec);
9814                 if (ret < 0)
9815                         return ret;
9816                 /*
9817                  * delete_duplicate_records will return the number of entries
9818                  * deleted, so if it's greater than 0 then we know we actually
9819                  * did something and we need to remove.
9820                  */
9821                 if (ret)
9822                         had_dups = 1;
9823         }
9824
9825         if (had_dups)
9826                 return -EAGAIN;
9827
9828         while(1) {
9829                 int cur_err = 0;
9830                 int fix = 0;
9831
9832                 cache = search_cache_extent(extent_cache, 0);
9833                 if (!cache)
9834                         break;
9835                 rec = container_of(cache, struct extent_record, cache);
9836                 if (rec->num_duplicates) {
9837                         fprintf(stderr, "extent item %llu has multiple extent "
9838                                 "items\n", (unsigned long long)rec->start);
9839                         cur_err = 1;
9840                 }
9841
9842                 if (rec->refs != rec->extent_item_refs) {
9843                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9844                                 (unsigned long long)rec->start,
9845                                 (unsigned long long)rec->nr);
9846                         fprintf(stderr, "extent item %llu, found %llu\n",
9847                                 (unsigned long long)rec->extent_item_refs,
9848                                 (unsigned long long)rec->refs);
9849                         ret = record_orphan_data_extents(root->fs_info, rec);
9850                         if (ret < 0)
9851                                 goto repair_abort;
9852                         fix = ret;
9853                         cur_err = 1;
9854                 }
9855                 if (all_backpointers_checked(rec, 1)) {
9856                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9857                                 (unsigned long long)rec->start,
9858                                 (unsigned long long)rec->nr);
9859                         fix = 1;
9860                         cur_err = 1;
9861                 }
9862                 if (!rec->owner_ref_checked) {
9863                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9864                                 (unsigned long long)rec->start,
9865                                 (unsigned long long)rec->nr);
9866                         fix = 1;
9867                         cur_err = 1;
9868                 }
9869
9870                 if (repair && fix) {
9871                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9872                         if (ret)
9873                                 goto repair_abort;
9874                 }
9875
9876
9877                 if (rec->bad_full_backref) {
9878                         fprintf(stderr, "bad full backref, on [%llu]\n",
9879                                 (unsigned long long)rec->start);
9880                         if (repair) {
9881                                 ret = fixup_extent_flags(root->fs_info, rec);
9882                                 if (ret)
9883                                         goto repair_abort;
9884                                 fix = 1;
9885                         }
9886                         cur_err = 1;
9887                 }
9888                 /*
9889                  * Although it's not a extent ref's problem, we reuse this
9890                  * routine for error reporting.
9891                  * No repair function yet.
9892                  */
9893                 if (rec->crossing_stripes) {
9894                         fprintf(stderr,
9895                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9896                                 rec->start, rec->start + rec->max_size);
9897                         cur_err = 1;
9898                 }
9899
9900                 if (rec->wrong_chunk_type) {
9901                         fprintf(stderr,
9902                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9903                                 rec->start, rec->start + rec->max_size);
9904                         cur_err = 1;
9905                 }
9906
9907                 remove_cache_extent(extent_cache, cache);
9908                 free_all_extent_backrefs(rec);
9909                 if (!init_extent_tree && repair && (!cur_err || fix))
9910                         clear_extent_dirty(root->fs_info->excluded_extents,
9911                                            rec->start,
9912                                            rec->start + rec->max_size - 1);
9913                 free(rec);
9914         }
9915 repair_abort:
9916         if (repair) {
9917                 if (ret && ret != -EAGAIN) {
9918                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9919                         exit(1);
9920                 } else if (!ret) {
9921                         struct btrfs_trans_handle *trans;
9922
9923                         root = root->fs_info->extent_root;
9924                         trans = btrfs_start_transaction(root, 1);
9925                         if (IS_ERR(trans)) {
9926                                 ret = PTR_ERR(trans);
9927                                 goto repair_abort;
9928                         }
9929
9930                         ret = btrfs_fix_block_accounting(trans, root);
9931                         if (ret)
9932                                 goto repair_abort;
9933                         ret = btrfs_commit_transaction(trans, root);
9934                         if (ret)
9935                                 goto repair_abort;
9936                 }
9937                 return ret;
9938         }
9939         return 0;
9940 }
9941
9942 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9943 {
9944         u64 stripe_size;
9945
9946         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9947                 stripe_size = length;
9948                 stripe_size /= num_stripes;
9949         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9950                 stripe_size = length * 2;
9951                 stripe_size /= num_stripes;
9952         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9953                 stripe_size = length;
9954                 stripe_size /= (num_stripes - 1);
9955         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9956                 stripe_size = length;
9957                 stripe_size /= (num_stripes - 2);
9958         } else {
9959                 stripe_size = length;
9960         }
9961         return stripe_size;
9962 }
9963
9964 /*
9965  * Check the chunk with its block group/dev list ref:
9966  * Return 0 if all refs seems valid.
9967  * Return 1 if part of refs seems valid, need later check for rebuild ref
9968  * like missing block group and needs to search extent tree to rebuild them.
9969  * Return -1 if essential refs are missing and unable to rebuild.
9970  */
9971 static int check_chunk_refs(struct chunk_record *chunk_rec,
9972                             struct block_group_tree *block_group_cache,
9973                             struct device_extent_tree *dev_extent_cache,
9974                             int silent)
9975 {
9976         struct cache_extent *block_group_item;
9977         struct block_group_record *block_group_rec;
9978         struct cache_extent *dev_extent_item;
9979         struct device_extent_record *dev_extent_rec;
9980         u64 devid;
9981         u64 offset;
9982         u64 length;
9983         int metadump_v2 = 0;
9984         int i;
9985         int ret = 0;
9986
9987         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9988                                                chunk_rec->offset,
9989                                                chunk_rec->length);
9990         if (block_group_item) {
9991                 block_group_rec = container_of(block_group_item,
9992                                                struct block_group_record,
9993                                                cache);
9994                 if (chunk_rec->length != block_group_rec->offset ||
9995                     chunk_rec->offset != block_group_rec->objectid ||
9996                     (!metadump_v2 &&
9997                      chunk_rec->type_flags != block_group_rec->flags)) {
9998                         if (!silent)
9999                                 fprintf(stderr,
10000                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10001                                         chunk_rec->objectid,
10002                                         chunk_rec->type,
10003                                         chunk_rec->offset,
10004                                         chunk_rec->length,
10005                                         chunk_rec->offset,
10006                                         chunk_rec->type_flags,
10007                                         block_group_rec->objectid,
10008                                         block_group_rec->type,
10009                                         block_group_rec->offset,
10010                                         block_group_rec->offset,
10011                                         block_group_rec->objectid,
10012                                         block_group_rec->flags);
10013                         ret = -1;
10014                 } else {
10015                         list_del_init(&block_group_rec->list);
10016                         chunk_rec->bg_rec = block_group_rec;
10017                 }
10018         } else {
10019                 if (!silent)
10020                         fprintf(stderr,
10021                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10022                                 chunk_rec->objectid,
10023                                 chunk_rec->type,
10024                                 chunk_rec->offset,
10025                                 chunk_rec->length,
10026                                 chunk_rec->offset,
10027                                 chunk_rec->type_flags);
10028                 ret = 1;
10029         }
10030
10031         if (metadump_v2)
10032                 return ret;
10033
10034         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10035                                     chunk_rec->num_stripes);
10036         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10037                 devid = chunk_rec->stripes[i].devid;
10038                 offset = chunk_rec->stripes[i].offset;
10039                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10040                                                        devid, offset, length);
10041                 if (dev_extent_item) {
10042                         dev_extent_rec = container_of(dev_extent_item,
10043                                                 struct device_extent_record,
10044                                                 cache);
10045                         if (dev_extent_rec->objectid != devid ||
10046                             dev_extent_rec->offset != offset ||
10047                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10048                             dev_extent_rec->length != length) {
10049                                 if (!silent)
10050                                         fprintf(stderr,
10051                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10052                                                 chunk_rec->objectid,
10053                                                 chunk_rec->type,
10054                                                 chunk_rec->offset,
10055                                                 chunk_rec->stripes[i].devid,
10056                                                 chunk_rec->stripes[i].offset,
10057                                                 dev_extent_rec->objectid,
10058                                                 dev_extent_rec->offset,
10059                                                 dev_extent_rec->length);
10060                                 ret = -1;
10061                         } else {
10062                                 list_move(&dev_extent_rec->chunk_list,
10063                                           &chunk_rec->dextents);
10064                         }
10065                 } else {
10066                         if (!silent)
10067                                 fprintf(stderr,
10068                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10069                                         chunk_rec->objectid,
10070                                         chunk_rec->type,
10071                                         chunk_rec->offset,
10072                                         chunk_rec->stripes[i].devid,
10073                                         chunk_rec->stripes[i].offset);
10074                         ret = -1;
10075                 }
10076         }
10077         return ret;
10078 }
10079
10080 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10081 int check_chunks(struct cache_tree *chunk_cache,
10082                  struct block_group_tree *block_group_cache,
10083                  struct device_extent_tree *dev_extent_cache,
10084                  struct list_head *good, struct list_head *bad,
10085                  struct list_head *rebuild, int silent)
10086 {
10087         struct cache_extent *chunk_item;
10088         struct chunk_record *chunk_rec;
10089         struct block_group_record *bg_rec;
10090         struct device_extent_record *dext_rec;
10091         int err;
10092         int ret = 0;
10093
10094         chunk_item = first_cache_extent(chunk_cache);
10095         while (chunk_item) {
10096                 chunk_rec = container_of(chunk_item, struct chunk_record,
10097                                          cache);
10098                 err = check_chunk_refs(chunk_rec, block_group_cache,
10099                                        dev_extent_cache, silent);
10100                 if (err < 0)
10101                         ret = err;
10102                 if (err == 0 && good)
10103                         list_add_tail(&chunk_rec->list, good);
10104                 if (err > 0 && rebuild)
10105                         list_add_tail(&chunk_rec->list, rebuild);
10106                 if (err < 0 && bad)
10107                         list_add_tail(&chunk_rec->list, bad);
10108                 chunk_item = next_cache_extent(chunk_item);
10109         }
10110
10111         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10112                 if (!silent)
10113                         fprintf(stderr,
10114                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10115                                 bg_rec->objectid,
10116                                 bg_rec->offset,
10117                                 bg_rec->flags);
10118                 if (!ret)
10119                         ret = 1;
10120         }
10121
10122         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10123                             chunk_list) {
10124                 if (!silent)
10125                         fprintf(stderr,
10126                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10127                                 dext_rec->objectid,
10128                                 dext_rec->offset,
10129                                 dext_rec->length);
10130                 if (!ret)
10131                         ret = 1;
10132         }
10133         return ret;
10134 }
10135
10136
10137 static int check_device_used(struct device_record *dev_rec,
10138                              struct device_extent_tree *dext_cache)
10139 {
10140         struct cache_extent *cache;
10141         struct device_extent_record *dev_extent_rec;
10142         u64 total_byte = 0;
10143
10144         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10145         while (cache) {
10146                 dev_extent_rec = container_of(cache,
10147                                               struct device_extent_record,
10148                                               cache);
10149                 if (dev_extent_rec->objectid != dev_rec->devid)
10150                         break;
10151
10152                 list_del_init(&dev_extent_rec->device_list);
10153                 total_byte += dev_extent_rec->length;
10154                 cache = next_cache_extent(cache);
10155         }
10156
10157         if (total_byte != dev_rec->byte_used) {
10158                 fprintf(stderr,
10159                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10160                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10161                         dev_rec->type, dev_rec->offset);
10162                 return -1;
10163         } else {
10164                 return 0;
10165         }
10166 }
10167
10168 /* check btrfs_dev_item -> btrfs_dev_extent */
10169 static int check_devices(struct rb_root *dev_cache,
10170                          struct device_extent_tree *dev_extent_cache)
10171 {
10172         struct rb_node *dev_node;
10173         struct device_record *dev_rec;
10174         struct device_extent_record *dext_rec;
10175         int err;
10176         int ret = 0;
10177
10178         dev_node = rb_first(dev_cache);
10179         while (dev_node) {
10180                 dev_rec = container_of(dev_node, struct device_record, node);
10181                 err = check_device_used(dev_rec, dev_extent_cache);
10182                 if (err)
10183                         ret = err;
10184
10185                 dev_node = rb_next(dev_node);
10186         }
10187         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10188                             device_list) {
10189                 fprintf(stderr,
10190                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10191                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10192                 if (!ret)
10193                         ret = 1;
10194         }
10195         return ret;
10196 }
10197
10198 static int add_root_item_to_list(struct list_head *head,
10199                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10200                                   u8 level, u8 drop_level,
10201                                   struct btrfs_key *drop_key)
10202 {
10203
10204         struct root_item_record *ri_rec;
10205         ri_rec = malloc(sizeof(*ri_rec));
10206         if (!ri_rec)
10207                 return -ENOMEM;
10208         ri_rec->bytenr = bytenr;
10209         ri_rec->objectid = objectid;
10210         ri_rec->level = level;
10211         ri_rec->drop_level = drop_level;
10212         ri_rec->last_snapshot = last_snapshot;
10213         if (drop_key)
10214                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10215         list_add_tail(&ri_rec->list, head);
10216
10217         return 0;
10218 }
10219
10220 static void free_root_item_list(struct list_head *list)
10221 {
10222         struct root_item_record *ri_rec;
10223
10224         while (!list_empty(list)) {
10225                 ri_rec = list_first_entry(list, struct root_item_record,
10226                                           list);
10227                 list_del_init(&ri_rec->list);
10228                 free(ri_rec);
10229         }
10230 }
10231
10232 static int deal_root_from_list(struct list_head *list,
10233                                struct btrfs_root *root,
10234                                struct block_info *bits,
10235                                int bits_nr,
10236                                struct cache_tree *pending,
10237                                struct cache_tree *seen,
10238                                struct cache_tree *reada,
10239                                struct cache_tree *nodes,
10240                                struct cache_tree *extent_cache,
10241                                struct cache_tree *chunk_cache,
10242                                struct rb_root *dev_cache,
10243                                struct block_group_tree *block_group_cache,
10244                                struct device_extent_tree *dev_extent_cache)
10245 {
10246         int ret = 0;
10247         u64 last;
10248
10249         while (!list_empty(list)) {
10250                 struct root_item_record *rec;
10251                 struct extent_buffer *buf;
10252                 rec = list_entry(list->next,
10253                                  struct root_item_record, list);
10254                 last = 0;
10255                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10256                 if (!extent_buffer_uptodate(buf)) {
10257                         free_extent_buffer(buf);
10258                         ret = -EIO;
10259                         break;
10260                 }
10261                 ret = add_root_to_pending(buf, extent_cache, pending,
10262                                     seen, nodes, rec->objectid);
10263                 if (ret < 0)
10264                         break;
10265                 /*
10266                  * To rebuild extent tree, we need deal with snapshot
10267                  * one by one, otherwise we deal with node firstly which
10268                  * can maximize readahead.
10269                  */
10270                 while (1) {
10271                         ret = run_next_block(root, bits, bits_nr, &last,
10272                                              pending, seen, reada, nodes,
10273                                              extent_cache, chunk_cache,
10274                                              dev_cache, block_group_cache,
10275                                              dev_extent_cache, rec);
10276                         if (ret != 0)
10277                                 break;
10278                 }
10279                 free_extent_buffer(buf);
10280                 list_del(&rec->list);
10281                 free(rec);
10282                 if (ret < 0)
10283                         break;
10284         }
10285         while (ret >= 0) {
10286                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10287                                      reada, nodes, extent_cache, chunk_cache,
10288                                      dev_cache, block_group_cache,
10289                                      dev_extent_cache, NULL);
10290                 if (ret != 0) {
10291                         if (ret > 0)
10292                                 ret = 0;
10293                         break;
10294                 }
10295         }
10296         return ret;
10297 }
10298
10299 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10300 {
10301         struct rb_root dev_cache;
10302         struct cache_tree chunk_cache;
10303         struct block_group_tree block_group_cache;
10304         struct device_extent_tree dev_extent_cache;
10305         struct cache_tree extent_cache;
10306         struct cache_tree seen;
10307         struct cache_tree pending;
10308         struct cache_tree reada;
10309         struct cache_tree nodes;
10310         struct extent_io_tree excluded_extents;
10311         struct cache_tree corrupt_blocks;
10312         struct btrfs_path path;
10313         struct btrfs_key key;
10314         struct btrfs_key found_key;
10315         int ret, err = 0;
10316         struct block_info *bits;
10317         int bits_nr;
10318         struct extent_buffer *leaf;
10319         int slot;
10320         struct btrfs_root_item ri;
10321         struct list_head dropping_trees;
10322         struct list_head normal_trees;
10323         struct btrfs_root *root1;
10324         struct btrfs_root *root;
10325         u64 objectid;
10326         u8 level;
10327
10328         root = fs_info->fs_root;
10329         dev_cache = RB_ROOT;
10330         cache_tree_init(&chunk_cache);
10331         block_group_tree_init(&block_group_cache);
10332         device_extent_tree_init(&dev_extent_cache);
10333
10334         cache_tree_init(&extent_cache);
10335         cache_tree_init(&seen);
10336         cache_tree_init(&pending);
10337         cache_tree_init(&nodes);
10338         cache_tree_init(&reada);
10339         cache_tree_init(&corrupt_blocks);
10340         extent_io_tree_init(&excluded_extents);
10341         INIT_LIST_HEAD(&dropping_trees);
10342         INIT_LIST_HEAD(&normal_trees);
10343
10344         if (repair) {
10345                 fs_info->excluded_extents = &excluded_extents;
10346                 fs_info->fsck_extent_cache = &extent_cache;
10347                 fs_info->free_extent_hook = free_extent_hook;
10348                 fs_info->corrupt_blocks = &corrupt_blocks;
10349         }
10350
10351         bits_nr = 1024;
10352         bits = malloc(bits_nr * sizeof(struct block_info));
10353         if (!bits) {
10354                 perror("malloc");
10355                 exit(1);
10356         }
10357
10358         if (ctx.progress_enabled) {
10359                 ctx.tp = TASK_EXTENTS;
10360                 task_start(ctx.info);
10361         }
10362
10363 again:
10364         root1 = fs_info->tree_root;
10365         level = btrfs_header_level(root1->node);
10366         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10367                                     root1->node->start, 0, level, 0, NULL);
10368         if (ret < 0)
10369                 goto out;
10370         root1 = fs_info->chunk_root;
10371         level = btrfs_header_level(root1->node);
10372         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10373                                     root1->node->start, 0, level, 0, NULL);
10374         if (ret < 0)
10375                 goto out;
10376         btrfs_init_path(&path);
10377         key.offset = 0;
10378         key.objectid = 0;
10379         key.type = BTRFS_ROOT_ITEM_KEY;
10380         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10381         if (ret < 0)
10382                 goto out;
10383         while(1) {
10384                 leaf = path.nodes[0];
10385                 slot = path.slots[0];
10386                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10387                         ret = btrfs_next_leaf(root, &path);
10388                         if (ret != 0)
10389                                 break;
10390                         leaf = path.nodes[0];
10391                         slot = path.slots[0];
10392                 }
10393                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10394                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10395                         unsigned long offset;
10396                         u64 last_snapshot;
10397
10398                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10399                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10400                         last_snapshot = btrfs_root_last_snapshot(&ri);
10401                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10402                                 level = btrfs_root_level(&ri);
10403                                 ret = add_root_item_to_list(&normal_trees,
10404                                                 found_key.objectid,
10405                                                 btrfs_root_bytenr(&ri),
10406                                                 last_snapshot, level,
10407                                                 0, NULL);
10408                                 if (ret < 0)
10409                                         goto out;
10410                         } else {
10411                                 level = btrfs_root_level(&ri);
10412                                 objectid = found_key.objectid;
10413                                 btrfs_disk_key_to_cpu(&found_key,
10414                                                       &ri.drop_progress);
10415                                 ret = add_root_item_to_list(&dropping_trees,
10416                                                 objectid,
10417                                                 btrfs_root_bytenr(&ri),
10418                                                 last_snapshot, level,
10419                                                 ri.drop_level, &found_key);
10420                                 if (ret < 0)
10421                                         goto out;
10422                         }
10423                 }
10424                 path.slots[0]++;
10425         }
10426         btrfs_release_path(&path);
10427
10428         /*
10429          * check_block can return -EAGAIN if it fixes something, please keep
10430          * this in mind when dealing with return values from these functions, if
10431          * we get -EAGAIN we want to fall through and restart the loop.
10432          */
10433         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10434                                   &seen, &reada, &nodes, &extent_cache,
10435                                   &chunk_cache, &dev_cache, &block_group_cache,
10436                                   &dev_extent_cache);
10437         if (ret < 0) {
10438                 if (ret == -EAGAIN)
10439                         goto loop;
10440                 goto out;
10441         }
10442         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10443                                   &pending, &seen, &reada, &nodes,
10444                                   &extent_cache, &chunk_cache, &dev_cache,
10445                                   &block_group_cache, &dev_extent_cache);
10446         if (ret < 0) {
10447                 if (ret == -EAGAIN)
10448                         goto loop;
10449                 goto out;
10450         }
10451
10452         ret = check_chunks(&chunk_cache, &block_group_cache,
10453                            &dev_extent_cache, NULL, NULL, NULL, 0);
10454         if (ret) {
10455                 if (ret == -EAGAIN)
10456                         goto loop;
10457                 err = ret;
10458         }
10459
10460         ret = check_extent_refs(root, &extent_cache);
10461         if (ret < 0) {
10462                 if (ret == -EAGAIN)
10463                         goto loop;
10464                 goto out;
10465         }
10466
10467         ret = check_devices(&dev_cache, &dev_extent_cache);
10468         if (ret && err)
10469                 ret = err;
10470
10471 out:
10472         task_stop(ctx.info);
10473         if (repair) {
10474                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10475                 extent_io_tree_cleanup(&excluded_extents);
10476                 fs_info->fsck_extent_cache = NULL;
10477                 fs_info->free_extent_hook = NULL;
10478                 fs_info->corrupt_blocks = NULL;
10479                 fs_info->excluded_extents = NULL;
10480         }
10481         free(bits);
10482         free_chunk_cache_tree(&chunk_cache);
10483         free_device_cache_tree(&dev_cache);
10484         free_block_group_tree(&block_group_cache);
10485         free_device_extent_tree(&dev_extent_cache);
10486         free_extent_cache_tree(&seen);
10487         free_extent_cache_tree(&pending);
10488         free_extent_cache_tree(&reada);
10489         free_extent_cache_tree(&nodes);
10490         free_root_item_list(&normal_trees);
10491         free_root_item_list(&dropping_trees);
10492         return ret;
10493 loop:
10494         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10495         free_extent_cache_tree(&seen);
10496         free_extent_cache_tree(&pending);
10497         free_extent_cache_tree(&reada);
10498         free_extent_cache_tree(&nodes);
10499         free_chunk_cache_tree(&chunk_cache);
10500         free_block_group_tree(&block_group_cache);
10501         free_device_cache_tree(&dev_cache);
10502         free_device_extent_tree(&dev_extent_cache);
10503         free_extent_record_cache(&extent_cache);
10504         free_root_item_list(&normal_trees);
10505         free_root_item_list(&dropping_trees);
10506         extent_io_tree_cleanup(&excluded_extents);
10507         goto again;
10508 }
10509
10510 /*
10511  * Check backrefs of a tree block given by @bytenr or @eb.
10512  *
10513  * @root:       the root containing the @bytenr or @eb
10514  * @eb:         tree block extent buffer, can be NULL
10515  * @bytenr:     bytenr of the tree block to search
10516  * @level:      tree level of the tree block
10517  * @owner:      owner of the tree block
10518  *
10519  * Return >0 for any error found and output error message
10520  * Return 0 for no error found
10521  */
10522 static int check_tree_block_ref(struct btrfs_root *root,
10523                                 struct extent_buffer *eb, u64 bytenr,
10524                                 int level, u64 owner)
10525 {
10526         struct btrfs_key key;
10527         struct btrfs_root *extent_root = root->fs_info->extent_root;
10528         struct btrfs_path path;
10529         struct btrfs_extent_item *ei;
10530         struct btrfs_extent_inline_ref *iref;
10531         struct extent_buffer *leaf;
10532         unsigned long end;
10533         unsigned long ptr;
10534         int slot;
10535         int skinny_level;
10536         int type;
10537         u32 nodesize = root->fs_info->nodesize;
10538         u32 item_size;
10539         u64 offset;
10540         int tree_reloc_root = 0;
10541         int found_ref = 0;
10542         int err = 0;
10543         int ret;
10544
10545         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10546             btrfs_header_bytenr(root->node) == bytenr)
10547                 tree_reloc_root = 1;
10548
10549         btrfs_init_path(&path);
10550         key.objectid = bytenr;
10551         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10552                 key.type = BTRFS_METADATA_ITEM_KEY;
10553         else
10554                 key.type = BTRFS_EXTENT_ITEM_KEY;
10555         key.offset = (u64)-1;
10556
10557         /* Search for the backref in extent tree */
10558         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10559         if (ret < 0) {
10560                 err |= BACKREF_MISSING;
10561                 goto out;
10562         }
10563         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10564         if (ret) {
10565                 err |= BACKREF_MISSING;
10566                 goto out;
10567         }
10568
10569         leaf = path.nodes[0];
10570         slot = path.slots[0];
10571         btrfs_item_key_to_cpu(leaf, &key, slot);
10572
10573         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10574
10575         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10576                 skinny_level = (int)key.offset;
10577                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10578         } else {
10579                 struct btrfs_tree_block_info *info;
10580
10581                 info = (struct btrfs_tree_block_info *)(ei + 1);
10582                 skinny_level = btrfs_tree_block_level(leaf, info);
10583                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10584         }
10585
10586         if (eb) {
10587                 u64 header_gen;
10588                 u64 extent_gen;
10589
10590                 if (!(btrfs_extent_flags(leaf, ei) &
10591                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10592                         error(
10593                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10594                                 key.objectid, nodesize,
10595                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10596                         err = BACKREF_MISMATCH;
10597                 }
10598                 header_gen = btrfs_header_generation(eb);
10599                 extent_gen = btrfs_extent_generation(leaf, ei);
10600                 if (header_gen != extent_gen) {
10601                         error(
10602         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10603                                 key.objectid, nodesize, header_gen,
10604                                 extent_gen);
10605                         err = BACKREF_MISMATCH;
10606                 }
10607                 if (level != skinny_level) {
10608                         error(
10609                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10610                                 key.objectid, nodesize, level, skinny_level);
10611                         err = BACKREF_MISMATCH;
10612                 }
10613                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10614                         error(
10615                         "extent[%llu %u] is referred by other roots than %llu",
10616                                 key.objectid, nodesize, root->objectid);
10617                         err = BACKREF_MISMATCH;
10618                 }
10619         }
10620
10621         /*
10622          * Iterate the extent/metadata item to find the exact backref
10623          */
10624         item_size = btrfs_item_size_nr(leaf, slot);
10625         ptr = (unsigned long)iref;
10626         end = (unsigned long)ei + item_size;
10627         while (ptr < end) {
10628                 iref = (struct btrfs_extent_inline_ref *)ptr;
10629                 type = btrfs_extent_inline_ref_type(leaf, iref);
10630                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10631
10632                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10633                         (offset == root->objectid || offset == owner)) {
10634                         found_ref = 1;
10635                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10636                         /*
10637                          * Backref of tree reloc root points to itself, no need
10638                          * to check backref any more.
10639                          */
10640                         if (tree_reloc_root)
10641                                 found_ref = 1;
10642                         else
10643                         /* Check if the backref points to valid referencer */
10644                                 found_ref = !check_tree_block_ref(root, NULL,
10645                                                 offset, level + 1, owner);
10646                 }
10647
10648                 if (found_ref)
10649                         break;
10650                 ptr += btrfs_extent_inline_ref_size(type);
10651         }
10652
10653         /*
10654          * Inlined extent item doesn't have what we need, check
10655          * TREE_BLOCK_REF_KEY
10656          */
10657         if (!found_ref) {
10658                 btrfs_release_path(&path);
10659                 key.objectid = bytenr;
10660                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10661                 key.offset = root->objectid;
10662
10663                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10664                 if (!ret)
10665                         found_ref = 1;
10666         }
10667         if (!found_ref)
10668                 err |= BACKREF_MISSING;
10669 out:
10670         btrfs_release_path(&path);
10671         if (eb && (err & BACKREF_MISSING))
10672                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10673                         bytenr, nodesize, owner, level);
10674         return err;
10675 }
10676
10677 /*
10678  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10679  *
10680  * Return >0 any error found and output error message
10681  * Return 0 for no error found
10682  */
10683 static int check_extent_data_item(struct btrfs_root *root,
10684                                   struct extent_buffer *eb, int slot)
10685 {
10686         struct btrfs_file_extent_item *fi;
10687         struct btrfs_path path;
10688         struct btrfs_root *extent_root = root->fs_info->extent_root;
10689         struct btrfs_key fi_key;
10690         struct btrfs_key dbref_key;
10691         struct extent_buffer *leaf;
10692         struct btrfs_extent_item *ei;
10693         struct btrfs_extent_inline_ref *iref;
10694         struct btrfs_extent_data_ref *dref;
10695         u64 owner;
10696         u64 disk_bytenr;
10697         u64 disk_num_bytes;
10698         u64 extent_num_bytes;
10699         u64 extent_flags;
10700         u32 item_size;
10701         unsigned long end;
10702         unsigned long ptr;
10703         int type;
10704         u64 ref_root;
10705         int found_dbackref = 0;
10706         int err = 0;
10707         int ret;
10708
10709         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10710         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10711
10712         /* Nothing to check for hole and inline data extents */
10713         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10714             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10715                 return 0;
10716
10717         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10718         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10719         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10720
10721         /* Check unaligned disk_num_bytes and num_bytes */
10722         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10723                 error(
10724 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10725                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10726                         root->fs_info->sectorsize);
10727                 err |= BYTES_UNALIGNED;
10728         } else {
10729                 data_bytes_allocated += disk_num_bytes;
10730         }
10731         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10732                 error(
10733 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10734                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10735                         root->fs_info->sectorsize);
10736                 err |= BYTES_UNALIGNED;
10737         } else {
10738                 data_bytes_referenced += extent_num_bytes;
10739         }
10740         owner = btrfs_header_owner(eb);
10741
10742         /* Check the extent item of the file extent in extent tree */
10743         btrfs_init_path(&path);
10744         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10745         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10746         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10747
10748         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10749         if (ret)
10750                 goto out;
10751
10752         leaf = path.nodes[0];
10753         slot = path.slots[0];
10754         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10755
10756         extent_flags = btrfs_extent_flags(leaf, ei);
10757
10758         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10759                 error(
10760                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10761                     disk_bytenr, disk_num_bytes,
10762                     BTRFS_EXTENT_FLAG_DATA);
10763                 err |= BACKREF_MISMATCH;
10764         }
10765
10766         /* Check data backref inside that extent item */
10767         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10768         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10769         ptr = (unsigned long)iref;
10770         end = (unsigned long)ei + item_size;
10771         while (ptr < end) {
10772                 iref = (struct btrfs_extent_inline_ref *)ptr;
10773                 type = btrfs_extent_inline_ref_type(leaf, iref);
10774                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10775
10776                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10777                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10778                         if (ref_root == owner || ref_root == root->objectid)
10779                                 found_dbackref = 1;
10780                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10781                         found_dbackref = !check_tree_block_ref(root, NULL,
10782                                 btrfs_extent_inline_ref_offset(leaf, iref),
10783                                 0, owner);
10784                 }
10785
10786                 if (found_dbackref)
10787                         break;
10788                 ptr += btrfs_extent_inline_ref_size(type);
10789         }
10790
10791         if (!found_dbackref) {
10792                 btrfs_release_path(&path);
10793
10794                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10795                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10796                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10797                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10798                                 fi_key.objectid, fi_key.offset);
10799
10800                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10801                                         &dbref_key, &path, 0, 0);
10802                 if (!ret) {
10803                         found_dbackref = 1;
10804                         goto out;
10805                 }
10806
10807                 btrfs_release_path(&path);
10808
10809                 /*
10810                  * Neither inlined nor EXTENT_DATA_REF found, try
10811                  * SHARED_DATA_REF as last chance.
10812                  */
10813                 dbref_key.objectid = disk_bytenr;
10814                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10815                 dbref_key.offset = eb->start;
10816
10817                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10818                                         &dbref_key, &path, 0, 0);
10819                 if (!ret) {
10820                         found_dbackref = 1;
10821                         goto out;
10822                 }
10823         }
10824
10825 out:
10826         if (!found_dbackref)
10827                 err |= BACKREF_MISSING;
10828         btrfs_release_path(&path);
10829         if (err & BACKREF_MISSING) {
10830                 error("data extent[%llu %llu] backref lost",
10831                       disk_bytenr, disk_num_bytes);
10832         }
10833         return err;
10834 }
10835
10836 /*
10837  * Get real tree block level for the case like shared block
10838  * Return >= 0 as tree level
10839  * Return <0 for error
10840  */
10841 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10842 {
10843         struct extent_buffer *eb;
10844         struct btrfs_path path;
10845         struct btrfs_key key;
10846         struct btrfs_extent_item *ei;
10847         u64 flags;
10848         u64 transid;
10849         u8 backref_level;
10850         u8 header_level;
10851         int ret;
10852
10853         /* Search extent tree for extent generation and level */
10854         key.objectid = bytenr;
10855         key.type = BTRFS_METADATA_ITEM_KEY;
10856         key.offset = (u64)-1;
10857
10858         btrfs_init_path(&path);
10859         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10860         if (ret < 0)
10861                 goto release_out;
10862         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10863         if (ret < 0)
10864                 goto release_out;
10865         if (ret > 0) {
10866                 ret = -ENOENT;
10867                 goto release_out;
10868         }
10869
10870         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10871         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10872                             struct btrfs_extent_item);
10873         flags = btrfs_extent_flags(path.nodes[0], ei);
10874         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10875                 ret = -ENOENT;
10876                 goto release_out;
10877         }
10878
10879         /* Get transid for later read_tree_block() check */
10880         transid = btrfs_extent_generation(path.nodes[0], ei);
10881
10882         /* Get backref level as one source */
10883         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10884                 backref_level = key.offset;
10885         } else {
10886                 struct btrfs_tree_block_info *info;
10887
10888                 info = (struct btrfs_tree_block_info *)(ei + 1);
10889                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10890         }
10891         btrfs_release_path(&path);
10892
10893         /* Get level from tree block as an alternative source */
10894         eb = read_tree_block(fs_info, bytenr, transid);
10895         if (!extent_buffer_uptodate(eb)) {
10896                 free_extent_buffer(eb);
10897                 return -EIO;
10898         }
10899         header_level = btrfs_header_level(eb);
10900         free_extent_buffer(eb);
10901
10902         if (header_level != backref_level)
10903                 return -EIO;
10904         return header_level;
10905
10906 release_out:
10907         btrfs_release_path(&path);
10908         return ret;
10909 }
10910
10911 /*
10912  * Check if a tree block backref is valid (points to a valid tree block)
10913  * if level == -1, level will be resolved
10914  * Return >0 for any error found and print error message
10915  */
10916 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10917                                     u64 bytenr, int level)
10918 {
10919         struct btrfs_root *root;
10920         struct btrfs_key key;
10921         struct btrfs_path path;
10922         struct extent_buffer *eb;
10923         struct extent_buffer *node;
10924         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10925         int err = 0;
10926         int ret;
10927
10928         /* Query level for level == -1 special case */
10929         if (level == -1)
10930                 level = query_tree_block_level(fs_info, bytenr);
10931         if (level < 0) {
10932                 err |= REFERENCER_MISSING;
10933                 goto out;
10934         }
10935
10936         key.objectid = root_id;
10937         key.type = BTRFS_ROOT_ITEM_KEY;
10938         key.offset = (u64)-1;
10939
10940         root = btrfs_read_fs_root(fs_info, &key);
10941         if (IS_ERR(root)) {
10942                 err |= REFERENCER_MISSING;
10943                 goto out;
10944         }
10945
10946         /* Read out the tree block to get item/node key */
10947         eb = read_tree_block(fs_info, bytenr, 0);
10948         if (!extent_buffer_uptodate(eb)) {
10949                 err |= REFERENCER_MISSING;
10950                 free_extent_buffer(eb);
10951                 goto out;
10952         }
10953
10954         /* Empty tree, no need to check key */
10955         if (!btrfs_header_nritems(eb) && !level) {
10956                 free_extent_buffer(eb);
10957                 goto out;
10958         }
10959
10960         if (level)
10961                 btrfs_node_key_to_cpu(eb, &key, 0);
10962         else
10963                 btrfs_item_key_to_cpu(eb, &key, 0);
10964
10965         free_extent_buffer(eb);
10966
10967         btrfs_init_path(&path);
10968         path.lowest_level = level;
10969         /* Search with the first key, to ensure we can reach it */
10970         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10971         if (ret < 0) {
10972                 err |= REFERENCER_MISSING;
10973                 goto release_out;
10974         }
10975
10976         node = path.nodes[level];
10977         if (btrfs_header_bytenr(node) != bytenr) {
10978                 error(
10979         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10980                         bytenr, nodesize, bytenr,
10981                         btrfs_header_bytenr(node));
10982                 err |= REFERENCER_MISMATCH;
10983         }
10984         if (btrfs_header_level(node) != level) {
10985                 error(
10986         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10987                         bytenr, nodesize, level,
10988                         btrfs_header_level(node));
10989                 err |= REFERENCER_MISMATCH;
10990         }
10991
10992 release_out:
10993         btrfs_release_path(&path);
10994 out:
10995         if (err & REFERENCER_MISSING) {
10996                 if (level < 0)
10997                         error("extent [%llu %d] lost referencer (owner: %llu)",
10998                                 bytenr, nodesize, root_id);
10999                 else
11000                         error(
11001                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11002                                 bytenr, nodesize, root_id, level);
11003         }
11004
11005         return err;
11006 }
11007
11008 /*
11009  * Check if tree block @eb is tree reloc root.
11010  * Return 0 if it's not or any problem happens
11011  * Return 1 if it's a tree reloc root
11012  */
11013 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11014                                  struct extent_buffer *eb)
11015 {
11016         struct btrfs_root *tree_reloc_root;
11017         struct btrfs_key key;
11018         u64 bytenr = btrfs_header_bytenr(eb);
11019         u64 owner = btrfs_header_owner(eb);
11020         int ret = 0;
11021
11022         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11023         key.offset = owner;
11024         key.type = BTRFS_ROOT_ITEM_KEY;
11025
11026         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11027         if (IS_ERR(tree_reloc_root))
11028                 return 0;
11029
11030         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11031                 ret = 1;
11032         btrfs_free_fs_root(tree_reloc_root);
11033         return ret;
11034 }
11035
11036 /*
11037  * Check referencer for shared block backref
11038  * If level == -1, this function will resolve the level.
11039  */
11040 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11041                                      u64 parent, u64 bytenr, int level)
11042 {
11043         struct extent_buffer *eb;
11044         u32 nr;
11045         int found_parent = 0;
11046         int i;
11047
11048         eb = read_tree_block(fs_info, parent, 0);
11049         if (!extent_buffer_uptodate(eb))
11050                 goto out;
11051
11052         if (level == -1)
11053                 level = query_tree_block_level(fs_info, bytenr);
11054         if (level < 0)
11055                 goto out;
11056
11057         /* It's possible it's a tree reloc root */
11058         if (parent == bytenr) {
11059                 if (is_tree_reloc_root(fs_info, eb))
11060                         found_parent = 1;
11061                 goto out;
11062         }
11063
11064         if (level + 1 != btrfs_header_level(eb))
11065                 goto out;
11066
11067         nr = btrfs_header_nritems(eb);
11068         for (i = 0; i < nr; i++) {
11069                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11070                         found_parent = 1;
11071                         break;
11072                 }
11073         }
11074 out:
11075         free_extent_buffer(eb);
11076         if (!found_parent) {
11077                 error(
11078         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11079                         bytenr, fs_info->nodesize, parent, level);
11080                 return REFERENCER_MISSING;
11081         }
11082         return 0;
11083 }
11084
11085 /*
11086  * Check referencer for normal (inlined) data ref
11087  * If len == 0, it will be resolved by searching in extent tree
11088  */
11089 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11090                                      u64 root_id, u64 objectid, u64 offset,
11091                                      u64 bytenr, u64 len, u32 count)
11092 {
11093         struct btrfs_root *root;
11094         struct btrfs_root *extent_root = fs_info->extent_root;
11095         struct btrfs_key key;
11096         struct btrfs_path path;
11097         struct extent_buffer *leaf;
11098         struct btrfs_file_extent_item *fi;
11099         u32 found_count = 0;
11100         int slot;
11101         int ret = 0;
11102
11103         if (!len) {
11104                 key.objectid = bytenr;
11105                 key.type = BTRFS_EXTENT_ITEM_KEY;
11106                 key.offset = (u64)-1;
11107
11108                 btrfs_init_path(&path);
11109                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11110                 if (ret < 0)
11111                         goto out;
11112                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11113                 if (ret)
11114                         goto out;
11115                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11116                 if (key.objectid != bytenr ||
11117                     key.type != BTRFS_EXTENT_ITEM_KEY)
11118                         goto out;
11119                 len = key.offset;
11120                 btrfs_release_path(&path);
11121         }
11122         key.objectid = root_id;
11123         key.type = BTRFS_ROOT_ITEM_KEY;
11124         key.offset = (u64)-1;
11125         btrfs_init_path(&path);
11126
11127         root = btrfs_read_fs_root(fs_info, &key);
11128         if (IS_ERR(root))
11129                 goto out;
11130
11131         key.objectid = objectid;
11132         key.type = BTRFS_EXTENT_DATA_KEY;
11133         /*
11134          * It can be nasty as data backref offset is
11135          * file offset - file extent offset, which is smaller or
11136          * equal to original backref offset.  The only special case is
11137          * overflow.  So we need to special check and do further search.
11138          */
11139         key.offset = offset & (1ULL << 63) ? 0 : offset;
11140
11141         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11142         if (ret < 0)
11143                 goto out;
11144
11145         /*
11146          * Search afterwards to get correct one
11147          * NOTE: As we must do a comprehensive check on the data backref to
11148          * make sure the dref count also matches, we must iterate all file
11149          * extents for that inode.
11150          */
11151         while (1) {
11152                 leaf = path.nodes[0];
11153                 slot = path.slots[0];
11154
11155                 if (slot >= btrfs_header_nritems(leaf))
11156                         goto next;
11157                 btrfs_item_key_to_cpu(leaf, &key, slot);
11158                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11159                         break;
11160                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11161                 /*
11162                  * Except normal disk bytenr and disk num bytes, we still
11163                  * need to do extra check on dbackref offset as
11164                  * dbackref offset = file_offset - file_extent_offset
11165                  */
11166                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11167                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11168                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11169                     offset)
11170                         found_count++;
11171
11172 next:
11173                 ret = btrfs_next_item(root, &path);
11174                 if (ret)
11175                         break;
11176         }
11177 out:
11178         btrfs_release_path(&path);
11179         if (found_count != count) {
11180                 error(
11181 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11182                         bytenr, len, root_id, objectid, offset, count, found_count);
11183                 return REFERENCER_MISSING;
11184         }
11185         return 0;
11186 }
11187
11188 /*
11189  * Check if the referencer of a shared data backref exists
11190  */
11191 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11192                                      u64 parent, u64 bytenr)
11193 {
11194         struct extent_buffer *eb;
11195         struct btrfs_key key;
11196         struct btrfs_file_extent_item *fi;
11197         u32 nr;
11198         int found_parent = 0;
11199         int i;
11200
11201         eb = read_tree_block(fs_info, parent, 0);
11202         if (!extent_buffer_uptodate(eb))
11203                 goto out;
11204
11205         nr = btrfs_header_nritems(eb);
11206         for (i = 0; i < nr; i++) {
11207                 btrfs_item_key_to_cpu(eb, &key, i);
11208                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11209                         continue;
11210
11211                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11212                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11213                         continue;
11214
11215                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11216                         found_parent = 1;
11217                         break;
11218                 }
11219         }
11220
11221 out:
11222         free_extent_buffer(eb);
11223         if (!found_parent) {
11224                 error("shared extent %llu referencer lost (parent: %llu)",
11225                         bytenr, parent);
11226                 return REFERENCER_MISSING;
11227         }
11228         return 0;
11229 }
11230
11231 /*
11232  * This function will check a given extent item, including its backref and
11233  * itself (like crossing stripe boundary and type)
11234  *
11235  * Since we don't use extent_record anymore, introduce new error bit
11236  */
11237 static int check_extent_item(struct btrfs_fs_info *fs_info,
11238                              struct extent_buffer *eb, int slot)
11239 {
11240         struct btrfs_extent_item *ei;
11241         struct btrfs_extent_inline_ref *iref;
11242         struct btrfs_extent_data_ref *dref;
11243         unsigned long end;
11244         unsigned long ptr;
11245         int type;
11246         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11247         u32 item_size = btrfs_item_size_nr(eb, slot);
11248         u64 flags;
11249         u64 offset;
11250         int metadata = 0;
11251         int level;
11252         struct btrfs_key key;
11253         int ret;
11254         int err = 0;
11255
11256         btrfs_item_key_to_cpu(eb, &key, slot);
11257         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11258                 bytes_used += key.offset;
11259         else
11260                 bytes_used += nodesize;
11261
11262         if (item_size < sizeof(*ei)) {
11263                 /*
11264                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11265                  * old thing when on disk format is still un-determined.
11266                  * No need to care about it anymore
11267                  */
11268                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11269                 return -ENOTTY;
11270         }
11271
11272         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11273         flags = btrfs_extent_flags(eb, ei);
11274
11275         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11276                 metadata = 1;
11277         if (metadata && check_crossing_stripes(global_info, key.objectid,
11278                                                eb->len)) {
11279                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11280                       key.objectid, key.objectid + nodesize);
11281                 err |= CROSSING_STRIPE_BOUNDARY;
11282         }
11283
11284         ptr = (unsigned long)(ei + 1);
11285
11286         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11287                 /* Old EXTENT_ITEM metadata */
11288                 struct btrfs_tree_block_info *info;
11289
11290                 info = (struct btrfs_tree_block_info *)ptr;
11291                 level = btrfs_tree_block_level(eb, info);
11292                 ptr += sizeof(struct btrfs_tree_block_info);
11293         } else {
11294                 /* New METADATA_ITEM */
11295                 level = key.offset;
11296         }
11297         end = (unsigned long)ei + item_size;
11298
11299 next:
11300         /* Reached extent item end normally */
11301         if (ptr == end)
11302                 goto out;
11303
11304         /* Beyond extent item end, wrong item size */
11305         if (ptr > end) {
11306                 err |= ITEM_SIZE_MISMATCH;
11307                 error("extent item at bytenr %llu slot %d has wrong size",
11308                         eb->start, slot);
11309                 goto out;
11310         }
11311
11312         /* Now check every backref in this extent item */
11313         iref = (struct btrfs_extent_inline_ref *)ptr;
11314         type = btrfs_extent_inline_ref_type(eb, iref);
11315         offset = btrfs_extent_inline_ref_offset(eb, iref);
11316         switch (type) {
11317         case BTRFS_TREE_BLOCK_REF_KEY:
11318                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11319                                                level);
11320                 err |= ret;
11321                 break;
11322         case BTRFS_SHARED_BLOCK_REF_KEY:
11323                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11324                                                  level);
11325                 err |= ret;
11326                 break;
11327         case BTRFS_EXTENT_DATA_REF_KEY:
11328                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11329                 ret = check_extent_data_backref(fs_info,
11330                                 btrfs_extent_data_ref_root(eb, dref),
11331                                 btrfs_extent_data_ref_objectid(eb, dref),
11332                                 btrfs_extent_data_ref_offset(eb, dref),
11333                                 key.objectid, key.offset,
11334                                 btrfs_extent_data_ref_count(eb, dref));
11335                 err |= ret;
11336                 break;
11337         case BTRFS_SHARED_DATA_REF_KEY:
11338                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11339                 err |= ret;
11340                 break;
11341         default:
11342                 error("extent[%llu %d %llu] has unknown ref type: %d",
11343                         key.objectid, key.type, key.offset, type);
11344                 err |= UNKNOWN_TYPE;
11345                 goto out;
11346         }
11347
11348         ptr += btrfs_extent_inline_ref_size(type);
11349         goto next;
11350
11351 out:
11352         return err;
11353 }
11354
11355 /*
11356  * Check if a dev extent item is referred correctly by its chunk
11357  */
11358 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11359                                  struct extent_buffer *eb, int slot)
11360 {
11361         struct btrfs_root *chunk_root = fs_info->chunk_root;
11362         struct btrfs_dev_extent *ptr;
11363         struct btrfs_path path;
11364         struct btrfs_key chunk_key;
11365         struct btrfs_key devext_key;
11366         struct btrfs_chunk *chunk;
11367         struct extent_buffer *l;
11368         int num_stripes;
11369         u64 length;
11370         int i;
11371         int found_chunk = 0;
11372         int ret;
11373
11374         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11375         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11376         length = btrfs_dev_extent_length(eb, ptr);
11377
11378         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11379         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11380         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11381
11382         btrfs_init_path(&path);
11383         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11384         if (ret)
11385                 goto out;
11386
11387         l = path.nodes[0];
11388         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11389         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11390                                       chunk_key.offset);
11391         if (ret < 0)
11392                 goto out;
11393
11394         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11395                 goto out;
11396
11397         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11398         for (i = 0; i < num_stripes; i++) {
11399                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11400                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11401
11402                 if (devid == devext_key.objectid &&
11403                     offset == devext_key.offset) {
11404                         found_chunk = 1;
11405                         break;
11406                 }
11407         }
11408 out:
11409         btrfs_release_path(&path);
11410         if (!found_chunk) {
11411                 error(
11412                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11413                         devext_key.objectid, devext_key.offset, length);
11414                 return REFERENCER_MISSING;
11415         }
11416         return 0;
11417 }
11418
11419 /*
11420  * Check if the used space is correct with the dev item
11421  */
11422 static int check_dev_item(struct btrfs_fs_info *fs_info,
11423                           struct extent_buffer *eb, int slot)
11424 {
11425         struct btrfs_root *dev_root = fs_info->dev_root;
11426         struct btrfs_dev_item *dev_item;
11427         struct btrfs_path path;
11428         struct btrfs_key key;
11429         struct btrfs_dev_extent *ptr;
11430         u64 dev_id;
11431         u64 used;
11432         u64 total = 0;
11433         int ret;
11434
11435         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11436         dev_id = btrfs_device_id(eb, dev_item);
11437         used = btrfs_device_bytes_used(eb, dev_item);
11438
11439         key.objectid = dev_id;
11440         key.type = BTRFS_DEV_EXTENT_KEY;
11441         key.offset = 0;
11442
11443         btrfs_init_path(&path);
11444         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11445         if (ret < 0) {
11446                 btrfs_item_key_to_cpu(eb, &key, slot);
11447                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11448                         key.objectid, key.type, key.offset);
11449                 btrfs_release_path(&path);
11450                 return REFERENCER_MISSING;
11451         }
11452
11453         /* Iterate dev_extents to calculate the used space of a device */
11454         while (1) {
11455                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11456                         goto next;
11457
11458                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11459                 if (key.objectid > dev_id)
11460                         break;
11461                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11462                         goto next;
11463
11464                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11465                                      struct btrfs_dev_extent);
11466                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11467 next:
11468                 ret = btrfs_next_item(dev_root, &path);
11469                 if (ret)
11470                         break;
11471         }
11472         btrfs_release_path(&path);
11473
11474         if (used != total) {
11475                 btrfs_item_key_to_cpu(eb, &key, slot);
11476                 error(
11477 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11478                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11479                         BTRFS_DEV_EXTENT_KEY, dev_id);
11480                 return ACCOUNTING_MISMATCH;
11481         }
11482         return 0;
11483 }
11484
11485 /*
11486  * Check a block group item with its referener (chunk) and its used space
11487  * with extent/metadata item
11488  */
11489 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11490                                   struct extent_buffer *eb, int slot)
11491 {
11492         struct btrfs_root *extent_root = fs_info->extent_root;
11493         struct btrfs_root *chunk_root = fs_info->chunk_root;
11494         struct btrfs_block_group_item *bi;
11495         struct btrfs_block_group_item bg_item;
11496         struct btrfs_path path;
11497         struct btrfs_key bg_key;
11498         struct btrfs_key chunk_key;
11499         struct btrfs_key extent_key;
11500         struct btrfs_chunk *chunk;
11501         struct extent_buffer *leaf;
11502         struct btrfs_extent_item *ei;
11503         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11504         u64 flags;
11505         u64 bg_flags;
11506         u64 used;
11507         u64 total = 0;
11508         int ret;
11509         int err = 0;
11510
11511         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11512         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11513         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11514         used = btrfs_block_group_used(&bg_item);
11515         bg_flags = btrfs_block_group_flags(&bg_item);
11516
11517         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11518         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11519         chunk_key.offset = bg_key.objectid;
11520
11521         btrfs_init_path(&path);
11522         /* Search for the referencer chunk */
11523         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11524         if (ret) {
11525                 error(
11526                 "block group[%llu %llu] did not find the related chunk item",
11527                         bg_key.objectid, bg_key.offset);
11528                 err |= REFERENCER_MISSING;
11529         } else {
11530                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11531                                         struct btrfs_chunk);
11532                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11533                                                 bg_key.offset) {
11534                         error(
11535         "block group[%llu %llu] related chunk item length does not match",
11536                                 bg_key.objectid, bg_key.offset);
11537                         err |= REFERENCER_MISMATCH;
11538                 }
11539         }
11540         btrfs_release_path(&path);
11541
11542         /* Search from the block group bytenr */
11543         extent_key.objectid = bg_key.objectid;
11544         extent_key.type = 0;
11545         extent_key.offset = 0;
11546
11547         btrfs_init_path(&path);
11548         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11549         if (ret < 0)
11550                 goto out;
11551
11552         /* Iterate extent tree to account used space */
11553         while (1) {
11554                 leaf = path.nodes[0];
11555
11556                 /* Search slot can point to the last item beyond leaf nritems */
11557                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11558                         goto next;
11559
11560                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11561                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11562                         break;
11563
11564                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11565                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11566                         goto next;
11567                 if (extent_key.objectid < bg_key.objectid)
11568                         goto next;
11569
11570                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11571                         total += nodesize;
11572                 else
11573                         total += extent_key.offset;
11574
11575                 ei = btrfs_item_ptr(leaf, path.slots[0],
11576                                     struct btrfs_extent_item);
11577                 flags = btrfs_extent_flags(leaf, ei);
11578                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11579                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11580                                 error(
11581                         "bad extent[%llu, %llu) type mismatch with chunk",
11582                                         extent_key.objectid,
11583                                         extent_key.objectid + extent_key.offset);
11584                                 err |= CHUNK_TYPE_MISMATCH;
11585                         }
11586                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11587                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11588                                     BTRFS_BLOCK_GROUP_METADATA))) {
11589                                 error(
11590                         "bad extent[%llu, %llu) type mismatch with chunk",
11591                                         extent_key.objectid,
11592                                         extent_key.objectid + nodesize);
11593                                 err |= CHUNK_TYPE_MISMATCH;
11594                         }
11595                 }
11596 next:
11597                 ret = btrfs_next_item(extent_root, &path);
11598                 if (ret)
11599                         break;
11600         }
11601
11602 out:
11603         btrfs_release_path(&path);
11604
11605         if (total != used) {
11606                 error(
11607                 "block group[%llu %llu] used %llu but extent items used %llu",
11608                         bg_key.objectid, bg_key.offset, used, total);
11609                 err |= ACCOUNTING_MISMATCH;
11610         }
11611         return err;
11612 }
11613
11614 /*
11615  * Check a chunk item.
11616  * Including checking all referred dev_extents and block group
11617  */
11618 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11619                             struct extent_buffer *eb, int slot)
11620 {
11621         struct btrfs_root *extent_root = fs_info->extent_root;
11622         struct btrfs_root *dev_root = fs_info->dev_root;
11623         struct btrfs_path path;
11624         struct btrfs_key chunk_key;
11625         struct btrfs_key bg_key;
11626         struct btrfs_key devext_key;
11627         struct btrfs_chunk *chunk;
11628         struct extent_buffer *leaf;
11629         struct btrfs_block_group_item *bi;
11630         struct btrfs_block_group_item bg_item;
11631         struct btrfs_dev_extent *ptr;
11632         u64 length;
11633         u64 chunk_end;
11634         u64 stripe_len;
11635         u64 type;
11636         int num_stripes;
11637         u64 offset;
11638         u64 objectid;
11639         int i;
11640         int ret;
11641         int err = 0;
11642
11643         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11644         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11645         length = btrfs_chunk_length(eb, chunk);
11646         chunk_end = chunk_key.offset + length;
11647         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11648                                       chunk_key.offset);
11649         if (ret < 0) {
11650                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11651                         chunk_end);
11652                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11653                 goto out;
11654         }
11655         type = btrfs_chunk_type(eb, chunk);
11656
11657         bg_key.objectid = chunk_key.offset;
11658         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11659         bg_key.offset = length;
11660
11661         btrfs_init_path(&path);
11662         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11663         if (ret) {
11664                 error(
11665                 "chunk[%llu %llu) did not find the related block group item",
11666                         chunk_key.offset, chunk_end);
11667                 err |= REFERENCER_MISSING;
11668         } else{
11669                 leaf = path.nodes[0];
11670                 bi = btrfs_item_ptr(leaf, path.slots[0],
11671                                     struct btrfs_block_group_item);
11672                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11673                                    sizeof(bg_item));
11674                 if (btrfs_block_group_flags(&bg_item) != type) {
11675                         error(
11676 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11677                                 chunk_key.offset, chunk_end, type,
11678                                 btrfs_block_group_flags(&bg_item));
11679                         err |= REFERENCER_MISSING;
11680                 }
11681         }
11682
11683         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11684         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11685         for (i = 0; i < num_stripes; i++) {
11686                 btrfs_release_path(&path);
11687                 btrfs_init_path(&path);
11688                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11689                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11690                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11691
11692                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11693                                         0, 0);
11694                 if (ret)
11695                         goto not_match_dev;
11696
11697                 leaf = path.nodes[0];
11698                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11699                                      struct btrfs_dev_extent);
11700                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11701                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11702                 if (objectid != chunk_key.objectid ||
11703                     offset != chunk_key.offset ||
11704                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11705                         goto not_match_dev;
11706                 continue;
11707 not_match_dev:
11708                 err |= BACKREF_MISSING;
11709                 error(
11710                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11711                         chunk_key.objectid, chunk_end, i);
11712                 continue;
11713         }
11714         btrfs_release_path(&path);
11715 out:
11716         return err;
11717 }
11718
11719 /*
11720  * Main entry function to check known items and update related accounting info
11721  */
11722 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11723 {
11724         struct btrfs_fs_info *fs_info = root->fs_info;
11725         struct btrfs_key key;
11726         int slot = 0;
11727         int type;
11728         struct btrfs_extent_data_ref *dref;
11729         int ret;
11730         int err = 0;
11731
11732 next:
11733         btrfs_item_key_to_cpu(eb, &key, slot);
11734         type = key.type;
11735
11736         switch (type) {
11737         case BTRFS_EXTENT_DATA_KEY:
11738                 ret = check_extent_data_item(root, eb, slot);
11739                 err |= ret;
11740                 break;
11741         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11742                 ret = check_block_group_item(fs_info, eb, slot);
11743                 err |= ret;
11744                 break;
11745         case BTRFS_DEV_ITEM_KEY:
11746                 ret = check_dev_item(fs_info, eb, slot);
11747                 err |= ret;
11748                 break;
11749         case BTRFS_CHUNK_ITEM_KEY:
11750                 ret = check_chunk_item(fs_info, eb, slot);
11751                 err |= ret;
11752                 break;
11753         case BTRFS_DEV_EXTENT_KEY:
11754                 ret = check_dev_extent_item(fs_info, eb, slot);
11755                 err |= ret;
11756                 break;
11757         case BTRFS_EXTENT_ITEM_KEY:
11758         case BTRFS_METADATA_ITEM_KEY:
11759                 ret = check_extent_item(fs_info, eb, slot);
11760                 err |= ret;
11761                 break;
11762         case BTRFS_EXTENT_CSUM_KEY:
11763                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11764                 break;
11765         case BTRFS_TREE_BLOCK_REF_KEY:
11766                 ret = check_tree_block_backref(fs_info, key.offset,
11767                                                key.objectid, -1);
11768                 err |= ret;
11769                 break;
11770         case BTRFS_EXTENT_DATA_REF_KEY:
11771                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11772                 ret = check_extent_data_backref(fs_info,
11773                                 btrfs_extent_data_ref_root(eb, dref),
11774                                 btrfs_extent_data_ref_objectid(eb, dref),
11775                                 btrfs_extent_data_ref_offset(eb, dref),
11776                                 key.objectid, 0,
11777                                 btrfs_extent_data_ref_count(eb, dref));
11778                 err |= ret;
11779                 break;
11780         case BTRFS_SHARED_BLOCK_REF_KEY:
11781                 ret = check_shared_block_backref(fs_info, key.offset,
11782                                                  key.objectid, -1);
11783                 err |= ret;
11784                 break;
11785         case BTRFS_SHARED_DATA_REF_KEY:
11786                 ret = check_shared_data_backref(fs_info, key.offset,
11787                                                 key.objectid);
11788                 err |= ret;
11789                 break;
11790         default:
11791                 break;
11792         }
11793
11794         if (++slot < btrfs_header_nritems(eb))
11795                 goto next;
11796
11797         return err;
11798 }
11799
11800 /*
11801  * Helper function for later fs/subvol tree check.  To determine if a tree
11802  * block should be checked.
11803  * This function will ensure only the direct referencer with lowest rootid to
11804  * check a fs/subvolume tree block.
11805  *
11806  * Backref check at extent tree would detect errors like missing subvolume
11807  * tree, so we can do aggressive check to reduce duplicated checks.
11808  */
11809 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11810 {
11811         struct btrfs_root *extent_root = root->fs_info->extent_root;
11812         struct btrfs_key key;
11813         struct btrfs_path path;
11814         struct extent_buffer *leaf;
11815         int slot;
11816         struct btrfs_extent_item *ei;
11817         unsigned long ptr;
11818         unsigned long end;
11819         int type;
11820         u32 item_size;
11821         u64 offset;
11822         struct btrfs_extent_inline_ref *iref;
11823         int ret;
11824
11825         btrfs_init_path(&path);
11826         key.objectid = btrfs_header_bytenr(eb);
11827         key.type = BTRFS_METADATA_ITEM_KEY;
11828         key.offset = (u64)-1;
11829
11830         /*
11831          * Any failure in backref resolving means we can't determine
11832          * whom the tree block belongs to.
11833          * So in that case, we need to check that tree block
11834          */
11835         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11836         if (ret < 0)
11837                 goto need_check;
11838
11839         ret = btrfs_previous_extent_item(extent_root, &path,
11840                                          btrfs_header_bytenr(eb));
11841         if (ret)
11842                 goto need_check;
11843
11844         leaf = path.nodes[0];
11845         slot = path.slots[0];
11846         btrfs_item_key_to_cpu(leaf, &key, slot);
11847         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11848
11849         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11850                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11851         } else {
11852                 struct btrfs_tree_block_info *info;
11853
11854                 info = (struct btrfs_tree_block_info *)(ei + 1);
11855                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11856         }
11857
11858         item_size = btrfs_item_size_nr(leaf, slot);
11859         ptr = (unsigned long)iref;
11860         end = (unsigned long)ei + item_size;
11861         while (ptr < end) {
11862                 iref = (struct btrfs_extent_inline_ref *)ptr;
11863                 type = btrfs_extent_inline_ref_type(leaf, iref);
11864                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11865
11866                 /*
11867                  * We only check the tree block if current root is
11868                  * the lowest referencer of it.
11869                  */
11870                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11871                     offset < root->objectid) {
11872                         btrfs_release_path(&path);
11873                         return 0;
11874                 }
11875
11876                 ptr += btrfs_extent_inline_ref_size(type);
11877         }
11878         /*
11879          * Normally we should also check keyed tree block ref, but that may be
11880          * very time consuming.  Inlined ref should already make us skip a lot
11881          * of refs now.  So skip search keyed tree block ref.
11882          */
11883
11884 need_check:
11885         btrfs_release_path(&path);
11886         return 1;
11887 }
11888
11889 /*
11890  * Traversal function for tree block. We will do:
11891  * 1) Skip shared fs/subvolume tree blocks
11892  * 2) Update related bytes accounting
11893  * 3) Pre-order traversal
11894  */
11895 static int traverse_tree_block(struct btrfs_root *root,
11896                                 struct extent_buffer *node)
11897 {
11898         struct extent_buffer *eb;
11899         struct btrfs_key key;
11900         struct btrfs_key drop_key;
11901         int level;
11902         u64 nr;
11903         int i;
11904         int err = 0;
11905         int ret;
11906
11907         /*
11908          * Skip shared fs/subvolume tree block, in that case they will
11909          * be checked by referencer with lowest rootid
11910          */
11911         if (is_fstree(root->objectid) && !should_check(root, node))
11912                 return 0;
11913
11914         /* Update bytes accounting */
11915         total_btree_bytes += node->len;
11916         if (fs_root_objectid(btrfs_header_owner(node)))
11917                 total_fs_tree_bytes += node->len;
11918         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11919                 total_extent_tree_bytes += node->len;
11920
11921         /* pre-order tranversal, check itself first */
11922         level = btrfs_header_level(node);
11923         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11924                                    btrfs_header_level(node),
11925                                    btrfs_header_owner(node));
11926         err |= ret;
11927         if (err)
11928                 error(
11929         "check %s failed root %llu bytenr %llu level %d, force continue check",
11930                         level ? "node":"leaf", root->objectid,
11931                         btrfs_header_bytenr(node), btrfs_header_level(node));
11932
11933         if (!level) {
11934                 btree_space_waste += btrfs_leaf_free_space(root, node);
11935                 ret = check_leaf_items(root, node);
11936                 err |= ret;
11937                 return err;
11938         }
11939
11940         nr = btrfs_header_nritems(node);
11941         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11942         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11943                 sizeof(struct btrfs_key_ptr);
11944
11945         /* Then check all its children */
11946         for (i = 0; i < nr; i++) {
11947                 u64 blocknr = btrfs_node_blockptr(node, i);
11948
11949                 btrfs_node_key_to_cpu(node, &key, i);
11950                 if (level == root->root_item.drop_level &&
11951                     is_dropped_key(&key, &drop_key))
11952                         continue;
11953
11954                 /*
11955                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11956                  * to call the function itself.
11957                  */
11958                 eb = read_tree_block(root->fs_info, blocknr, 0);
11959                 if (extent_buffer_uptodate(eb)) {
11960                         ret = traverse_tree_block(root, eb);
11961                         err |= ret;
11962                 }
11963                 free_extent_buffer(eb);
11964         }
11965
11966         return err;
11967 }
11968
11969 /*
11970  * Low memory usage version check_chunks_and_extents.
11971  */
11972 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11973 {
11974         struct btrfs_path path;
11975         struct btrfs_key key;
11976         struct btrfs_root *root1;
11977         struct btrfs_root *root;
11978         struct btrfs_root *cur_root;
11979         int err = 0;
11980         int ret;
11981
11982         root = fs_info->fs_root;
11983
11984         root1 = root->fs_info->chunk_root;
11985         ret = traverse_tree_block(root1, root1->node);
11986         err |= ret;
11987
11988         root1 = root->fs_info->tree_root;
11989         ret = traverse_tree_block(root1, root1->node);
11990         err |= ret;
11991
11992         btrfs_init_path(&path);
11993         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11994         key.offset = 0;
11995         key.type = BTRFS_ROOT_ITEM_KEY;
11996
11997         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11998         if (ret) {
11999                 error("cannot find extent treet in tree_root");
12000                 goto out;
12001         }
12002
12003         while (1) {
12004                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12005                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12006                         goto next;
12007                 key.offset = (u64)-1;
12008
12009                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12010                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12011                                         &key);
12012                 else
12013                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12014                 if (IS_ERR(cur_root) || !cur_root) {
12015                         error("failed to read tree: %lld", key.objectid);
12016                         goto next;
12017                 }
12018
12019                 ret = traverse_tree_block(cur_root, cur_root->node);
12020                 err |= ret;
12021
12022                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12023                         btrfs_free_fs_root(cur_root);
12024 next:
12025                 ret = btrfs_next_item(root1, &path);
12026                 if (ret)
12027                         goto out;
12028         }
12029
12030 out:
12031         btrfs_release_path(&path);
12032         return err;
12033 }
12034
12035 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12036 {
12037         int ret;
12038
12039         if (!ctx.progress_enabled)
12040                 fprintf(stderr, "checking extents\n");
12041         if (check_mode == CHECK_MODE_LOWMEM)
12042                 ret = check_chunks_and_extents_v2(fs_info);
12043         else
12044                 ret = check_chunks_and_extents(fs_info);
12045
12046         return ret;
12047 }
12048
12049 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12050                            struct btrfs_root *root, int overwrite)
12051 {
12052         struct extent_buffer *c;
12053         struct extent_buffer *old = root->node;
12054         int level;
12055         int ret;
12056         struct btrfs_disk_key disk_key = {0,0,0};
12057
12058         level = 0;
12059
12060         if (overwrite) {
12061                 c = old;
12062                 extent_buffer_get(c);
12063                 goto init;
12064         }
12065         c = btrfs_alloc_free_block(trans, root,
12066                                    root->fs_info->nodesize,
12067                                    root->root_key.objectid,
12068                                    &disk_key, level, 0, 0);
12069         if (IS_ERR(c)) {
12070                 c = old;
12071                 extent_buffer_get(c);
12072                 overwrite = 1;
12073         }
12074 init:
12075         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12076         btrfs_set_header_level(c, level);
12077         btrfs_set_header_bytenr(c, c->start);
12078         btrfs_set_header_generation(c, trans->transid);
12079         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12080         btrfs_set_header_owner(c, root->root_key.objectid);
12081
12082         write_extent_buffer(c, root->fs_info->fsid,
12083                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12084
12085         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12086                             btrfs_header_chunk_tree_uuid(c),
12087                             BTRFS_UUID_SIZE);
12088
12089         btrfs_mark_buffer_dirty(c);
12090         /*
12091          * this case can happen in the following case:
12092          *
12093          * 1.overwrite previous root.
12094          *
12095          * 2.reinit reloc data root, this is because we skip pin
12096          * down reloc data tree before which means we can allocate
12097          * same block bytenr here.
12098          */
12099         if (old->start == c->start) {
12100                 btrfs_set_root_generation(&root->root_item,
12101                                           trans->transid);
12102                 root->root_item.level = btrfs_header_level(root->node);
12103                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12104                                         &root->root_key, &root->root_item);
12105                 if (ret) {
12106                         free_extent_buffer(c);
12107                         return ret;
12108                 }
12109         }
12110         free_extent_buffer(old);
12111         root->node = c;
12112         add_root_to_dirty_list(root);
12113         return 0;
12114 }
12115
12116 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12117                                 struct extent_buffer *eb, int tree_root)
12118 {
12119         struct extent_buffer *tmp;
12120         struct btrfs_root_item *ri;
12121         struct btrfs_key key;
12122         u64 bytenr;
12123         int level = btrfs_header_level(eb);
12124         int nritems;
12125         int ret;
12126         int i;
12127
12128         /*
12129          * If we have pinned this block before, don't pin it again.
12130          * This can not only avoid forever loop with broken filesystem
12131          * but also give us some speedups.
12132          */
12133         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12134                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12135                 return 0;
12136
12137         btrfs_pin_extent(fs_info, eb->start, eb->len);
12138
12139         nritems = btrfs_header_nritems(eb);
12140         for (i = 0; i < nritems; i++) {
12141                 if (level == 0) {
12142                         btrfs_item_key_to_cpu(eb, &key, i);
12143                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12144                                 continue;
12145                         /* Skip the extent root and reloc roots */
12146                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12147                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12148                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12149                                 continue;
12150                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12151                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12152
12153                         /*
12154                          * If at any point we start needing the real root we
12155                          * will have to build a stump root for the root we are
12156                          * in, but for now this doesn't actually use the root so
12157                          * just pass in extent_root.
12158                          */
12159                         tmp = read_tree_block(fs_info, bytenr, 0);
12160                         if (!extent_buffer_uptodate(tmp)) {
12161                                 fprintf(stderr, "Error reading root block\n");
12162                                 return -EIO;
12163                         }
12164                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12165                         free_extent_buffer(tmp);
12166                         if (ret)
12167                                 return ret;
12168                 } else {
12169                         bytenr = btrfs_node_blockptr(eb, i);
12170
12171                         /* If we aren't the tree root don't read the block */
12172                         if (level == 1 && !tree_root) {
12173                                 btrfs_pin_extent(fs_info, bytenr,
12174                                                 fs_info->nodesize);
12175                                 continue;
12176                         }
12177
12178                         tmp = read_tree_block(fs_info, bytenr, 0);
12179                         if (!extent_buffer_uptodate(tmp)) {
12180                                 fprintf(stderr, "Error reading tree block\n");
12181                                 return -EIO;
12182                         }
12183                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12184                         free_extent_buffer(tmp);
12185                         if (ret)
12186                                 return ret;
12187                 }
12188         }
12189
12190         return 0;
12191 }
12192
12193 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12194 {
12195         int ret;
12196
12197         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12198         if (ret)
12199                 return ret;
12200
12201         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12202 }
12203
12204 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12205 {
12206         struct btrfs_block_group_cache *cache;
12207         struct btrfs_path path;
12208         struct extent_buffer *leaf;
12209         struct btrfs_chunk *chunk;
12210         struct btrfs_key key;
12211         int ret;
12212         u64 start;
12213
12214         btrfs_init_path(&path);
12215         key.objectid = 0;
12216         key.type = BTRFS_CHUNK_ITEM_KEY;
12217         key.offset = 0;
12218         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12219         if (ret < 0) {
12220                 btrfs_release_path(&path);
12221                 return ret;
12222         }
12223
12224         /*
12225          * We do this in case the block groups were screwed up and had alloc
12226          * bits that aren't actually set on the chunks.  This happens with
12227          * restored images every time and could happen in real life I guess.
12228          */
12229         fs_info->avail_data_alloc_bits = 0;
12230         fs_info->avail_metadata_alloc_bits = 0;
12231         fs_info->avail_system_alloc_bits = 0;
12232
12233         /* First we need to create the in-memory block groups */
12234         while (1) {
12235                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12236                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12237                         if (ret < 0) {
12238                                 btrfs_release_path(&path);
12239                                 return ret;
12240                         }
12241                         if (ret) {
12242                                 ret = 0;
12243                                 break;
12244                         }
12245                 }
12246                 leaf = path.nodes[0];
12247                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12248                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12249                         path.slots[0]++;
12250                         continue;
12251                 }
12252
12253                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12254                 btrfs_add_block_group(fs_info, 0,
12255                                       btrfs_chunk_type(leaf, chunk),
12256                                       key.objectid, key.offset,
12257                                       btrfs_chunk_length(leaf, chunk));
12258                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12259                                  key.offset + btrfs_chunk_length(leaf, chunk));
12260                 path.slots[0]++;
12261         }
12262         start = 0;
12263         while (1) {
12264                 cache = btrfs_lookup_first_block_group(fs_info, start);
12265                 if (!cache)
12266                         break;
12267                 cache->cached = 1;
12268                 start = cache->key.objectid + cache->key.offset;
12269         }
12270
12271         btrfs_release_path(&path);
12272         return 0;
12273 }
12274
12275 static int reset_balance(struct btrfs_trans_handle *trans,
12276                          struct btrfs_fs_info *fs_info)
12277 {
12278         struct btrfs_root *root = fs_info->tree_root;
12279         struct btrfs_path path;
12280         struct extent_buffer *leaf;
12281         struct btrfs_key key;
12282         int del_slot, del_nr = 0;
12283         int ret;
12284         int found = 0;
12285
12286         btrfs_init_path(&path);
12287         key.objectid = BTRFS_BALANCE_OBJECTID;
12288         key.type = BTRFS_BALANCE_ITEM_KEY;
12289         key.offset = 0;
12290         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12291         if (ret) {
12292                 if (ret > 0)
12293                         ret = 0;
12294                 if (!ret)
12295                         goto reinit_data_reloc;
12296                 else
12297                         goto out;
12298         }
12299
12300         ret = btrfs_del_item(trans, root, &path);
12301         if (ret)
12302                 goto out;
12303         btrfs_release_path(&path);
12304
12305         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12306         key.type = BTRFS_ROOT_ITEM_KEY;
12307         key.offset = 0;
12308         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12309         if (ret < 0)
12310                 goto out;
12311         while (1) {
12312                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12313                         if (!found)
12314                                 break;
12315
12316                         if (del_nr) {
12317                                 ret = btrfs_del_items(trans, root, &path,
12318                                                       del_slot, del_nr);
12319                                 del_nr = 0;
12320                                 if (ret)
12321                                         goto out;
12322                         }
12323                         key.offset++;
12324                         btrfs_release_path(&path);
12325
12326                         found = 0;
12327                         ret = btrfs_search_slot(trans, root, &key, &path,
12328                                                 -1, 1);
12329                         if (ret < 0)
12330                                 goto out;
12331                         continue;
12332                 }
12333                 found = 1;
12334                 leaf = path.nodes[0];
12335                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12336                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12337                         break;
12338                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12339                         path.slots[0]++;
12340                         continue;
12341                 }
12342                 if (!del_nr) {
12343                         del_slot = path.slots[0];
12344                         del_nr = 1;
12345                 } else {
12346                         del_nr++;
12347                 }
12348                 path.slots[0]++;
12349         }
12350
12351         if (del_nr) {
12352                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12353                 if (ret)
12354                         goto out;
12355         }
12356         btrfs_release_path(&path);
12357
12358 reinit_data_reloc:
12359         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12360         key.type = BTRFS_ROOT_ITEM_KEY;
12361         key.offset = (u64)-1;
12362         root = btrfs_read_fs_root(fs_info, &key);
12363         if (IS_ERR(root)) {
12364                 fprintf(stderr, "Error reading data reloc tree\n");
12365                 ret = PTR_ERR(root);
12366                 goto out;
12367         }
12368         record_root_in_trans(trans, root);
12369         ret = btrfs_fsck_reinit_root(trans, root, 0);
12370         if (ret)
12371                 goto out;
12372         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12373 out:
12374         btrfs_release_path(&path);
12375         return ret;
12376 }
12377
12378 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12379                               struct btrfs_fs_info *fs_info)
12380 {
12381         u64 start = 0;
12382         int ret;
12383
12384         /*
12385          * The only reason we don't do this is because right now we're just
12386          * walking the trees we find and pinning down their bytes, we don't look
12387          * at any of the leaves.  In order to do mixed groups we'd have to check
12388          * the leaves of any fs roots and pin down the bytes for any file
12389          * extents we find.  Not hard but why do it if we don't have to?
12390          */
12391         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12392                 fprintf(stderr, "We don't support re-initing the extent tree "
12393                         "for mixed block groups yet, please notify a btrfs "
12394                         "developer you want to do this so they can add this "
12395                         "functionality.\n");
12396                 return -EINVAL;
12397         }
12398
12399         /*
12400          * first we need to walk all of the trees except the extent tree and pin
12401          * down the bytes that are in use so we don't overwrite any existing
12402          * metadata.
12403          */
12404         ret = pin_metadata_blocks(fs_info);
12405         if (ret) {
12406                 fprintf(stderr, "error pinning down used bytes\n");
12407                 return ret;
12408         }
12409
12410         /*
12411          * Need to drop all the block groups since we're going to recreate all
12412          * of them again.
12413          */
12414         btrfs_free_block_groups(fs_info);
12415         ret = reset_block_groups(fs_info);
12416         if (ret) {
12417                 fprintf(stderr, "error resetting the block groups\n");
12418                 return ret;
12419         }
12420
12421         /* Ok we can allocate now, reinit the extent root */
12422         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12423         if (ret) {
12424                 fprintf(stderr, "extent root initialization failed\n");
12425                 /*
12426                  * When the transaction code is updated we should end the
12427                  * transaction, but for now progs only knows about commit so
12428                  * just return an error.
12429                  */
12430                 return ret;
12431         }
12432
12433         /*
12434          * Now we have all the in-memory block groups setup so we can make
12435          * allocations properly, and the metadata we care about is safe since we
12436          * pinned all of it above.
12437          */
12438         while (1) {
12439                 struct btrfs_block_group_cache *cache;
12440
12441                 cache = btrfs_lookup_first_block_group(fs_info, start);
12442                 if (!cache)
12443                         break;
12444                 start = cache->key.objectid + cache->key.offset;
12445                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12446                                         &cache->key, &cache->item,
12447                                         sizeof(cache->item));
12448                 if (ret) {
12449                         fprintf(stderr, "Error adding block group\n");
12450                         return ret;
12451                 }
12452                 btrfs_extent_post_op(trans, fs_info->extent_root);
12453         }
12454
12455         ret = reset_balance(trans, fs_info);
12456         if (ret)
12457                 fprintf(stderr, "error resetting the pending balance\n");
12458
12459         return ret;
12460 }
12461
12462 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12463 {
12464         struct btrfs_path path;
12465         struct btrfs_trans_handle *trans;
12466         struct btrfs_key key;
12467         int ret;
12468
12469         printf("Recowing metadata block %llu\n", eb->start);
12470         key.objectid = btrfs_header_owner(eb);
12471         key.type = BTRFS_ROOT_ITEM_KEY;
12472         key.offset = (u64)-1;
12473
12474         root = btrfs_read_fs_root(root->fs_info, &key);
12475         if (IS_ERR(root)) {
12476                 fprintf(stderr, "Couldn't find owner root %llu\n",
12477                         key.objectid);
12478                 return PTR_ERR(root);
12479         }
12480
12481         trans = btrfs_start_transaction(root, 1);
12482         if (IS_ERR(trans))
12483                 return PTR_ERR(trans);
12484
12485         btrfs_init_path(&path);
12486         path.lowest_level = btrfs_header_level(eb);
12487         if (path.lowest_level)
12488                 btrfs_node_key_to_cpu(eb, &key, 0);
12489         else
12490                 btrfs_item_key_to_cpu(eb, &key, 0);
12491
12492         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12493         btrfs_commit_transaction(trans, root);
12494         btrfs_release_path(&path);
12495         return ret;
12496 }
12497
12498 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12499 {
12500         struct btrfs_path path;
12501         struct btrfs_trans_handle *trans;
12502         struct btrfs_key key;
12503         int ret;
12504
12505         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12506                bad->key.type, bad->key.offset);
12507         key.objectid = bad->root_id;
12508         key.type = BTRFS_ROOT_ITEM_KEY;
12509         key.offset = (u64)-1;
12510
12511         root = btrfs_read_fs_root(root->fs_info, &key);
12512         if (IS_ERR(root)) {
12513                 fprintf(stderr, "Couldn't find owner root %llu\n",
12514                         key.objectid);
12515                 return PTR_ERR(root);
12516         }
12517
12518         trans = btrfs_start_transaction(root, 1);
12519         if (IS_ERR(trans))
12520                 return PTR_ERR(trans);
12521
12522         btrfs_init_path(&path);
12523         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12524         if (ret) {
12525                 if (ret > 0)
12526                         ret = 0;
12527                 goto out;
12528         }
12529         ret = btrfs_del_item(trans, root, &path);
12530 out:
12531         btrfs_commit_transaction(trans, root);
12532         btrfs_release_path(&path);
12533         return ret;
12534 }
12535
12536 static int zero_log_tree(struct btrfs_root *root)
12537 {
12538         struct btrfs_trans_handle *trans;
12539         int ret;
12540
12541         trans = btrfs_start_transaction(root, 1);
12542         if (IS_ERR(trans)) {
12543                 ret = PTR_ERR(trans);
12544                 return ret;
12545         }
12546         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12547         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12548         ret = btrfs_commit_transaction(trans, root);
12549         return ret;
12550 }
12551
12552 static int populate_csum(struct btrfs_trans_handle *trans,
12553                          struct btrfs_root *csum_root, char *buf, u64 start,
12554                          u64 len)
12555 {
12556         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12557         u64 offset = 0;
12558         u64 sectorsize;
12559         int ret = 0;
12560
12561         while (offset < len) {
12562                 sectorsize = fs_info->sectorsize;
12563                 ret = read_extent_data(fs_info, buf, start + offset,
12564                                        &sectorsize, 0);
12565                 if (ret)
12566                         break;
12567                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12568                                             start + offset, buf, sectorsize);
12569                 if (ret)
12570                         break;
12571                 offset += sectorsize;
12572         }
12573         return ret;
12574 }
12575
12576 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12577                                       struct btrfs_root *csum_root,
12578                                       struct btrfs_root *cur_root)
12579 {
12580         struct btrfs_path path;
12581         struct btrfs_key key;
12582         struct extent_buffer *node;
12583         struct btrfs_file_extent_item *fi;
12584         char *buf = NULL;
12585         u64 start = 0;
12586         u64 len = 0;
12587         int slot = 0;
12588         int ret = 0;
12589
12590         buf = malloc(cur_root->fs_info->sectorsize);
12591         if (!buf)
12592                 return -ENOMEM;
12593
12594         btrfs_init_path(&path);
12595         key.objectid = 0;
12596         key.offset = 0;
12597         key.type = 0;
12598         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12599         if (ret < 0)
12600                 goto out;
12601         /* Iterate all regular file extents and fill its csum */
12602         while (1) {
12603                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12604
12605                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12606                         goto next;
12607                 node = path.nodes[0];
12608                 slot = path.slots[0];
12609                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12610                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12611                         goto next;
12612                 start = btrfs_file_extent_disk_bytenr(node, fi);
12613                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12614
12615                 ret = populate_csum(trans, csum_root, buf, start, len);
12616                 if (ret == -EEXIST)
12617                         ret = 0;
12618                 if (ret < 0)
12619                         goto out;
12620 next:
12621                 /*
12622                  * TODO: if next leaf is corrupted, jump to nearest next valid
12623                  * leaf.
12624                  */
12625                 ret = btrfs_next_item(cur_root, &path);
12626                 if (ret < 0)
12627                         goto out;
12628                 if (ret > 0) {
12629                         ret = 0;
12630                         goto out;
12631                 }
12632         }
12633
12634 out:
12635         btrfs_release_path(&path);
12636         free(buf);
12637         return ret;
12638 }
12639
12640 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12641                                   struct btrfs_root *csum_root)
12642 {
12643         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12644         struct btrfs_path path;
12645         struct btrfs_root *tree_root = fs_info->tree_root;
12646         struct btrfs_root *cur_root;
12647         struct extent_buffer *node;
12648         struct btrfs_key key;
12649         int slot = 0;
12650         int ret = 0;
12651
12652         btrfs_init_path(&path);
12653         key.objectid = BTRFS_FS_TREE_OBJECTID;
12654         key.offset = 0;
12655         key.type = BTRFS_ROOT_ITEM_KEY;
12656         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12657         if (ret < 0)
12658                 goto out;
12659         if (ret > 0) {
12660                 ret = -ENOENT;
12661                 goto out;
12662         }
12663
12664         while (1) {
12665                 node = path.nodes[0];
12666                 slot = path.slots[0];
12667                 btrfs_item_key_to_cpu(node, &key, slot);
12668                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12669                         goto out;
12670                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12671                         goto next;
12672                 if (!is_fstree(key.objectid))
12673                         goto next;
12674                 key.offset = (u64)-1;
12675
12676                 cur_root = btrfs_read_fs_root(fs_info, &key);
12677                 if (IS_ERR(cur_root) || !cur_root) {
12678                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12679                                 key.objectid);
12680                         goto out;
12681                 }
12682                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12683                                 cur_root);
12684                 if (ret < 0)
12685                         goto out;
12686 next:
12687                 ret = btrfs_next_item(tree_root, &path);
12688                 if (ret > 0) {
12689                         ret = 0;
12690                         goto out;
12691                 }
12692                 if (ret < 0)
12693                         goto out;
12694         }
12695
12696 out:
12697         btrfs_release_path(&path);
12698         return ret;
12699 }
12700
12701 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12702                                       struct btrfs_root *csum_root)
12703 {
12704         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12705         struct btrfs_path path;
12706         struct btrfs_extent_item *ei;
12707         struct extent_buffer *leaf;
12708         char *buf;
12709         struct btrfs_key key;
12710         int ret;
12711
12712         btrfs_init_path(&path);
12713         key.objectid = 0;
12714         key.type = BTRFS_EXTENT_ITEM_KEY;
12715         key.offset = 0;
12716         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12717         if (ret < 0) {
12718                 btrfs_release_path(&path);
12719                 return ret;
12720         }
12721
12722         buf = malloc(csum_root->fs_info->sectorsize);
12723         if (!buf) {
12724                 btrfs_release_path(&path);
12725                 return -ENOMEM;
12726         }
12727
12728         while (1) {
12729                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12730                         ret = btrfs_next_leaf(extent_root, &path);
12731                         if (ret < 0)
12732                                 break;
12733                         if (ret) {
12734                                 ret = 0;
12735                                 break;
12736                         }
12737                 }
12738                 leaf = path.nodes[0];
12739
12740                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12741                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12742                         path.slots[0]++;
12743                         continue;
12744                 }
12745
12746                 ei = btrfs_item_ptr(leaf, path.slots[0],
12747                                     struct btrfs_extent_item);
12748                 if (!(btrfs_extent_flags(leaf, ei) &
12749                       BTRFS_EXTENT_FLAG_DATA)) {
12750                         path.slots[0]++;
12751                         continue;
12752                 }
12753
12754                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12755                                     key.offset);
12756                 if (ret)
12757                         break;
12758                 path.slots[0]++;
12759         }
12760
12761         btrfs_release_path(&path);
12762         free(buf);
12763         return ret;
12764 }
12765
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * After an extent tree re-init the extent tree no longer describes the data
 * extents, so it cannot be used as the source.  In that case the caller sets
 * @search_fs_tree and the fs/subvolume trees are walked instead; otherwise
 * the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12782
12783 static void free_roots_info_cache(void)
12784 {
12785         if (!roots_info_cache)
12786                 return;
12787
12788         while (!cache_tree_empty(roots_info_cache)) {
12789                 struct cache_extent *entry;
12790                 struct root_item_info *rii;
12791
12792                 entry = first_cache_extent(roots_info_cache);
12793                 if (!entry)
12794                         break;
12795                 remove_cache_extent(roots_info_cache, entry);
12796                 rii = container_of(entry, struct root_item_info, cache_extent);
12797                 free(rii);
12798         }
12799
12800         free(roots_info_cache);
12801         roots_info_cache = NULL;
12802 }
12803
12804 static int build_roots_info_cache(struct btrfs_fs_info *info)
12805 {
12806         int ret = 0;
12807         struct btrfs_key key;
12808         struct extent_buffer *leaf;
12809         struct btrfs_path path;
12810
12811         if (!roots_info_cache) {
12812                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12813                 if (!roots_info_cache)
12814                         return -ENOMEM;
12815                 cache_tree_init(roots_info_cache);
12816         }
12817
12818         btrfs_init_path(&path);
12819         key.objectid = 0;
12820         key.type = BTRFS_EXTENT_ITEM_KEY;
12821         key.offset = 0;
12822         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12823         if (ret < 0)
12824                 goto out;
12825         leaf = path.nodes[0];
12826
12827         while (1) {
12828                 struct btrfs_key found_key;
12829                 struct btrfs_extent_item *ei;
12830                 struct btrfs_extent_inline_ref *iref;
12831                 int slot = path.slots[0];
12832                 int type;
12833                 u64 flags;
12834                 u64 root_id;
12835                 u8 level;
12836                 struct cache_extent *entry;
12837                 struct root_item_info *rii;
12838
12839                 if (slot >= btrfs_header_nritems(leaf)) {
12840                         ret = btrfs_next_leaf(info->extent_root, &path);
12841                         if (ret < 0) {
12842                                 break;
12843                         } else if (ret) {
12844                                 ret = 0;
12845                                 break;
12846                         }
12847                         leaf = path.nodes[0];
12848                         slot = path.slots[0];
12849                 }
12850
12851                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12852
12853                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12854                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12855                         goto next;
12856
12857                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12858                 flags = btrfs_extent_flags(leaf, ei);
12859
12860                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12861                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12862                         goto next;
12863
12864                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12865                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12866                         level = found_key.offset;
12867                 } else {
12868                         struct btrfs_tree_block_info *binfo;
12869
12870                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12871                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12872                         level = btrfs_tree_block_level(leaf, binfo);
12873                 }
12874
12875                 /*
12876                  * For a root extent, it must be of the following type and the
12877                  * first (and only one) iref in the item.
12878                  */
12879                 type = btrfs_extent_inline_ref_type(leaf, iref);
12880                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12881                         goto next;
12882
12883                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12884                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12885                 if (!entry) {
12886                         rii = malloc(sizeof(struct root_item_info));
12887                         if (!rii) {
12888                                 ret = -ENOMEM;
12889                                 goto out;
12890                         }
12891                         rii->cache_extent.start = root_id;
12892                         rii->cache_extent.size = 1;
12893                         rii->level = (u8)-1;
12894                         entry = &rii->cache_extent;
12895                         ret = insert_cache_extent(roots_info_cache, entry);
12896                         ASSERT(ret == 0);
12897                 } else {
12898                         rii = container_of(entry, struct root_item_info,
12899                                            cache_extent);
12900                 }
12901
12902                 ASSERT(rii->cache_extent.start == root_id);
12903                 ASSERT(rii->cache_extent.size == 1);
12904
12905                 if (level > rii->level || rii->level == (u8)-1) {
12906                         rii->level = level;
12907                         rii->bytenr = found_key.objectid;
12908                         rii->gen = btrfs_extent_generation(leaf, ei);
12909                         rii->node_count = 1;
12910                 } else if (level == rii->level) {
12911                         rii->node_count++;
12912                 }
12913 next:
12914                 path.slots[0]++;
12915         }
12916
12917 out:
12918         btrfs_release_path(&path);
12919
12920         return ret;
12921 }
12922
12923 static int maybe_repair_root_item(struct btrfs_path *path,
12924                                   const struct btrfs_key *root_key,
12925                                   const int read_only_mode)
12926 {
12927         const u64 root_id = root_key->objectid;
12928         struct cache_extent *entry;
12929         struct root_item_info *rii;
12930         struct btrfs_root_item ri;
12931         unsigned long offset;
12932
12933         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12934         if (!entry) {
12935                 fprintf(stderr,
12936                         "Error: could not find extent items for root %llu\n",
12937                         root_key->objectid);
12938                 return -ENOENT;
12939         }
12940
12941         rii = container_of(entry, struct root_item_info, cache_extent);
12942         ASSERT(rii->cache_extent.start == root_id);
12943         ASSERT(rii->cache_extent.size == 1);
12944
12945         if (rii->node_count != 1) {
12946                 fprintf(stderr,
12947                         "Error: could not find btree root extent for root %llu\n",
12948                         root_id);
12949                 return -ENOENT;
12950         }
12951
12952         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12953         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12954
12955         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12956             btrfs_root_level(&ri) != rii->level ||
12957             btrfs_root_generation(&ri) != rii->gen) {
12958
12959                 /*
12960                  * If we're in repair mode but our caller told us to not update
12961                  * the root item, i.e. just check if it needs to be updated, don't
12962                  * print this message, since the caller will call us again shortly
12963                  * for the same root item without read only mode (the caller will
12964                  * open a transaction first).
12965                  */
12966                 if (!(read_only_mode && repair))
12967                         fprintf(stderr,
12968                                 "%sroot item for root %llu,"
12969                                 " current bytenr %llu, current gen %llu, current level %u,"
12970                                 " new bytenr %llu, new gen %llu, new level %u\n",
12971                                 (read_only_mode ? "" : "fixing "),
12972                                 root_id,
12973                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12974                                 btrfs_root_level(&ri),
12975                                 rii->bytenr, rii->gen, rii->level);
12976
12977                 if (btrfs_root_generation(&ri) > rii->gen) {
12978                         fprintf(stderr,
12979                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12980                                 root_id, btrfs_root_generation(&ri), rii->gen);
12981                         return -EINVAL;
12982                 }
12983
12984                 if (!read_only_mode) {
12985                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12986                         btrfs_set_root_level(&ri, rii->level);
12987                         btrfs_set_root_generation(&ri, rii->gen);
12988                         write_extent_buffer(path->nodes[0], &ri,
12989                                             offset, sizeof(ri));
12990                 }
12991
12992                 return 1;
12993         }
12994
12995         return 0;
12996 }
12997
12998 /*
12999  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13000  * caused read-only snapshots to be corrupted if they were created at a moment
13001  * when the source subvolume/snapshot had orphan items. The issue was that the
13002  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13003  * node instead of the post orphan cleanup root node.
13004  * So this function, and its callees, just detects and fixes those cases. Even
13005  * though the regression was for read-only snapshots, this function applies to
13006  * any snapshot/subvolume root.
13007  * This must be run before any other repair code - not doing it so, makes other
13008  * repair code delete or modify backrefs in the extent tree for example, which
13009  * will result in an inconsistent fs after repairing the root items.
13010  */
13011 static int repair_root_items(struct btrfs_fs_info *info)
13012 {
13013         struct btrfs_path path;
13014         struct btrfs_key key;
13015         struct extent_buffer *leaf;
13016         struct btrfs_trans_handle *trans = NULL;
13017         int ret = 0;
13018         int bad_roots = 0;
13019         int need_trans = 0;
13020
13021         btrfs_init_path(&path);
13022
13023         ret = build_roots_info_cache(info);
13024         if (ret)
13025                 goto out;
13026
13027         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13028         key.type = BTRFS_ROOT_ITEM_KEY;
13029         key.offset = 0;
13030
13031 again:
13032         /*
13033          * Avoid opening and committing transactions if a leaf doesn't have
13034          * any root items that need to be fixed, so that we avoid rotating
13035          * backup roots unnecessarily.
13036          */
13037         if (need_trans) {
13038                 trans = btrfs_start_transaction(info->tree_root, 1);
13039                 if (IS_ERR(trans)) {
13040                         ret = PTR_ERR(trans);
13041                         goto out;
13042                 }
13043         }
13044
13045         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13046                                 0, trans ? 1 : 0);
13047         if (ret < 0)
13048                 goto out;
13049         leaf = path.nodes[0];
13050
13051         while (1) {
13052                 struct btrfs_key found_key;
13053
13054                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13055                         int no_more_keys = find_next_key(&path, &key);
13056
13057                         btrfs_release_path(&path);
13058                         if (trans) {
13059                                 ret = btrfs_commit_transaction(trans,
13060                                                                info->tree_root);
13061                                 trans = NULL;
13062                                 if (ret < 0)
13063                                         goto out;
13064                         }
13065                         need_trans = 0;
13066                         if (no_more_keys)
13067                                 break;
13068                         goto again;
13069                 }
13070
13071                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13072
13073                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13074                         goto next;
13075                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13076                         goto next;
13077
13078                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13079                 if (ret < 0)
13080                         goto out;
13081                 if (ret) {
13082                         if (!trans && repair) {
13083                                 need_trans = 1;
13084                                 key = found_key;
13085                                 btrfs_release_path(&path);
13086                                 goto again;
13087                         }
13088                         bad_roots++;
13089                 }
13090 next:
13091                 path.slots[0]++;
13092         }
13093         ret = 0;
13094 out:
13095         free_roots_info_cache();
13096         btrfs_release_path(&path);
13097         if (trans)
13098                 btrfs_commit_transaction(trans, info->tree_root);
13099         if (ret < 0)
13100                 return ret;
13101
13102         return bad_roots;
13103 }
13104
13105 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13106 {
13107         struct btrfs_trans_handle *trans;
13108         struct btrfs_block_group_cache *bg_cache;
13109         u64 current = 0;
13110         int ret = 0;
13111
13112         /* Clear all free space cache inodes and its extent data */
13113         while (1) {
13114                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13115                 if (!bg_cache)
13116                         break;
13117                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13118                 if (ret < 0)
13119                         return ret;
13120                 current = bg_cache->key.objectid + bg_cache->key.offset;
13121         }
13122
13123         /* Don't forget to set cache_generation to -1 */
13124         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13125         if (IS_ERR(trans)) {
13126                 error("failed to update super block cache generation");
13127                 return PTR_ERR(trans);
13128         }
13129         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13130         btrfs_commit_transaction(trans, fs_info->tree_root);
13131
13132         return ret;
13133 }
13134
13135 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13136                 int clear_version)
13137 {
13138         int ret = 0;
13139
13140         if (clear_version == 1) {
13141                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13142                         error(
13143                 "free space cache v2 detected, use --clear-space-cache v2");
13144                         ret = 1;
13145                         goto close_out;
13146                 }
13147                 printf("Clearing free space cache\n");
13148                 ret = clear_free_space_cache(fs_info);
13149                 if (ret) {
13150                         error("failed to clear free space cache");
13151                         ret = 1;
13152                 } else {
13153                         printf("Free space cache cleared\n");
13154                 }
13155         } else if (clear_version == 2) {
13156                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13157                         printf("no free space cache v2 to clear\n");
13158                         ret = 0;
13159                         goto close_out;
13160                 }
13161                 printf("Clear free space cache v2\n");
13162                 ret = btrfs_clear_free_space_tree(fs_info);
13163                 if (ret) {
13164                         error("failed to clear free space cache v2: %d", ret);
13165                         ret = 1;
13166                 } else {
13167                         printf("free space cache v2 cleared\n");
13168                 }
13169         }
13170 close_out:
13171         return ret;
13172 }
13173
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * The table is NULL-terminated. It holds the synopsis line, a one-line
 * summary, the longer description, an empty string, and then one or more
 * lines per command line option.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
13205
13206 int cmd_check(int argc, char **argv)
13207 {
13208         struct cache_tree root_cache;
13209         struct btrfs_root *root;
13210         struct btrfs_fs_info *info;
13211         u64 bytenr = 0;
13212         u64 subvolid = 0;
13213         u64 tree_root_bytenr = 0;
13214         u64 chunk_root_bytenr = 0;
13215         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13216         int ret = 0;
13217         int err = 0;
13218         u64 num;
13219         int init_csum_tree = 0;
13220         int readonly = 0;
13221         int clear_space_cache = 0;
13222         int qgroup_report = 0;
13223         int qgroups_repaired = 0;
13224         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13225         int force = 0;
13226
13227         while(1) {
13228                 int c;
13229                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13230                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13231                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13232                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13233                         GETOPT_VAL_FORCE };
13234                 static const struct option long_options[] = {
13235                         { "super", required_argument, NULL, 's' },
13236                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13237                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13238                         { "init-csum-tree", no_argument, NULL,
13239                                 GETOPT_VAL_INIT_CSUM },
13240                         { "init-extent-tree", no_argument, NULL,
13241                                 GETOPT_VAL_INIT_EXTENT },
13242                         { "check-data-csum", no_argument, NULL,
13243                                 GETOPT_VAL_CHECK_CSUM },
13244                         { "backup", no_argument, NULL, 'b' },
13245                         { "subvol-extents", required_argument, NULL, 'E' },
13246                         { "qgroup-report", no_argument, NULL, 'Q' },
13247                         { "tree-root", required_argument, NULL, 'r' },
13248                         { "chunk-root", required_argument, NULL,
13249                                 GETOPT_VAL_CHUNK_TREE },
13250                         { "progress", no_argument, NULL, 'p' },
13251                         { "mode", required_argument, NULL,
13252                                 GETOPT_VAL_MODE },
13253                         { "clear-space-cache", required_argument, NULL,
13254                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13255                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13256                         { NULL, 0, NULL, 0}
13257                 };
13258
13259                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13260                 if (c < 0)
13261                         break;
13262                 switch(c) {
13263                         case 'a': /* ignored */ break;
13264                         case 'b':
13265                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13266                                 break;
13267                         case 's':
13268                                 num = arg_strtou64(optarg);
13269                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13270                                         error(
13271                                         "super mirror should be less than %d",
13272                                                 BTRFS_SUPER_MIRROR_MAX);
13273                                         exit(1);
13274                                 }
13275                                 bytenr = btrfs_sb_offset(((int)num));
13276                                 printf("using SB copy %llu, bytenr %llu\n", num,
13277                                        (unsigned long long)bytenr);
13278                                 break;
13279                         case 'Q':
13280                                 qgroup_report = 1;
13281                                 break;
13282                         case 'E':
13283                                 subvolid = arg_strtou64(optarg);
13284                                 break;
13285                         case 'r':
13286                                 tree_root_bytenr = arg_strtou64(optarg);
13287                                 break;
13288                         case GETOPT_VAL_CHUNK_TREE:
13289                                 chunk_root_bytenr = arg_strtou64(optarg);
13290                                 break;
13291                         case 'p':
13292                                 ctx.progress_enabled = true;
13293                                 break;
13294                         case '?':
13295                         case 'h':
13296                                 usage(cmd_check_usage);
13297                         case GETOPT_VAL_REPAIR:
13298                                 printf("enabling repair mode\n");
13299                                 repair = 1;
13300                                 ctree_flags |= OPEN_CTREE_WRITES;
13301                                 break;
13302                         case GETOPT_VAL_READONLY:
13303                                 readonly = 1;
13304                                 break;
13305                         case GETOPT_VAL_INIT_CSUM:
13306                                 printf("Creating a new CRC tree\n");
13307                                 init_csum_tree = 1;
13308                                 repair = 1;
13309                                 ctree_flags |= OPEN_CTREE_WRITES;
13310                                 break;
13311                         case GETOPT_VAL_INIT_EXTENT:
13312                                 init_extent_tree = 1;
13313                                 ctree_flags |= (OPEN_CTREE_WRITES |
13314                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13315                                 repair = 1;
13316                                 break;
13317                         case GETOPT_VAL_CHECK_CSUM:
13318                                 check_data_csum = 1;
13319                                 break;
13320                         case GETOPT_VAL_MODE:
13321                                 check_mode = parse_check_mode(optarg);
13322                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13323                                         error("unknown mode: %s", optarg);
13324                                         exit(1);
13325                                 }
13326                                 break;
13327                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13328                                 if (strcmp(optarg, "v1") == 0) {
13329                                         clear_space_cache = 1;
13330                                 } else if (strcmp(optarg, "v2") == 0) {
13331                                         clear_space_cache = 2;
13332                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13333                                 } else {
13334                                         error(
13335                 "invalid argument to --clear-space-cache, must be v1 or v2");
13336                                         exit(1);
13337                                 }
13338                                 ctree_flags |= OPEN_CTREE_WRITES;
13339                                 break;
13340                         case GETOPT_VAL_FORCE:
13341                                 force = 1;
13342                                 break;
13343                 }
13344         }
13345
13346         if (check_argc_exact(argc - optind, 1))
13347                 usage(cmd_check_usage);
13348
13349         if (ctx.progress_enabled) {
13350                 ctx.tp = TASK_NOTHING;
13351                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13352         }
13353
13354         /* This check is the only reason for --readonly to exist */
13355         if (readonly && repair) {
13356                 error("repair options are not compatible with --readonly");
13357                 exit(1);
13358         }
13359
13360         /*
13361          * experimental and dangerous
13362          */
13363         if (repair && check_mode == CHECK_MODE_LOWMEM)
13364                 warning("low-memory mode repair support is only partial");
13365
13366         radix_tree_init();
13367         cache_tree_init(&root_cache);
13368
13369         ret = check_mounted(argv[optind]);
13370         if (!force) {
13371                 if (ret < 0) {
13372                         error("could not check mount status: %s",
13373                                         strerror(-ret));
13374                         err |= !!ret;
13375                         goto err_out;
13376                 } else if (ret) {
13377                         error(
13378 "%s is currently mounted, use --force if you really intend to check the filesystem",
13379                                 argv[optind]);
13380                         ret = -EBUSY;
13381                         err |= !!ret;
13382                         goto err_out;
13383                 }
13384         } else {
13385                 if (repair) {
13386                         error("repair and --force is not yet supported");
13387                         ret = 1;
13388                         err |= !!ret;
13389                         goto err_out;
13390                 }
13391                 if (ret < 0) {
13392                         warning(
13393 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13394                                 argv[optind]);
13395                 } else if (ret) {
13396                         warning(
13397                         "filesystem mounted, continuing because of --force");
13398                 }
13399                 /* A block device is mounted in exclusive mode by kernel */
13400                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13401         }
13402
13403         /* only allow partial opening under repair mode */
13404         if (repair)
13405                 ctree_flags |= OPEN_CTREE_PARTIAL;
13406
13407         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13408                                   chunk_root_bytenr, ctree_flags);
13409         if (!info) {
13410                 error("cannot open file system");
13411                 ret = -EIO;
13412                 err |= !!ret;
13413                 goto err_out;
13414         }
13415
13416         global_info = info;
13417         root = info->fs_root;
13418         uuid_unparse(info->super_copy->fsid, uuidbuf);
13419
13420         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13421
13422         /*
13423          * Check the bare minimum before starting anything else that could rely
13424          * on it, namely the tree roots, any local consistency checks
13425          */
13426         if (!extent_buffer_uptodate(info->tree_root->node) ||
13427             !extent_buffer_uptodate(info->dev_root->node) ||
13428             !extent_buffer_uptodate(info->chunk_root->node)) {
13429                 error("critical roots corrupted, unable to check the filesystem");
13430                 err |= !!ret;
13431                 ret = -EIO;
13432                 goto close_out;
13433         }
13434
13435         if (clear_space_cache) {
13436                 ret = do_clear_free_space_cache(info, clear_space_cache);
13437                 err |= !!ret;
13438                 goto close_out;
13439         }
13440
13441         /*
13442          * repair mode will force us to commit transaction which
13443          * will make us fail to load log tree when mounting.
13444          */
13445         if (repair && btrfs_super_log_root(info->super_copy)) {
13446                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13447                 if (!ret) {
13448                         ret = 1;
13449                         err |= !!ret;
13450                         goto close_out;
13451                 }
13452                 ret = zero_log_tree(root);
13453                 err |= !!ret;
13454                 if (ret) {
13455                         error("failed to zero log tree: %d", ret);
13456                         goto close_out;
13457                 }
13458         }
13459
13460         if (qgroup_report) {
13461                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13462                        uuidbuf);
13463                 ret = qgroup_verify_all(info);
13464                 err |= !!ret;
13465                 if (ret == 0)
13466                         report_qgroups(1);
13467                 goto close_out;
13468         }
13469         if (subvolid) {
13470                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13471                        subvolid, argv[optind], uuidbuf);
13472                 ret = print_extent_state(info, subvolid);
13473                 err |= !!ret;
13474                 goto close_out;
13475         }
13476
13477         if (init_extent_tree || init_csum_tree) {
13478                 struct btrfs_trans_handle *trans;
13479
13480                 trans = btrfs_start_transaction(info->extent_root, 0);
13481                 if (IS_ERR(trans)) {
13482                         error("error starting transaction");
13483                         ret = PTR_ERR(trans);
13484                         err |= !!ret;
13485                         goto close_out;
13486                 }
13487
13488                 if (init_extent_tree) {
13489                         printf("Creating a new extent tree\n");
13490                         ret = reinit_extent_tree(trans, info);
13491                         err |= !!ret;
13492                         if (ret)
13493                                 goto close_out;
13494                 }
13495
13496                 if (init_csum_tree) {
13497                         printf("Reinitialize checksum tree\n");
13498                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13499                         if (ret) {
13500                                 error("checksum tree initialization failed: %d",
13501                                                 ret);
13502                                 ret = -EIO;
13503                                 err |= !!ret;
13504                                 goto close_out;
13505                         }
13506
13507                         ret = fill_csum_tree(trans, info->csum_root,
13508                                              init_extent_tree);
13509                         err |= !!ret;
13510                         if (ret) {
13511                                 error("checksum tree refilling failed: %d", ret);
13512                                 return -EIO;
13513                         }
13514                 }
13515                 /*
13516                  * Ok now we commit and run the normal fsck, which will add
13517                  * extent entries for all of the items it finds.
13518                  */
13519                 ret = btrfs_commit_transaction(trans, info->extent_root);
13520                 err |= !!ret;
13521                 if (ret)
13522                         goto close_out;
13523         }
13524         if (!extent_buffer_uptodate(info->extent_root->node)) {
13525                 error("critical: extent_root, unable to check the filesystem");
13526                 ret = -EIO;
13527                 err |= !!ret;
13528                 goto close_out;
13529         }
13530         if (!extent_buffer_uptodate(info->csum_root->node)) {
13531                 error("critical: csum_root, unable to check the filesystem");
13532                 ret = -EIO;
13533                 err |= !!ret;
13534                 goto close_out;
13535         }
13536
13537         ret = do_check_chunks_and_extents(info);
13538         err |= !!ret;
13539         if (ret)
13540                 error(
13541                 "errors found in extent allocation tree or chunk allocation");
13542
13543         ret = repair_root_items(info);
13544         err |= !!ret;
13545         if (ret < 0) {
13546                 error("failed to repair root items: %s", strerror(-ret));
13547                 goto close_out;
13548         }
13549         if (repair) {
13550                 fprintf(stderr, "Fixed %d roots.\n", ret);
13551                 ret = 0;
13552         } else if (ret > 0) {
13553                 fprintf(stderr,
13554                        "Found %d roots with an outdated root item.\n",
13555                        ret);
13556                 fprintf(stderr,
13557                         "Please run a filesystem check with the option --repair to fix them.\n");
13558                 ret = 1;
13559                 err |= !!ret;
13560                 goto close_out;
13561         }
13562
13563         if (!ctx.progress_enabled) {
13564                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13565                         fprintf(stderr, "checking free space tree\n");
13566                 else
13567                         fprintf(stderr, "checking free space cache\n");
13568         }
13569         ret = check_space_cache(root);
13570         err |= !!ret;
13571         if (ret) {
13572                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13573                         error("errors found in free space tree");
13574                 else
13575                         error("errors found in free space cache");
13576                 goto out;
13577         }
13578
13579         /*
13580          * We used to have to have these hole extents in between our real
13581          * extents so if we don't have this flag set we need to make sure there
13582          * are no gaps in the file extents for inodes, otherwise we can just
13583          * ignore it when this happens.
13584          */
13585         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13586         ret = do_check_fs_roots(info, &root_cache);
13587         err |= !!ret;
13588         if (ret) {
13589                 error("errors found in fs roots");
13590                 goto out;
13591         }
13592
13593         fprintf(stderr, "checking csums\n");
13594         ret = check_csums(root);
13595         err |= !!ret;
13596         if (ret) {
13597                 error("errors found in csum tree");
13598                 goto out;
13599         }
13600
13601         fprintf(stderr, "checking root refs\n");
13602         /* For low memory mode, check_fs_roots_v2 handles root refs */
13603         if (check_mode != CHECK_MODE_LOWMEM) {
13604                 ret = check_root_refs(root, &root_cache);
13605                 err |= !!ret;
13606                 if (ret) {
13607                         error("errors found in root refs");
13608                         goto out;
13609                 }
13610         }
13611
13612         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13613                 struct extent_buffer *eb;
13614
13615                 eb = list_first_entry(&root->fs_info->recow_ebs,
13616                                       struct extent_buffer, recow);
13617                 list_del_init(&eb->recow);
13618                 ret = recow_extent_buffer(root, eb);
13619                 err |= !!ret;
13620                 if (ret) {
13621                         error("fails to fix transid errors");
13622                         break;
13623                 }
13624         }
13625
13626         while (!list_empty(&delete_items)) {
13627                 struct bad_item *bad;
13628
13629                 bad = list_first_entry(&delete_items, struct bad_item, list);
13630                 list_del_init(&bad->list);
13631                 if (repair) {
13632                         ret = delete_bad_item(root, bad);
13633                         err |= !!ret;
13634                 }
13635                 free(bad);
13636         }
13637
13638         if (info->quota_enabled) {
13639                 fprintf(stderr, "checking quota groups\n");
13640                 ret = qgroup_verify_all(info);
13641                 err |= !!ret;
13642                 if (ret) {
13643                         error("failed to check quota groups");
13644                         goto out;
13645                 }
13646                 report_qgroups(0);
13647                 ret = repair_qgroups(info, &qgroups_repaired);
13648                 err |= !!ret;
13649                 if (err) {
13650                         error("failed to repair quota groups");
13651                         goto out;
13652                 }
13653                 ret = 0;
13654         }
13655
13656         if (!list_empty(&root->fs_info->recow_ebs)) {
13657                 error("transid errors in file system");
13658                 ret = 1;
13659                 err |= !!ret;
13660         }
13661 out:
13662         printf("found %llu bytes used, ",
13663                (unsigned long long)bytes_used);
13664         if (err)
13665                 printf("error(s) found\n");
13666         else
13667                 printf("no error found\n");
13668         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13669         printf("total tree bytes: %llu\n",
13670                (unsigned long long)total_btree_bytes);
13671         printf("total fs tree bytes: %llu\n",
13672                (unsigned long long)total_fs_tree_bytes);
13673         printf("total extent tree bytes: %llu\n",
13674                (unsigned long long)total_extent_tree_bytes);
13675         printf("btree space waste bytes: %llu\n",
13676                (unsigned long long)btree_space_waste);
13677         printf("file data blocks allocated: %llu\n referenced %llu\n",
13678                 (unsigned long long)data_bytes_allocated,
13679                 (unsigned long long)data_bytes_referenced);
13680
13681         free_qgroup_counts();
13682         free_root_recs_tree(&root_cache);
13683 close_out:
13684         close_ctree(root);
13685 err_out:
13686         if (ctx.progress_enabled)
13687                 task_deinit(ctx.info);
13688
13689         return err;
13690 }